|
| 1 | +""" |
| 2 | +Fixtures for online-store integration tests. |
| 3 | +""" |
| 4 | + |
| 5 | +from typing import Dict |
| 6 | + |
| 7 | +import pytest |
| 8 | +from testcontainers.core.container import DockerContainer |
| 9 | +from testcontainers.core.waiting_utils import wait_for_logs |
| 10 | + |
| 11 | +from tests.universal.feature_repos.universal.online_store_creator import ( |
| 12 | + OnlineStoreCreator, |
| 13 | +) |
| 14 | + |
| 15 | + |
class _SharedDbDynamoDBOnlineStoreCreator(OnlineStoreCreator):
    """DynamoDB Local container started with ``-sharedDb -inMemory``.

    Why ``-sharedDb``
    -----------------
    DynamoDB Local 2.x namespaces tables by the **access key ID** in the
    request signature.  In CI, the sync ``boto3`` client and the async
    ``aiobotocore`` client can resolve credentials from *different* sources
    (env vars, credential file, ``credential_process``, container IAM role,
    etc.) even after ``monkeypatch.setenv`` has set fake keys—because the
    credential chain is evaluated lazily and various caches may hold stale
    values.

    When the two clients end up using *different* access keys, the sync
    client creates tables in namespace A while the async client queries
    namespace B, which is empty → ``ResourceNotFoundException``.

    ``-sharedDb`` collapses all namespaces into a single in-memory database,
    making table visibility completely independent of which credentials each
    client uses.  This is the correct setting for integration tests that want
    to verify async read/write behaviour without caring about credential
    isolation.
    """

    def __init__(self, project_name: str, **kwargs):
        """Configure (but do not start) the DynamoDB Local container.

        Parameters
        ----------
        project_name:
            Forwarded unchanged to ``OnlineStoreCreator.__init__``.
        **kwargs:
            Accepted and ignored, so callers may pass creator options that
            this class does not use.
        """
        super().__init__(project_name)
        self.container = (
            DockerContainer("amazon/dynamodb-local:latest")
            .with_exposed_ports("8000")
            .with_command("-jar DynamoDBLocal.jar -sharedDb -inMemory")
        )

    def create_online_store(self) -> Dict[str, str]:
        """Start the container and return the online-store config for it.

        Blocks until DynamoDB Local logs its start-up banner (at most 10
        seconds).  If the readiness wait or the port lookup fails, the
        container is stopped before the error propagates so that a failed
        start does not leave a running container behind.

        Returns
        -------
        Dict[str, str]
            Online-store settings (``type``, ``endpoint_url``, ``region``)
            pointing at the host port mapped to the container's port 8000.
        """
        self.container.start()
        try:
            wait_for_logs(
                container=self.container,
                predicate="Initializing DynamoDB Local with the following configuration:",
                timeout=10,
            )
            exposed_port = self.container.get_exposed_port("8000")
        except Exception:
            # The caller never receives a config on this path, so it has no
            # reason to call teardown(); stop the container here instead.
            self.container.stop()
            raise
        return {
            "type": "dynamodb",
            "endpoint_url": f"http://localhost:{exposed_port}",
            "region": "us-west-2",
        }

    def teardown(self) -> None:
        """Stop the DynamoDB Local container."""
        self.container.stop()
| 64 | + |
| 65 | + |
@pytest.fixture
async def dynamodb_local_environment(monkeypatch, worker_id):
    """Isolated, self-contained Environment for DynamoDB async tests.

    Root cause of the async credential failures
    -------------------------------------------
    DynamoDB Local 2.x isolates tables **per access key ID**.  In CI,
    ``boto3`` (sync, used to provision tables via ``store.apply()``) and
    ``aiobotocore`` (async, used for reads/writes in the test body) may
    resolve credentials from *different* sources even when ``monkeypatch``
    has set fake static keys—the credential chain is evaluated lazily and
    caches may hold stale values from a real AWS session configured in the
    runner environment.

    When the two clients end up using different access key IDs they land in
    different DynamoDB Local namespaces:

    * sync client  → namespace ``KEY_A`` → tables exist ✓
    * async client → namespace ``KEY_B`` → tables not found → ``ResourceNotFoundException``

    Fix: ``_SharedDbDynamoDBOnlineStoreCreator``
    --------------------------------------------
    The isolated container is started with ``-sharedDb -inMemory``.  In
    shared-DB mode DynamoDB Local stores *all* tables in a single namespace
    regardless of the access key, so sync and async clients always see the
    same tables.

    Why async + ``await fs.initialize()`` before yielding
    -----------------------------------------------------
    Calling ``await fs.initialize()`` eagerly creates the ``aiobotocore``
    client inside this fixture's event loop (the *same* loop the test will
    run in).  This pre-caches:

    1. ``FeatureStore._provider`` so the identical ``DynamoDBOnlineStore``
       instance is reused for the entire test.
    2. The aiobotocore client, which is now unambiguously pointed at our
       isolated container's ``endpoint_url``.

    Cleanup guarantees
    ------------------
    Everything after ``environment.setup()`` runs under ``try``/``finally``.
    The environment is torn down and the class-level client caches are reset
    even if building the test data or initialising the async client fails,
    or if closing the async client raises during teardown.

    Yields
    ------
    tuple[Environment, TestData]
        ``(environment, (entities, datasets, data_sources))``
    """
    from feast.infra.online_stores.dynamodb import DynamoDBOnlineStore
    from tests.universal.feature_repos.integration_test_repo_config import (
        IntegrationTestRepoConfig,
    )
    from tests.universal.feature_repos.repo_configuration import (
        construct_test_environment,
        construct_universal_test_data,
    )
    from tests.universal.feature_repos.universal.data_sources.file import (
        FileDataSourceCreator,
    )

    # Set fake static credentials before any boto client is created.
    # These are accepted by DynamoDB Local regardless of validity.
    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "fakeaccesskey000000")
    monkeypatch.setenv(
        "AWS_SECRET_ACCESS_KEY", "fakesecretkey0000000000000000000000000000"
    )
    monkeypatch.delenv("AWS_SESSION_TOKEN", raising=False)
    monkeypatch.delenv("AWS_SECURITY_TOKEN", raising=False)
    # Prevent IMDS from injecting real session tokens on EC2-backed runners.
    monkeypatch.setenv("AWS_EC2_METADATA_DISABLED", "true")
    # Disable the container credentials provider (ECS/EKS IAM roles).
    monkeypatch.delenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI", raising=False)
    monkeypatch.delenv("AWS_CONTAINER_CREDENTIALS_FULL_URI", raising=False)
    # Ensure no profile redirects boto to a different credential source.
    monkeypatch.delenv("AWS_PROFILE", raising=False)
    monkeypatch.delenv("AWS_DEFAULT_PROFILE", raising=False)

    # Reset class-level boto3 client caches so that no stale client from a
    # previous test in this worker bleeds into our isolated environment.
    DynamoDBOnlineStore._dynamodb_client = None
    DynamoDBOnlineStore._dynamodb_resource = None

    config = IntegrationTestRepoConfig(
        provider="local",
        offline_store_creator=FileDataSourceCreator,
        online_store_creator=_SharedDbDynamoDBOnlineStoreCreator,
        online_store=None,
    )

    environment = construct_test_environment(
        config,
        fixture_request=None,
        worker_id=worker_id,
    )
    environment.setup()

    # Only close the async client if initialize() actually completed;
    # closing a client that was never created is not known to be safe.
    client_initialized = False
    try:
        # FileDataSourceCreator writes only local Parquet files — no AWS calls.
        universal_test_data = construct_universal_test_data(environment)

        # Eagerly initialise the aiobotocore client in *this* event loop so it
        # is guaranteed to point at our container and is reused throughout the
        # test body without lazy-init surprises.
        await environment.feature_store.initialize()
        client_initialized = True

        yield environment, universal_test_data
    finally:
        try:
            # Cleanly shut down the async client before the container
            # disappears.
            if client_initialized:
                await environment.feature_store.close()
        finally:
            try:
                environment.teardown()
            finally:
                # Flush class-level caches so the next test starts completely
                # fresh, even when the teardown steps above raised.
                DynamoDBOnlineStore._dynamodb_client = None
                DynamoDBOnlineStore._dynamodb_resource = None
0 commit comments