Skip to content

Commit 59a33b1

Browse files
committed
Add user_data test
1 parent 0ad37c0 commit 59a33b1

File tree

2 files changed

+37
-17
lines changed

2 files changed

+37
-17
lines changed

src/apify/storage_clients/_apify/_request_queue_single_client.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ async def _get_request(self, id: str) -> Request | None:
197197
if id in self._requests_cache:
198198
return self._requests_cache[id]
199199

200+
# Requests that were not added by this client are not in local cache. Fetch them from platform.
200201
response = await self._api_client.get_request(id)
201202

202203
if response is None:
@@ -206,13 +207,13 @@ async def _get_request(self, id: str) -> Request | None:
206207

207208
# Updated local caches
208209
if id in self._requests_in_progress:
209-
# Ignore requests that are already in progress, client is already aware of them.
210-
self._requests_already_handled.add(id)
210+
# No caching of requests that are already in progress, client is already aware of them.
211+
pass
211212
elif request.was_already_handled:
212213
# Cache only id for already handled requests
213214
self._requests_already_handled.add(id)
214215
else:
215-
# Cache full request for unhandled requests that are not yet in progress
216+
# Cache full request for unhandled requests that are not yet in progress and are not yet handled.
216217
self._requests_cache[id] = request
217218
return request
218219

@@ -268,16 +269,9 @@ async def _list_head(self) -> None:
268269
if request.was_already_handled:
269270
# Do not cache fully handled requests, we do not need them. Just cache their id.
270271
self._requests_already_handled.add(request_id)
271-
else:
272-
# Only fetch the request if we do not know it yet.
273-
if request_id not in self._requests_cache:
274-
complete_request_data = await self._api_client.get_request(request_id)
275-
request = Request.model_validate(complete_request_data)
276-
self._requests_cache[request_id] = request
277-
278-
# Add new requests to the end of the head, unless already present in head
279-
if request_id not in self._head_requests:
280-
self._head_requests.appendleft(request_id)
272+
# Add new requests to the end of the head, unless already present in head
273+
elif request_id not in self._head_requests:
274+
self._head_requests.appendleft(request_id)
281275

282276
async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
283277
"""Mark a request as handled after successful processing.

tests/integration/test_request_queue.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,7 @@ async def test_request_queue_has_stats(request_queue_apify: RequestQueue) -> Non
11931193

11941194

11951195
async def test_long_request(request_queue_apify: RequestQueue) -> None:
1196+
rq = request_queue_apify
11961197
request = Request.from_url(
11971198
'https://portal.isoss.gov.cz/irj/portal/anonymous/mvrest?path=/eosm-public-offer&officeLabels=%7B%7D&page=1&pageSize=100000&sortColumn=zdatzvsm&sortOrder=-1',
11981199
use_extended_unique_key=True,
@@ -1201,13 +1202,38 @@ async def test_long_request(request_queue_apify: RequestQueue) -> None:
12011202

12021203
request_id = unique_key_to_request_id(request.unique_key)
12031204

1204-
processed_request = await request_queue_apify.add_request(request)
1205+
processed_request = await rq.add_request(request)
12051206
assert processed_request.id == request_id
12061207

1207-
request_obtained = await request_queue_apify.fetch_next_request()
1208+
request_obtained = await rq.fetch_next_request()
12081209
assert request_obtained is not None
12091210

1210-
await request_queue_apify.mark_request_as_handled(request_obtained)
1211+
await rq.mark_request_as_handled(request_obtained)
12111212

1212-
is_finished = await request_queue_apify.is_finished()
1213+
is_finished = await rq.is_finished()
12131214
assert is_finished
1215+
1216+
1217+
async def test_pre_existing_request_with_user_data(
1218+
request_queue_apify: RequestQueue, apify_client_async: ApifyClientAsync
1219+
) -> None:
1220+
"""Test that pre-existing requests with user data are fully correctly.
1221+
1222+
list_head does not return user data, so we need to test that fetching unknown requests is not relying on it."""
1223+
custom_data = {'key': 'value'}
1224+
1225+
rq = request_queue_apify
1226+
request = Request.from_url(
1227+
'https://example.com',
1228+
user_data=custom_data,
1229+
)
1230+
1231+
# Add request by a different producer
1232+
rq_client = apify_client_async.request_queue(request_queue_id=rq.id)
1233+
await rq_client.add_request(request.model_dump(by_alias=True))
1234+
1235+
# Fetch the request by the client under test
1236+
request_obtained = await rq.fetch_next_request()
1237+
assert request_obtained is not None
1238+
# Test that custom_data is preserved in user_data (custom_data should be subset of obtained user_data)
1239+
assert custom_data.items() <= request_obtained.user_data.items()

0 commit comments

Comments
 (0)