@@ -24,57 +24,57 @@ def _is_request_produced_by_middleware(scrapy_request: Request) -> bool:
24
24
return bool (scrapy_request .meta .get ('redirect_times' )) or bool (scrapy_request .meta .get ('retry_times' ))
25
25
26
26
27
def to_apify_request(scrapy_request: Request, spider: Spider) -> dict | None:
    """Convert a Scrapy request to an Apify request.

    Args:
        scrapy_request: The Scrapy request to be converted.
        spider: The Scrapy spider that the request is associated with.

    Returns:
        The converted Apify request if the conversion was successful, otherwise None.
    """
    # Guard clause: anything that is not a scrapy.Request cannot be converted.
    if not isinstance(scrapy_request, Request):
        Actor.log.warning('Failed to convert to Apify request: Scrapy request must be a Request instance.')
        return None

    # Short random id used only to correlate the two debug log lines of one call.
    call_id = crypto_random_object_id(8)
    Actor.log.debug(f'[{call_id}]: to_apify_request was called (scrapy_request={scrapy_request})...')

    try:
        meta = scrapy_request.meta

        apify_request = {
            'url': scrapy_request.url,
            'method': scrapy_request.method,
            'userData': meta.get('userData', {}),
        }

        headers = scrapy_request.headers
        if isinstance(headers, Headers):
            apify_request['headers'] = dict(headers.to_unicode_dict())
        else:
            Actor.log.warning(f'Invalid scrapy_request.headers type, not scrapy.http.headers.Headers: {headers}')

        if _is_request_produced_by_middleware(scrapy_request):
            # Requests created by middlewares (redirect/retry) are keyed by URL only.
            apify_request['uniqueKey'] = scrapy_request.url
        else:
            # Carry over the Apify request id, if one was attached to the meta.
            request_id = meta.get('apify_request_id')
            if request_id:
                apify_request['id'] = request_id

            # Carry over the Apify unique key, if one was attached to the meta.
            unique_key = meta.get('apify_request_unique_key')
            if unique_key:
                apify_request['uniqueKey'] = unique_key

            # Serialize the Scrapy Request and store it in the apify_request:
            # the Request is turned into a dict, pickled, base64-encoded, and kept
            # under 'scrapy_request' in the apify_request's 'userData' dictionary.
            # Serialization approach: https://stackoverflow.com/questions/30469575/.
            serialized = pickle.dumps(scrapy_request.to_dict(spider=spider))
            apify_request['userData']['scrapy_request'] = codecs.encode(serialized, 'base64').decode()

    except Exception as exc:
        # Best-effort conversion: any failure is logged and reported as None.
        Actor.log.warning(f'Conversion of Scrapy request {scrapy_request} to Apify request failed; {exc}')
        return None

    Actor.log.debug(f'[{call_id}]: scrapy_request was converted to the apify_request={apify_request}')
    return apify_request
0 commit comments