Skip to content

Commit 268adda

Browse files
authored
Fix conversion of headers fields in Apify <--> Scrapy request translation (#182)
1 parent d88b21a commit 268adda

File tree

4 files changed, with 30 additions and 5 deletions.

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
## [1.5.5](../../releases/tag/v1.5.5) - Unreleased
44

5-
...
5+
### Fixed
6+
7+
- Fix conversion of `headers` fields in Apify <--> Scrapy request translation
68

79
## [1.5.4](../../releases/tag/v1.5.4) - 2024-01-24
810

src/apify/scrapy/requests.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
try:
77
from scrapy import Request, Spider
8+
from scrapy.http.headers import Headers
89
from scrapy.utils.request import request_from_dict
910
except ImportError as exc:
1011
raise ImportError(
@@ -37,10 +38,16 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict:
3738
apify_request = {
3839
'url': scrapy_request.url,
3940
'method': scrapy_request.method,
40-
'headers': scrapy_request.headers,
4141
'userData': scrapy_request.meta.get('userData', {}),
4242
}
4343

44+
if isinstance(scrapy_request.headers, Headers):
45+
apify_request['headers'] = dict(scrapy_request.headers.to_unicode_dict())
46+
else:
47+
Actor.log.warning(
48+
f'scrapy_request.headers is not an instance of the scrapy.http.headers.Headers class, scrapy_request.headers = {scrapy_request.headers}',
49+
)
50+
4451
# Add 'id' to the apify_request
4552
if scrapy_request.meta.get('apify_request_id'):
4653
apify_request['id'] = scrapy_request.meta['apify_request_id']
@@ -129,7 +136,12 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
129136

130137
# Add optional 'headers' field
131138
if 'headers' in apify_request:
132-
scrapy_request.headers = apify_request['headers']
139+
if isinstance(apify_request['headers'], dict):
140+
scrapy_request.headers = Headers(apify_request['headers'])
141+
else:
142+
Actor.log.warning(
143+
f'apify_request[headers] is not an instance of the dict class, apify_request[headers] = {apify_request["headers"]}',
144+
)
133145

134146
# Add optional 'userData' field
135147
if 'userData' in apify_request:

tests/unit/scrapy/requests/test_to_apify_request.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import pytest
44
from scrapy import Request, Spider
5+
from scrapy.http.headers import Headers
56

67
from apify.scrapy.requests import to_apify_request
78

@@ -28,6 +29,15 @@ def test__to_apify_request__simple(spider: Spider) -> None:
2829
assert isinstance(user_data.get('scrapy_request'), str)
2930

3031

32+
def test__to_apify_request__headers(spider: Spider) -> None:
33+
scrapy_request_headers = Headers({'Authorization': 'Bearer access_token'})
34+
scrapy_request = Request(url='https://example.com', headers=scrapy_request_headers)
35+
36+
apify_request = to_apify_request(scrapy_request, spider)
37+
38+
assert apify_request['headers'] == dict(scrapy_request_headers.to_unicode_dict())
39+
40+
3141
def test__to_apify_request__without_id_and_unique_key(spider: Spider) -> None:
3242
scrapy_request = Request(
3343
url='https://example.com',

tests/unit/scrapy/requests/test_to_scrapy_request.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import pytest
66
from scrapy import Request, Spider
7+
from scrapy.http.headers import Headers
78

89
from apify.scrapy.requests import to_scrapy_request
910

@@ -54,7 +55,7 @@ def test__to_scrapy_request__without_reconstruction_with_optional_fields(spider:
5455
assert apify_request['method'] == scrapy_request.method
5556
assert apify_request['id'] == scrapy_request.meta.get('apify_request_id')
5657
assert apify_request['uniqueKey'] == scrapy_request.meta.get('apify_request_unique_key')
57-
assert apify_request['headers'] == scrapy_request.headers
58+
assert Headers(apify_request['headers']) == scrapy_request.headers
5859
assert apify_request['userData'] == scrapy_request.meta.get('userData')
5960

6061

@@ -101,7 +102,7 @@ def test__to_scrapy_request__with_reconstruction_with_optional_fields(spider: Sp
101102
assert apify_request['method'] == scrapy_request.method
102103
assert apify_request['id'] == scrapy_request.meta.get('apify_request_id')
103104
assert apify_request['uniqueKey'] == scrapy_request.meta.get('apify_request_unique_key')
104-
assert apify_request['headers'] == scrapy_request.headers
105+
assert Headers(apify_request['headers']) == scrapy_request.headers
105106
assert apify_request['userData'] == scrapy_request.meta.get('userData')
106107

107108

0 commit comments

Comments
 (0)