Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def test_headers_raw_dict_none(self):
assert headers_raw_to_dict(None) is None
assert headers_dict_to_raw(None) is None

def test_headers_raw_dict_empty(self):
assert headers_raw_to_dict(b"") == {}
assert headers_dict_to_raw({}) == b""

def test_headers_raw_to_dict(self):
raw = b"Content-type: text/html\n\rAccept: gzip\n\r\
Cache-Control: no-cache\n\rCache-Control: no-store\n\n"
Expand Down
41 changes: 28 additions & 13 deletions w3lib/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from base64 import b64encode
from collections.abc import Mapping, MutableMapping, Sequence
from io import BytesIO
from typing import Any, Union, overload

from w3lib.util import to_bytes, to_unicode
Expand Down Expand Up @@ -44,21 +45,23 @@ def headers_raw_to_dict(headers_raw: bytes | None) -> HeadersDictOutput | None:

if headers_raw is None:
return None
headers = headers_raw.splitlines()
headers_tuples = [header.split(b":", 1) for header in headers]

if not headers_raw:
return {}

result_dict: HeadersDictOutput = {}
for header_item in headers_tuples:
if len(header_item) != 2:

for header in BytesIO(headers_raw):
key, sep, value = header.partition(b":")
if not sep:
continue

item_key = header_item[0].strip()
item_value = header_item[1].strip()
key, value = key.strip(), value.strip()

if item_key in result_dict:
result_dict[item_key].append(item_value)
if key in result_dict:
result_dict[key].append(value)
else:
result_dict[item_key] = [item_value]
result_dict[key] = [value]

return result_dict

Expand Down Expand Up @@ -93,13 +96,25 @@ def headers_dict_to_raw(headers_dict: HeadersDictInput | None) -> bytes | None:

if headers_dict is None:
return None
raw_lines = []

if not headers_dict:
return b""

parts = b""
Copy link
Contributor Author

@abebus abebus Jul 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surprisingly, concatenating plain `bytes` is noticeably faster than using a `bytearray`. I believe this is thanks to Python’s "new" adaptive interpreter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, it's only faster with relatively small headers — see #247. I had incorrectly assumed that `bytes` would be faster due to adaptive interpreter optimizations. I now believe the real reason is that strings and bytes are not always re-created on each concatenation, as explained in the linked discussion (the link target was not preserved on this page).


for key, value in headers_dict.items():
if isinstance(value, bytes):
raw_lines.append(b": ".join([key, value]))
if parts:
parts += b"\r\n"
parts += key + b": " + value

elif isinstance(value, (list, tuple)):
raw_lines.extend(b": ".join([key, v]) for v in value)
return b"\r\n".join(raw_lines)
for v in value:
if parts:
parts += b"\r\n"
parts += key + b": " + v

return parts


def basic_auth_header(
Expand Down