Skip to content

Commit 44a0676

Browse files
authored
HTTP 1.0: support HEAD in reconstruction (#4307)
1 parent df6eabe commit 44a0676

File tree

3 files changed

+50
-10
lines changed

3 files changed

+50
-10
lines changed

scapy/layers/http.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
Note that this layer ISN'T loaded by default, as it is quite experimental for now.
2020
2121
To follow HTTP packet streams = group packets together to get the
22-
whole request/answer, use ``TCPSession`` as:
22+
whole request/answer, use ``TCPSession`` as::
2323
2424
>>> sniff(session=TCPSession) # Live on-the-flow session
2525
>>> sniff(offline="./http_chunk.pcap", session=TCPSession) # pcap
@@ -28,14 +28,14 @@
2828
and will also decompress the packets when needed.
2929
Note: on failure, decompression will be ignored.
3030
31-
You can turn auto-decompression/auto-compression off with:
31+
You can turn auto-decompression/auto-compression off with::
3232
3333
>>> conf.contribs["http"]["auto_compression"] = False
3434
3535
(Defaults to True)
3636
"""
3737

38-
# This file is a modified version of the former scapy_http plugin.
38+
# This file is a rewritten version of the former scapy_http plugin.
3939
# It was reimplemented for scapy 2.4.3+ using sessions, stream handling.
4040
# Original Authors : Steeve Barbeau, Luca Invernizzi
4141

@@ -66,6 +66,12 @@
6666
except ImportError:
6767
_is_brotli_available = False
6868

69+
try:
70+
import lzw
71+
_is_lzw_available = True
72+
except ImportError:
73+
_is_lzw_available = False
74+
6975
try:
7076
import zstandard
7177
_is_zstd_available = True
@@ -312,8 +318,13 @@ def post_dissect(self, s):
312318
elif "gzip" in encodings:
313319
s = gzip.decompress(s)
314320
elif "compress" in encodings:
315-
import lzw
316-
s = lzw.decompress(s)
321+
if _is_lzw_available:
322+
s = lzw.decompress(s)
323+
else:
324+
log_loading.info(
325+
"Can't import lzw. compress decompression "
326+
"will be ignored !"
327+
)
317328
elif "br" in encodings:
318329
if _is_brotli_available:
319330
s = brotli.decompress(s)
@@ -351,8 +362,13 @@ def post_build(self, pkt, pay):
351362
elif "gzip" in encodings:
352363
pay = gzip.compress(pay)
353364
elif "compress" in encodings:
354-
import lzw
355-
pay = lzw.compress(pay)
365+
if _is_lzw_available:
366+
pay = lzw.compress(pay)
367+
else:
368+
log_loading.info(
369+
"Can't import lzw. compress compression "
370+
"will be ignored !"
371+
)
356372
elif "br" in encodings:
357373
if _is_brotli_available:
358374
pay = brotli.compress(pay)
@@ -589,14 +605,22 @@ def dispatch_hook(cls, _pkt=None, *args, **kargs):
589605
def tcp_reassemble(cls, data, metadata, _):
590606
detect_end = metadata.get("detect_end", None)
591607
is_unknown = metadata.get("detect_unknown", True)
608+
# General idea of the following is explained at
609+
# https://datatracker.ietf.org/doc/html/rfc2616#section-4.4
592610
if not detect_end or is_unknown:
593611
metadata["detect_unknown"] = False
594612
http_packet = cls(data)
595613
# Detect packing method
596614
if not isinstance(http_packet.payload, _HTTPContent):
597615
return http_packet
616+
is_response = isinstance(http_packet.payload, cls.clsresp)
617+
# Packets may have a Content-Length we must honor
598618
length = http_packet.Content_Length
599-
if length is not None:
619+
# Heuristic to try and detect instant HEAD responses, as those include a
620+
# Content-Length that must not be honored.
621+
if is_response and data.endswith(b"\r\n\r\n"):
622+
detect_end = lambda _: True
623+
elif length is not None:
600624
# The packet provides a Content-Length attribute: let's
601625
# use it. When the total size of the frags is high enough,
602626
# we have the packet
@@ -613,11 +637,10 @@ def tcp_reassemble(cls, data, metadata, _):
613637
# It's not Content-Length based. It could be chunked
614638
encodings = http_packet[cls].payload._get_encodings()
615639
chunked = ("chunked" in encodings)
616-
is_response = isinstance(http_packet.payload, cls.clsresp)
617640
if chunked:
618641
detect_end = lambda dat: dat.endswith(b"0\r\n\r\n")
619642
# HTTP Requests that do not have any content,
620-
# end with a double CRLF
643+
# end with a double CRLF. Same for HEAD responses
621644
elif isinstance(http_packet.payload, cls.clsreq):
622645
detect_end = lambda dat: dat.endswith(b"\r\n\r\n")
623646
# In case we are handling a HTTP Request,

test/pcaps/http_head.pcapng.gz

952 Bytes
Binary file not shown.

test/scapy/layers/http.uts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,23 @@ assert HTTPResponse in pkt
6868
print(pkt[Raw].load, expected_data)
6969
assert pkt[Raw].load == expected_data
7070

71+
= TCPSession - dissect HTTP 1.0 HEAD response
72+
~ http
73+
74+
load_layer("http")
75+
76+
a = sniff(offline=scapy_path("/test/pcaps/http_head.pcapng.gz"), session=TCPSession)
77+
78+
assert HTTPRequest in a[3]
79+
assert a[3].Method == b"HEAD"
80+
assert a[3].User_Agent == b'curl/7.88.1'
81+
82+
assert HTTPResponse in a[5]
83+
assert a[5].Content_Type == b'text/html; charset=UTF-8'
84+
assert a[5].Expires == b'Mon, 01 Apr 2024 22:25:38 GMT'
85+
assert a[5].Reason_Phrase == b'Moved Permanently'
86+
assert a[5].X_Frame_Options == b"SAMEORIGIN"
87+
7188
= HTTP decompression (gzip)
7289

7390
conf.debug_dissector = True

0 commit comments

Comments
 (0)