Skip to content

Commit 9fe01e3

Browse files
author
Stefan Machmeier
committed
Add hashing of packets
1 parent affb01c commit 9fe01e3

File tree

12 files changed

+274
-61
lines changed

12 files changed

+274
-61
lines changed

example.py

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import matplotlib.pyplot as plt
22
import numpy as np
3+
from scapy.all import (TCP, Packet, RandIP, RandIP6, RandMAC, Raw, rdpcap,
4+
sniff, wrpcap)
35

46
from heifip.extractor import FIPExtractor
57
from heifip.images.flow import FlowImage
@@ -10,25 +12,31 @@
1012
from heifip.images.packet import PacketImage
1113
from heifip.layers import PacketProcessorType
1214

13-
extractor = FIPExtractor()
14-
imgs = extractor.create_image_from_file(
15-
"/home/smachmeier/data/USTC-TFC2016-master/2_Session/AllLayers/FTP-ALL/FTP.pcap.TCP_1-1-221-163_51670_1-2-197-24_21.pcap",
16-
PacketProcessorType.NONE,
17-
MarkovTransitionMatrixFlow,
18-
0, # min_image_dim
19-
0, # max_image_dim
20-
3, # min_packets
21-
0, # max_packets
22-
True, # remove_duplicates,
23-
8
24-
# 30, # dim
25-
# 0, # fill
26-
# True # auto_dim
27-
)
28-
i = 0
29-
for img in imgs:
30-
extractor.save_image(img, f"/home/smachmeier/Documents/projects/heiFIP/data/benign/{i}.png")
31-
i += 1
15+
# extractor = FIPExtractor()
16+
# imgs = extractor.create_image_from_file(
17+
# "/home/smachmeier/data/better-split-binary/benign/BitTorrent-0710.pcap",
18+
# PacketProcessorType.NONE,
19+
# MarkovTransitionMatrixFlow,
20+
# 0, # min_image_dim
21+
# 0, # max_image_dim
22+
# 3, # min_packets
23+
# 0, # max_packets
24+
# True, # remove_duplicates,
25+
# 8
26+
# # 30, # dim
27+
# # 0, # fill
28+
# # True # auto_dim
29+
# )
30+
# i = 0
31+
# for img in imgs:
32+
# extractor.save_image(img, f"/home/smachmeier/Documents/projects/heiFIP/data/benign/{i}.png")
33+
# i += 1
34+
pcap = sniff(offline="/home/smachmeier/data/test-data/")
35+
for pkt in pcap:
36+
# if Raw in pkt:
37+
# pkt[TCP].remove_payload()
38+
pkt.show()
39+
# wrpcap("/home/smachmeier/test.pcap", pkt, append=True)
3240

3341
# fig = plt.figure(figsize=(16, 16))
3442
# columns = 4

heifip/extractor.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
MarkovTransitionMatrixPacket)
1414
from heifip.images.packet import PacketImage
1515
from heifip.layers import PacketProcessor, PacketProcessorType
16+
from heifip.layers.packet import FIPPacket
1617

1718

1819
class FIPExtractor:
@@ -27,12 +28,12 @@ def verify(self, image, min_image_dim: int, max_image_dim: int, remove_duplicate
2728
if max_image_dim != 0 and (max_image_dim < image.shape[0] or max_image_dim < image.shape[1]):
2829
return False
2930

30-
if remove_duplicates:
31-
im_str = image.tobytes()
32-
if im_str in self.images_created:
33-
return False
34-
else:
35-
self.images_created.append(im_str)
31+
# if remove_duplicates:
32+
# im_str = image.tobytes()
33+
# if im_str in self.images_created:
34+
# return False
35+
# else:
36+
# self.images_created.append(im_str)
3637

3738
return True
3839

@@ -69,7 +70,7 @@ def create_image_from_file(
6970

7071
def create_image_from_packet(
7172
self,
72-
packets: [Packet],
73+
packets: [FIPPacket],
7374
preprocessing_type: PacketProcessorType = PacketProcessorType.NONE,
7475
image_type: NetworkTrafficImage = PacketImage,
7576
min_image_dim: int = 0,
@@ -98,7 +99,7 @@ def create_image_from_packet(
9899

99100
def __create_matrix(
100101
self,
101-
packets: [Packet],
102+
packets: [FIPPacket],
102103
preprocessing_type: PacketProcessorType = PacketProcessorType.NONE,
103104
image_type: NetworkTrafficImage = PacketImage,
104105
min_image_dim: int = 0,
@@ -179,7 +180,7 @@ def __create_matrix(
179180
return images
180181

181182
def save_image(self, img, output_dir):
182-
pil_img = PILImage.fromarray(self.convert(img, 0, 255, np.uint8))
183+
pil_img = PILImage.fromarray(img)
183184
if not os.path.exists(os.path.realpath(os.path.dirname(output_dir))):
184185
try:
185186
os.makedirs(os.path.realpath(os.path.dirname(output_dir)))

heifip/layers/__init__.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import os
2+
import pickle
23
from abc import ABC
34
from enum import Enum, unique
45
from typing import Type
56

6-
from scapy.all import (Packet, RandIP, RandIP6, RandMAC, Raw, rdpcap, sniff,
7-
wrpcap)
7+
import cryptography
8+
from scapy.all import (Packet, RandIP, RandIP6, RandMAC, Raw, load_layer,
9+
rdpcap, sniff, wrpcap)
810
from scapy.layers.dns import DNS
911
from scapy.layers.http import HTTP, HTTPRequest, HTTPResponse
1012
from scapy.layers.inet import IP, TCP, UDP, Ether
@@ -41,7 +43,11 @@ def __init__(
4143
self,
4244
file_extension="pcap",
4345
) -> None:
44-
pass
46+
self.hash_dict = set()
47+
# if os.path.isfile('hashes_pkt.pkl'):
48+
# with open('hashes_pkt.pkl', 'rb') as f:
49+
# self.hash_dict = pickle.load(f)
50+
load_layer("tls")
4551

4652
def write_packet(self) -> None:
4753
# Write pcap
@@ -53,16 +59,19 @@ def read_packets_file(self, file: str, preprocessing_type: PacketProcessorType)
5359
# Read PCAP file with Scapy
5460
packets = []
5561
# TODO Only read max number of packets
56-
pcap = sniff(offline=file)
62+
pcap = sniff(offline=file, count=64)
5763
for pkt in pcap:
5864
# Start preprocessing for each packet
5965
processed_packet = self.__preprocessing(pkt, preprocessing_type)
6066
# TODO: Run the extraction step here to reduce the number of loops in the code. At the moment this is very inefficient in both computation time and memory.
6167
# In case packet returns None
6268
if processed_packet != None:
63-
packets.append(processed_packet)
69+
if not processed_packet.hash in self.hash_dict:
70+
self.hash_dict.add(processed_packet.hash)
71+
packets.append(processed_packet)
6472
return packets
6573

74+
6675
def read_packets_packet(self, packet: [Packet], preprocessing_type: PacketProcessorType) -> [FIPPacket]:
6776
# Preprocess the given Scapy packets (no PCAP file is read here)
6877
packets = []
@@ -71,7 +80,9 @@ def read_packets_packet(self, packet: [Packet], preprocessing_type: PacketProces
7180
processed_packet = self.__preprocessing(pkt, preprocessing_type)
7281
# In case packet returns None
7382
if processed_packet != None:
74-
packets.append(processed_packet)
83+
if not processed_packet.hash in self.hash_dict:
84+
self.hash_dict.add(processed_packet.hash)
85+
packets.append(processed_packet)
7586
return packets
7687

7788
def __preprocessing(self, packet: Packet, preprocessing_type: PacketProcessorType) -> FIPPacket:
@@ -92,7 +103,7 @@ def __preprocessing(self, packet: Packet, preprocessing_type: PacketProcessorTyp
92103
elif Ether in fippacket.layer_map:
93104
fippacket = fippacket.convert(EtherPacket, fippacket)
94105

95-
if preprocessing_type == PacketProcessorType.HEADER:
106+
if preprocessing_type == "HEADER":
96107
fippacket.header_preprocessing()
97108

98109
return fippacket

heifip/layers/dns.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
from typing import Type
23

34
from scapy.all import Packet
@@ -10,17 +11,18 @@
1011
class DNSPacket(TransportPacket):
1112
def __init__(self, packet: Packet, address_mapping={}, layer_map={}) -> None:
1213
TransportPacket.__init__(self, packet, address_mapping, layer_map)
14+
self.hash = hashlib.md5(f"{self.packet[DNS].qr}".encode('utf-8')).hexdigest()
1315

1416
def header_preprocessing(self):
1517
# TODO: Fix issue with DNS processing
16-
# if self.packet[DNS].qd:
17-
# self.__header_preprocessing_message_type(self.packet, "qd")
18-
# if self.packet[DNS].an:
19-
# self.__header_preprocessing_message_type(self.packet, "an")
20-
# if self.packet[DNS].ns:
21-
# self.__header_preprocessing_message_type(self.packet, "ns")
22-
# if self.packet[DNS].ar:
23-
# self.__header_preprocessing_message_type(self.packet, "ar")
18+
if self.packet[DNS].qd:
19+
self.__header_preprocessing_message_type(self.packet, "qd")
20+
if self.packet[DNS].an:
21+
self.__header_preprocessing_message_type(self.packet, "an")
22+
if self.packet[DNS].ns:
23+
self.__header_preprocessing_message_type(self.packet, "ns")
24+
if self.packet[DNS].ar:
25+
self.__header_preprocessing_message_type(self.packet, "ar")
2426

2527
layer_copy = self.packet[DNS]
2628

@@ -51,23 +53,19 @@ def __header_preprocessing_message_type(self, packet: Packet, message_type: str)
5153
if message_type == "qd":
5254
new_message = CustomDNSQR(qname=message.qname, qtype=message.qtype)
5355

54-
message = message.payload
55-
while message:
56+
while message:=message.payload:
5657
new_message /= CustomDNSQR(
5758
qname=message.qname,
5859
qtype=message.qtype,
5960
)
6061
else:
61-
if message_type != "ar":
62-
new_message = CustomDNSRR(
63-
rrname=message.rrname, type=message.type, ttl=message.ttl
64-
)
62+
new_message = CustomDNSRR(
63+
rrname=message.rrname, type=message.type
64+
)
6565

66-
message = message.payload
67-
while message:
68-
new_message /= CustomDNSRR(
69-
rrname=message.rrname, type=message.type, ttl=message.ttl
70-
)
66+
while message:=message.payload:
67+
new_message /= CustomDNSRR(
68+
rrname=message.rrname, type=message.type
69+
)
7170

72-
if message_type != "ar":
73-
setattr(packet[DNS], message_type, new_message)
71+
setattr(packet[DNS], message_type, new_message)

heifip/layers/http.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
import hashlib
12
from typing import Type
23

34
from scapy.all import Packet
4-
from scapy.layers.http import HTTPRequest, HTTPResponse
5+
from scapy.layers.http import HTTP, HTTPRequest, HTTPResponse
56

67
from heifip.layers.transport import TransportPacket
78
from heifip.plugins.header import (CustomHTTP, CustomHTTP_Request,
@@ -18,6 +19,10 @@ def header_preprocessing(self):
1819
class HTTPRequestPacket(HTTPPacket):
1920
def __init__(self, packet: Packet, address_mapping={}, layer_map={}):
2021
HTTPPacket.__init__(self, packet, address_mapping, layer_map)
22+
# self.hash = hashlib.md5(f"{self.packet[HTTPRequest].Path},{self.packet[HTTPRequest].Method},{self.packet[HTTPRequest].Accept}".encode('utf-8')).hexdigest()
23+
self.hash = hashlib.md5(f"{self.packet[HTTPRequest].Method},{self.packet[HTTPRequest].Accept}".encode('utf-8')).hexdigest()
24+
if Raw in self.layer_map:
25+
self.packet[HTTPRequest].remove_payload()
2126

2227
def header_preprocessing(self):
2328
layer_copy = self.packet[HTTPRequest]
@@ -44,6 +49,10 @@ def header_preprocessing(self):
4449
class HTTPResponsePacket(HTTPPacket):
4550
def __init__(self, packet: Packet, address_mapping={}, layer_map={}):
4651
HTTPPacket.__init__(self, packet, address_mapping, layer_map)
52+
# self.hash = hashlib.md5(f"{self.packet[HTTPResponse].Server},{self.packet[HTTPResponse].Status_Code},{self.packet[HTTPResponse].Connection}".encode('utf-8')).hexdigest()
53+
self.hash = hashlib.md5(f"{self.packet[HTTPResponse].Status_Code},{self.packet[HTTPResponse].Connection}".encode('utf-8')).hexdigest()
54+
if Raw in self.layer_map:
55+
self.packet[HTTPResponse].remove_payload()
4756

4857
def header_preprocessing(self):
4958
layer_copy = self.packet[HTTPResponse]

heifip/layers/ip.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
import hashlib
12
from typing import Type
23

3-
from scapy.all import Packet, RandIP, RandIP6
4-
from scapy.layers.inet import IP
4+
from scapy.all import Packet, RandIP, RandIP6, Raw
5+
from scapy.layers.http import HTTP
6+
from scapy.layers.inet import IP, TCP, UDP
57
from scapy.layers.inet6 import IPv6
8+
from scapy.layers.tls.all import TLS
69

710
from heifip.layers.packet import EtherPacket
811
from heifip.plugins.header import CustomIP, CustomIPv6
@@ -13,8 +16,18 @@ def __init__(self, packet: Packet, address_mapping={}, layer_map={}):
1316
EtherPacket.__init__(self, packet, address_mapping, layer_map)
1417
if IP in self.layer_map:
1518
self.__filter_ipv4()
19+
self.hash = hashlib.md5(f"{self.packet[IP].version},{self.packet[IP].flags},{self.packet[IP].proto}".encode('utf-8')).hexdigest()
20+
if TLS in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map):
21+
self.packet[IP].remove_payload()
22+
if Raw in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map or HTTP in self.layer_map):
23+
self.packet[IP].remove_payload()
1624
elif IPv6 in self.layer_map:
1725
self.__filter_ipv6()
26+
self.hash = hashlib.md5(f"{self.packet[IPv6].version},{self.packet[IPv6].tc},{self.packet[IPv6].hlim}".encode('utf-8')).hexdigest()
27+
if TLS in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map):
28+
self.packet[IPv6].remove_payload()
29+
if Raw in self.layer_map and not (TCP in self.layer_map or UDP in self.layer_map or HTTP in self.layer_map):
30+
self.packet[IPv6].remove_payload()
1831

1932
def __filter_ipv4(self):
2033
previous_src = self.packet[IP].src

heifip/layers/packet.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import hashlib
12
from typing import Type
23

34
from scapy.all import Packet, RandMAC
@@ -8,6 +9,7 @@ class FIPPacket:
89
def __init__(self, packet, address_mapping={}, layer_map={}):
910
self.address_mapping = address_mapping
1011
self.packet = packet
12+
self.hash = hashlib.md5().hexdigest()
1113

1214
if layer_map == {}:
1315
self.layer_map = self.__get_layers()

heifip/layers/transport.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import hashlib
12
from typing import Type
23

3-
from scapy.all import Packet
4+
from scapy.all import Packet, Raw
5+
from scapy.layers.http import HTTP, HTTPRequest, HTTPResponse
46
from scapy.layers.inet import TCP, UDP
7+
from scapy.layers.tls.all import TLS
58

69
from heifip.layers.ip import IPPacket
710
from heifip.plugins.header import CustomTCP, CustomUDP
@@ -10,6 +13,19 @@
1013
class TransportPacket(IPPacket):
1114
def __init__(self, packet: Packet, address_mapping={}, layer_map={}):
1215
IPPacket.__init__(self, packet, address_mapping, layer_map)
16+
if TCP in self.layer_map:
17+
self.hash = hashlib.md5(f"{self.packet[TCP].flags},{self.packet[TCP].options}".encode('utf-8')).hexdigest()
18+
if TLS in self.layer_map:
19+
self.packet[TCP].remove_payload()
20+
if Raw in self.layer_map and not HTTP in self.layer_map:
21+
self.packet[TCP].remove_payload()
22+
elif UDP in self.layer_map:
23+
self.hash = hashlib.md5(f"{self.packet[UDP].name}".encode('utf-8')).hexdigest()
24+
if TLS in self.layer_map:
25+
self.packet[UDP].remove_payload()
26+
if Raw in self.layer_map and not HTTP in self.layer_map:
27+
self.packet[UDP].remove_payload()
28+
1329

1430
def header_preprocessing(self):
1531
if TCP in self.layer_map:

heifip/main.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from heifip.extractor import FIPExtractor
1414
from heifip.images.flow import FlowImage
1515

16+
import pickle
17+
1618

1719
class Runner:
1820
def __init__(self, thread_number) -> None:
@@ -79,3 +81,5 @@ def run(
7981
thread.start()
8082
file_queue.join()
8183
pbar.close()
84+
# with open('hashes_pkt.pkl', 'wb') as f:
85+
# pickle.dump(self.extractor.processor.hash_dict, f)

0 commit comments

Comments
 (0)