From 21508adfc2b3b5b7fd818c50951b80024aa4d8f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Cottin?= Date: Thu, 5 Jan 2017 02:02:49 +0100 Subject: [PATCH 1/4] add get_request_header --- haproxy/line.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/haproxy/line.py b/haproxy/line.py index 3d72d53..0d03a7a 100644 --- a/haproxy/line.py +++ b/haproxy/line.py @@ -166,8 +166,17 @@ def is_https(self): def get_ip(self): """Returns the IP provided on the log line.""" + ip = self.get_request_header() + if ip: + return ip + return None + + def get_request_header(self, pos = 0): + """Returns the 'pos' captured request header provided on the log line.""" if self.captured_request_headers is not None: - return self.captured_request_headers[1:-1] + headers = self.captured_request_headers[1:-1].split('|') + if len(headers) > pos and headers[pos]: + return headers[pos] return None def _parse_line(self, line): From 8606ba92531d4333a6060a38b71c1750d0d2f334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Cottin?= Date: Thu, 5 Jan 2017 02:05:57 +0100 Subject: [PATCH 2/4] add http hosts counter --- README.rst | 12 ++++++-- haproxy/line.py | 2 +- haproxy/logfile.py | 29 +++++++++++++++++- haproxy/tests/files/small.log | 4 +-- haproxy/tests/files/top_ips.log | 38 ++++++++++++------------ haproxy/tests/test_log_file.py | 52 ++++++++++++++++++++++++++++++++- haproxy/tests/test_log_line.py | 5 +++- 7 files changed, 115 insertions(+), 27 deletions(-) diff --git a/README.rst b/README.rst index 684d918..08084c2 100644 --- a/README.rst +++ b/README.rst @@ -107,8 +107,16 @@ See the ``--help`` (or the section above) to know how to run them. Something like: ``capture request header X-Forwarded-For len 20`` -``top_ips`` - Reports the 10 IPs with most requests (and the amount of requests). +``http_hosts_counter`` + Reports a breakdown of how many requests have been made per host header. 
+ Note that for this to work you need to configure HAProxy to capture the header. + This must be the *second* captured header. + Something like: + ``capture request header X-Forwarded-For len 20`` + ``capture request header Host len 30`` + +``top_http_hosts`` + Reports the 10 http hosts with most requests (and the amount of requests). ``status_codes_counter`` Reports a breakdown of how many requests per HTTP status code diff --git a/haproxy/line.py b/haproxy/line.py index 0d03a7a..1f7475f 100644 --- a/haproxy/line.py +++ b/haproxy/line.py @@ -169,7 +169,7 @@ def get_ip(self): ip = self.get_request_header() if ip: return ip - return None + return self.client_ip def get_request_header(self, pos = 0): """Returns the 'pos' captured request header provided on the log line.""" diff --git a/haproxy/logfile.py b/haproxy/logfile.py index f3e033d..c45bf61 100644 --- a/haproxy/logfile.py +++ b/haproxy/logfile.py @@ -164,7 +164,7 @@ def cmd_ip_counter(self): .. note:: To enable this command requests need to provide a header with the - forwarded IP (usually X-Forwarded-For) and be it the only header + forwarded IP (usually X-Forwarded-For) and be it the first header being captured. """ ip_counter = defaultdict(int) @@ -186,6 +186,33 @@ def cmd_top_ips(self): reverse=True ) + def cmd_http_hosts_counter(self): + """Reports a breakdown of how many requests have been made per http_host header. + + .. note:: + To enable this command requests need to provide the http host as + the second header being captured. + """ + host_counter = defaultdict(int) + for line in self._valid_lines: + host = line.get_request_header(1) + if host is not None: + host_counter[host] += 1 + return host_counter + + def cmd_top_http_hosts(self): + """Returns the top most frequent http hosts. + + .. note:: + See :meth:`.Log._sort_and_trim` for its current + limitations.
+ """ + return self._sort_and_trim( + self.cmd_http_hosts_counter(), + reverse=True + ) + + def cmd_status_codes_counter(self): """Generate statistics about HTTP status codes. 404, 500 and so on. """ diff --git a/haproxy/tests/files/small.log b/haproxy/tests/files/small.log index 2783df0..da140ce 100644 --- a/haproxy/tests/files/small.log +++ b/haproxy/tests/files/small.log @@ -3,7 +3,7 @@ Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:10:01:04. Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:12:03:06.205] loadbalancer default/instance3 0/133/0/94/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.123} "POST /hello HTTP/1.1" Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:10:01:04.205] loadbalancer default/instance2 0/133/0/1293/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.124} "GET /free HTTP/1.1" Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:11:02:05.205] loadbalancer default/instance3 0/133/0/20095/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /fra HTTP/1.1" -Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/2936/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124} "GET /world HTTP/1.1" +Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/2936/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124|myhost2} "GET /world HTTP/1.1" Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:12:03:06.205] loadbalancer default/instance1 0/133/0/4/437 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "POST /freitag HTTP/1.1" Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:12:03:06.205] loadbalancer default/instance2 0/133/0/29408/430 200 17610 - - ---- 21/21/21/1/0 0/1 "GET /free HTTP/1.1" -Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 
[11/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/409/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /hello HTTP/1.1" +Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/409/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123|myhost} "HEAD /hello HTTP/1.1" diff --git a/haproxy/tests/files/top_ips.log b/haproxy/tests/files/top_ips.log index 4e17170..09a92d1 100644 --- a/haproxy/tests/files/top_ips.log +++ b/haproxy/tests/files/top_ips.log @@ -1,17 +1,17 @@ -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.21} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.21|myhost9} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.22} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.23} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.23|myhost8} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.24} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.25} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.26} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.25|myhost7} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load 
d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.26|myhost6} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.27} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.28} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.29} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.30} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.31} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.32} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.33} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.10} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.31|myhost3} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.32|myhost6} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.33|myhost6} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.10|myhost10} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 
0/0/0/0/0 0/1 {1.1.1.11} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.12} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.13} "GET / HTTP/1.1" @@ -27,19 +27,19 @@ Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0 Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.12} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.13} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.14} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15|myhost3} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.16} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.17} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.18} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.19} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.18|myhost4} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load 
d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.19|myhost5} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.10} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.10} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.11|myhost2} "GET / HTTP/1.1" Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.11} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.11} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.11} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.19} "GET / HTTP/1.1" -Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.19} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 
{1.1.1.11|myhost} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15|myhost2} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15|myhost} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15|myhost} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.15|myhost} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.19|myhost2} "GET / HTTP/1.1" +Dec 9 12:00:03 loc ha[2]: 127.0.0.1:44 [11/Dec/2013:11:02:05.2] load d/srv1 0/0/0/0/0 404 17610 - - ---- 0/0/0/0/0 0/1 {1.1.1.19|myhost} "GET / HTTP/1.1" diff --git a/haproxy/tests/test_log_file.py b/haproxy/tests/test_log_file.py index ed6ed39..f8082cf 100644 --- a/haproxy/tests/test_log_file.py +++ b/haproxy/tests/test_log_file.py @@ -94,12 +94,24 @@ def test_cmd_ip_counter(self): ) ip_counter = log_file.cmd_ip_counter() - self.assertEqual(len(ip_counter), 4) + self.assertEqual(len(ip_counter), 5) + self.assertEqual(ip_counter['127.0.0.1'], 1) self.assertEqual(ip_counter['123.123.123.123'], 4) self.assertEqual(ip_counter['123.123.124.124'], 2) self.assertEqual(ip_counter['123.123.124.123'], 1) self.assertEqual(ip_counter['123.123.123.124'], 1) + def test_cmd_http_hosts_counter(self): + """Check that the http hosts counter command reports as expected.""" + log_file = Log( + logfile='haproxy/tests/files/small.log', + ) + hosts_counter = log_file.cmd_http_hosts_counter() + + self.assertEqual(len(hosts_counter), 2) + self.assertEqual(hosts_counter['myhost'], 1) + self.assertEqual(hosts_counter['myhost2'], 1) + def test_cmd_status_codes(self): """Check that the status
codes command reports as expected.""" log_file = Log( @@ -251,6 +263,44 @@ def test_cmd_top_ips(self): self.assertEqual(other_ips, []) + def test_cmd_top_http_hosts(self): + """Check that the top http hosts command reports as expected.""" + log_file = Log( + logfile='haproxy/tests/files/top_ips.log', + ) + top_hosts = log_file.cmd_top_http_hosts() + + self.assertEqual(len(top_hosts), 10) + self.assertEqual(top_hosts[0], ('myhost', 5)) + self.assertEqual(top_hosts[1], ('myhost2', 3)) + + # as the 3rd and 4th have the same repetitions their order is unknown + self.assertEqual(top_hosts[2][1], 3) + self.assertEqual(top_hosts[3][1], 2) + self.assertTrue(top_hosts[2][0] in ('myhost', 'myhost6')) + self.assertTrue(top_hosts[3][0] in ('myhost2', 'myhost3')) + + # the same as above for all the others + other_hosts = [ + 'myhost4', + 'myhost5', + 'myhost7', + 'myhost8', + 'myhost9', + 'myhost10', + ] + for host_info in top_hosts[4:]: + self.assertEqual(host_info[1], 1) + self.assertTrue(host_info[0] in other_hosts) + + # remove the matched host to ensure all hosts are there + for position, current in enumerate(other_hosts): + if current == host_info[0]: + del other_hosts[position] + break + + self.assertEqual(other_hosts, []) + def test_cmd_top_request_paths(self): """Check that the top request paths command reports as expected.""" log_file = Log( diff --git a/haproxy/tests/test_log_line.py b/haproxy/tests/test_log_line.py index 6cdfda1..e51c04d 100644 --- a/haproxy/tests/test_log_line.py +++ b/haproxy/tests/test_log_line.py @@ -43,7 +43,7 @@ def setUp(self): self.queue_server = 2 self.queue_backend = 67 - self.headers = ' {77.24.148.74}' + self.headers = ' {77.24.148.74|myhost}' self.http_request = 'GET /path/to/image HTTP/1.1' def _build_test_string(self): @@ -119,6 +119,9 @@ def test_default_values(self): log_line.captured_request_headers) self.assertEqual(None, log_line.captured_response_headers) + self.assertEqual(self.headers.strip()[1:-1].split('|')[1],
log_line.get_request_header(1)) + self.assertEqual(self.http_request, log_line.raw_http_request) self.assertTrue(log_line.valid) From 0887b0ba85b7b4356f25dfceb66801ff1ee2c217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Cottin?= Date: Thu, 5 Jan 2017 02:16:33 +0100 Subject: [PATCH 3/4] add host filter --- README.rst | 3 +++ haproxy/filters.py | 13 +++++++++++++ haproxy/tests/test_filters.py | 19 +++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/README.rst b/README.rst index 08084c2..172ec25 100644 --- a/README.rst +++ b/README.rst @@ -179,6 +179,9 @@ are a way to reduce the amount of log lines to be processed. Filters log lines by the given IP range (all IPs that begin with the same prefix). +``host`` + Filters log lines by the given host header. + ``path`` Filters log lines by the given string. diff --git a/haproxy/filters.py b/haproxy/filters.py index 2f66375..2166f08 100644 --- a/haproxy/filters.py +++ b/haproxy/filters.py @@ -38,6 +38,19 @@ def filter_func(log_line): return filter_func +def filter_host(host): + """Filter :class:`.Line` objects by their request http host. + + :param host: http host header. + :type host: string + :returns: a function that filters by the provided host. + :rtype: function + """ + def filter_func(log_line): + return log_line.get_request_header(1) == host + + return filter_func + def filter_path(path): """Filter :class:`.Line` objects by their request path. 
diff --git a/haproxy/tests/test_filters.py b/haproxy/tests/test_filters.py index 7339f9c..4730548 100644 --- a/haproxy/tests/test_filters.py +++ b/haproxy/tests/test_filters.py @@ -37,6 +37,25 @@ def test_filter_ip_range(self): self.assertEqual(results, [True, False, True, ]) + def test_filter_host(self): + """Check that filter_host filter works as expected.""" + filter_func = filters.filter_host('myhost') + method = 'GET' + protocol = 'HTTP/1.1' + path = '/' + + hosts = ('myhost', 'yourhost', 'myhost2', ) + results = [] + for host in hosts: + self.headers = ' {{|{0}}} '.format(host) + self.http_request = '{0} {1} {2}'.format(method, path, protocol) + raw_line = self._build_test_string() + log_line = Line(raw_line) + + results.append(filter_func(log_line)) + + self.assertEqual(results, [True, False, False, ]) + def test_filter_path(self): """Check that filter_path filter works as expected.""" filter_func = filters.filter_path('/image') From d28252434c5bd94c10a75eb5df22d6c0817b9626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Cottin?= Date: Thu, 5 Jan 2017 02:20:31 +0100 Subject: [PATCH 4/4] re-add wrongly deleted lines --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 172ec25..152a0ab 100644 --- a/README.rst +++ b/README.rst @@ -107,6 +107,9 @@ See the ``--help`` (or the section above) to know how to run them. Something like: ``capture request header X-Forwarded-For len 20`` +``top_ips`` + Reports the 10 IPs with most requests (and the amount of requests). + ``http_hosts_counter`` Reports a breakdown of how many requests have been made per host header. Note that for this to work you need to configure HAProxy to capture the header.