From 2702e5deece2e830b11e445403b21c3ea1213bff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Sun, 20 Sep 2020 11:26:57 +0200 Subject: [PATCH 1/2] Implement httpbench --- README.md | 1 + bench.py | 18 ++++++++++++++++++ httpbench.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 httpbench.py diff --git a/README.md b/README.md index f951433..ad92785 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ The spider `download.py`, dumps the response body as unicode to the files. The l bookworm Spider to scrape locally hosted site broadworm Broad crawl spider to scrape locally hosted sites cssbench Micro-benchmark for extraction using css + httpbench Scrapy HTTP download handler test itemloader Item loader benchmarker linkextractor Micro-benchmark for LinkExtractor() urlparseprofile Urlparse benchmarker diff --git a/bench.py b/bench.py index 0fe725c..5802f3b 100644 --- a/bench.py +++ b/bench.py @@ -168,6 +168,24 @@ def cssbench(obj): obj.vmprof) +@cli.command() +@click.pass_obj +def httpbench(obj): + """Scrapy HTTP download handler test""" + scrapy_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'execute.py') + settings = " ".join("-s '%s'" % s for s in obj.set) + arg = "%s runspider httpbench.py %s" % (scrapy_path, settings) + + calculator( + "HTTP Benchmark", + arg, + obj.n_runs, + obj.only_result, + obj.upload_result, + obj.vmprof + ) + + @cli.command() @click.pass_obj def xpathbench(obj): diff --git a/httpbench.py b/httpbench.py new file mode 100644 index 0000000..9812f8c --- /dev/null +++ b/httpbench.py @@ -0,0 +1,32 @@ +from datetime import datetime + +from scrapy import Request, Spider + + +class HTTPSpider(Spider): + """Spider equivalent to https://http1.golang.org/gophertiles + + Use the DOWNLOAD_HANDLERS setting to set the download handler to test. + """ + name = 'httpbench' + + def start_requests(self): + self.response_count = 0 + self.start_time = datetime.utcnow() + version = ( + '2' if '2' in self.settings.getwithbase('DOWNLOAD_HANDLERS')['https'] + else '1' + ) + for x in range(14): + for y in range(11): + yield Request( + f'https://http{version}.golang.org/gophertiles?x={x}&y={y}&latency=0' + ) + + def parse(self, response): + self.response_count += 1 + + def close(self, reason): + run_time = datetime.utcnow() - self.start_time + with open("Benchmark.txt", 'w') as f: + f.write(f"{self.response_count / run_time.total_seconds()}") From b682e7478d41dc6a40b3f09fdbf878c6dd5af847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Mon, 21 Sep 2020 14:49:05 +0200 Subject: [PATCH 2/2] Support Python 2 and 3.5 --- httpbench.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/httpbench.py b/httpbench.py index 9812f8c..f557618 100644 --- a/httpbench.py +++ b/httpbench.py @@ -1,6 +1,7 @@ from datetime import datetime from scrapy import Request, Spider +from six import text_type class HTTPSpider(Spider): @@ -20,7 +21,12 @@ def start_requests(self): for x in range(14): for y in range(11): yield Request( - f'https://http{version}.golang.org/gophertiles?x={x}&y={y}&latency=0' + 'https://http{version}.golang.org/gophertiles?x={x}&y={y}&latency=0' + .format( + version=version, + x=x, + y=y, + ) ) def parse(self, response): @@ -29,4 +35,4 @@ def parse(self, response): def close(self, reason): run_time = datetime.utcnow() - self.start_time with open("Benchmark.txt", 'w') as f: - f.write(f"{self.response_count / run_time.total_seconds()}") + f.write(text_type(self.response_count / run_time.total_seconds()))