From bac0a72cf7816275884f1753fc8fa7f99b74744a Mon Sep 17 00:00:00 2001 From: M Pacer Date: Tue, 28 Nov 2017 20:40:53 -0800 Subject: [PATCH 01/11] use makedirs rather than try/except; handles all mkdir -p functionality --- allofplos/plos_corpus.py | 15 +++------------ allofplos/samples/corpus_analysis.py | 5 +---- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/allofplos/plos_corpus.py b/allofplos/plos_corpus.py index a1da70d9..45baed0f 100644 --- a/allofplos/plos_corpus.py +++ b/allofplos/plos_corpus.py @@ -218,10 +218,7 @@ def repo_download(dois, tempdir, ignore_existing=True, plos_network=False): :param ignore_existing: Don't re-download to tempdir if already downloaded """ # make temporary directory, if needed - try: - os.mkdir(tempdir) - except FileExistsError: - pass + os.makedirs(tempdir, exist_ok=True) if ignore_existing: existing_articles = [filename_to_doi(file) for file in listdir_nohidden(tempdir)] @@ -423,10 +420,7 @@ def download_updated_xml(article_file, :return: boolean for whether update was available & downloaded """ doi = filename_to_doi(article_file) - try: - os.mkdir(tempdir) - except FileExistsError: - pass + os.makedirs(tempdir, exist_ok=True) url = URL_TMP.format(doi) articletree_remote = et.parse(url) articleXML_remote = et.tostring(articletree_remote, method='xml', encoding='unicode') @@ -693,10 +687,7 @@ def remote_proofs_direct_check(tempdir=newarticledir, article_list=None, plos_ne :param article-list: list of uncorrected proofs to check for updates. :return: list of all articles with updated vor """ - try: - os.mkdir(tempdir) - except FileExistsError: - pass + os.makedirs(tempdir, exist_ok=True) proofs_download_list = [] if article_list is None: article_list = get_uncorrected_proofs_list() diff --git a/allofplos/samples/corpus_analysis.py b/allofplos/samples/corpus_analysis.py index 0b51541d..b1309731 100644 --- a/allofplos/samples/corpus_analysis.py +++ b/allofplos/samples/corpus_analysis.py @@ -290,10 +290,7 @@ def revisiondate_sanity_check(article_list=None, tempdir=newarticledir, director article_list = sorted(pubdates, key=pubdates.__getitem__, reverse=True) article_list = article_list[:30000] - try: - os.mkdir(tempdir) - except FileExistsError: - pass + os.makedirs(tempdir, exist_ok=True) articles_different_list = [] max_value = len(article_list) bar = progressbar.ProgressBar(redirect_stdout=True, max_value=max_value) From fcb8d9b51f45158b53efe52a858c7ddb45336c77 Mon Sep 17 00:00:00 2001 From: M Pacer Date: Wed, 29 Nov 2017 00:25:30 -0800 Subject: [PATCH 02/11] Changing other pattern of os.mkdir with a print statement if not present to os.makedirs --- allofplos/plos_corpus.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/allofplos/plos_corpus.py b/allofplos/plos_corpus.py index 45baed0f..c5717197 100644 --- a/allofplos/plos_corpus.py +++ b/allofplos/plos_corpus.py @@ -857,9 +857,7 @@ def create_local_plos_corpus(corpusdir=corpusdir, rm_metadata=True): :param rm_metadata: COMPLETE HERE :return: None """ - if os.path.isdir(corpusdir) is False: - os.mkdir(corpusdir) - print('Creating folder for article xml') + os.makedirs(tempdir, exist_ok=True) zip_date, zip_size, metadata_path = get_zip_metadata() zip_path = download_file_from_google_drive(zip_id, local_zip, file_size=zip_size) unzip_articles(file_path=zip_path) @@ -876,9 +874,7 @@ def create_test_plos_corpus(corpusdir=corpusdir): :param corpusdir: directory where the corpus is to be downloaded and extracted :return: None """ - if os.path.isdir(corpusdir) is 
False: - os.mkdir(corpusdir) - print('Creating folder for article xml') + os.makedirs(tempdir, exist_ok=True) zip_path = download_file_from_google_drive(test_zip_id, local_test_zip) unzip_articles(file_path=zip_path, extract_directory=corpusdir) From 17ee2e3ae0340bb376043bf6b8f8a5fd627a08f4 Mon Sep 17 00:00:00 2001 From: M Pacer Date: Wed, 29 Nov 2017 12:48:30 -0800 Subject: [PATCH 03/11] add class method from_bytes for creating articles from byte strings --- allofplos/article_class.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/allofplos/article_class.py b/allofplos/article_class.py index 864baf6d..d78aa038 100644 --- a/allofplos/article_class.py +++ b/allofplos/article_class.py @@ -2,9 +2,12 @@ import re import subprocess +from io import BytesIO + import lxml.etree as et import requests + from allofplos.transformations import (filename_to_doi, EXT_URL_TMP, INT_URL_TMP, BASE_URL_ARTICLE_LANDING_PAGE) from allofplos.plos_regex import (validate_doi, corpusdir) @@ -1097,3 +1100,27 @@ def from_filename(cls, filename): """Initiate an article object using a local XML file. """ return cls(filename_to_doi(filename)) + + @classmethod + def from_bytes(cls, resp, directory=corpusdir, write=False, overwrite=True): + tree = et.parse(BytesIO(resp)) + root = tree.getroot() + tag_path = ["/", + "article", + "front", + "article-meta", + "article-id"] + tag_location = '/'.join(tag_path) + article_ids = root.xpath(tag_location) + for art_id in article_ids: + if art_id.get('pub-id-type')=='doi': + temp = cls(art_id.text, directory=directory) + temp._tree = tree + if write and (not os.path.isfile(temp.filename) or overwrite): + with open(temp.filename, 'w') as file: + file.write(et.tostring(tree, method='xml', encoding='unicode')) + break + return temp + + + From 38be2b1bdb8ded52be419d9004f201e9f245bfef Mon Sep 17 00:00:00 2001 From: M Pacer Date: Wed, 29 Nov 2017 12:49:36 -0800 Subject: [PATCH 04/11] basic async file getting utilities with timing comparisons --- fetch_test.py | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 fetch_test.py diff --git a/fetch_test.py b/fetch_test.py new file mode 100644 index 00000000..673da9c5 --- /dev/null +++ b/fetch_test.py @@ -0,0 +1,118 @@ +import asyncio +import aiohttp +import requests +import time +import os +import shutil + + +import lxml.etree as et +from timeit import default_timer + +from allofplos.plos_corpus import listdir_nohidden +from allofplos.plos_regex import ALLOFPLOS_DIR_PATH, corpusdir +from allofplos.transformations import URL_TMP, url_to_doi +from allofplos.samples.corpus_analysis import get_all_local_dois +from allofplos import Article + +begin_time = default_timer() + +ASYNC_DIRECTORY = os.path.join(ALLOFPLOS_DIR_PATH, "async_test") +MIN_FILES = 9990 +NUM_FILES = 10 + +async def fetch(url, session): + """Fetch a url, using specified ClientSession.""" + fetch.start_time[url] = default_timer() + async with session.get(url) as response: + resp = await response.read() + article = Article.from_bytes(resp, + directory=ASYNC_DIRECTORY, + write=True, + overwrite=True) + now = default_timer() + elapsed = now - fetch.start_time[url] + # print('{0:5.2f} {1:30}{2:5.2} '.format(now, url, elapsed)) + return article + +async def fetch_all(dois, max_rate=1.0, limit_per_host=3.0): + """Launch requests for all web pages.""" + tasks = [] + fetch.start_time = dict() # dictionary of start times for each url + conn = aiohttp.TCPConnector(limit_per_host=limit_per_host) + async 
with aiohttp.ClientSession(connector=conn) as session: + for doi in dois: + await asyncio.sleep(max_rate) # ensures no more requests than max_rate per second + task = asyncio.ensure_future( + fetch(URL_TMP.format(doi), session)) + tasks.append(task) # create list of tasks + + first_batch = await asyncio.gather(*tasks) # gather task responses + corrected_dois = [article.related_doi + for article in first_batch + if article.type_=="correction"] + for doi in corrected_dois: + await asyncio.sleep(max_rate) # ensures no more requests than max_rate per second + task = asyncio.ensure_future( + fetch(URL_TMP.format(doi), session)) + tasks.append(task) # create list of tasks + + second_batch = await asyncio.gather(*tasks) # gather task responses + + + # -------------- TOTAL SECONDS: 178.59 + +def sequential_fetch(doi): + "Fetch individual web pages as part of a sequence" + url = URL_TMP.format(doi) + response = requests.get(url) + time.sleep(1) + article = Article.from_bytes(response.text.encode('utf-8'), + directory=ASYNC_DIRECTORY, + write=True) + return article + +def demo_sequential(dois): + """Fetch list of web pages sequentially.""" + handle_dir() + start_time = default_timer() + for doi in dois: + start_time_url = default_timer() + article = sequential_fetch(doi) + now = default_timer() + elapsed = now - start_time_url + if article.type_ == "correction": + new_article = sequential_fetch(article.related_doi) + + # print('{0:5.2f} {1:30}{2:5.2f} '.format(now, url, elapsed)) + + tot_elapsed = default_timer() - start_time + print(' TOTAL SECONDS: '.rjust(30, '-') + '{0:5.2f} '. \ + format(tot_elapsed, '\n')) + + +def demo_async(dois): + handle_dir() + start_time = default_timer() + loop = asyncio.get_event_loop() # event loop + future = asyncio.ensure_future(fetch_all(dois)) # tasks to do + loop.run_until_complete(future) # loop until done + loop.run_until_complete(asyncio.sleep(0)) + loop.close() + tot_elapsed = default_timer() - start_time + print(' TOTAL SECONDS: '.rjust(30, '-') + '{0:5.2f} '. 
\ + format(tot_elapsed, '\n')) + +def main(): + + dois = get_all_local_dois(corpusdir)[MIN_FILES:MIN_FILES+NUM_FILES] + + demo_sequential(dois) + demo_async(dois) + +def handle_dir(): + if os.path.isdir(ASYNC_DIRECTORY): + shutil.rmtree(ASYNC_DIRECTORY) + os.makedirs(ASYNC_DIRECTORY, exist_ok=True) +if __name__ == '__main__': + main() From 0eca201bf43a7a66e2570ad57301ddfd5e5159a9 Mon Sep 17 00:00:00 2001 From: M Pacer Date: Wed, 29 Nov 2017 13:26:53 -0800 Subject: [PATCH 05/11] require python 3.5 or greater --- setup.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 00c25d96..8152ab80 100644 --- a/setup.py +++ b/setup.py @@ -3,9 +3,9 @@ import sys if sys.version_info.major < 3: - sys.exit('Sorry, Python < 3.4 is not supported') -elif sys.version_info.minor < 4: - sys.exit('Sorry, Python < 3.4 is not supported') + sys.exit('Sorry, Python < 3.5 is not supported') +elif sys.version_info.minor < 5: + sys.exit('Sorry, Python < 3.5 is not supported') here = path.abspath(path.dirname(__file__)) @@ -27,7 +27,6 @@ 'Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], @@ -50,7 +49,7 @@ 'tqdm==4.17.1', 'urllib3==1.22', ], - python_requires='>=3.4', + python_requires='>=3.5', # If there are data files included in your packages that need to be # installed, specify them here. If using Python 2.6 or less, then these # have to be included in MANIFEST.in as well. From 5cdd28f015836365df98780657b15c9c71227282 Mon Sep 17 00:00:00 2001 From: M Pacer Date: Wed, 29 Nov 2017 13:29:22 -0800 Subject: [PATCH 06/11] add MIN_DELAY as configurable for testing on plos network --- fetch_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fetch_test.py b/fetch_test.py index 673da9c5..652ba521 100644 --- a/fetch_test.py +++ b/fetch_test.py @@ -17,9 +17,10 @@ begin_time = default_timer() +MIN_DELAY = 1.0 ASYNC_DIRECTORY = os.path.join(ALLOFPLOS_DIR_PATH, "async_test") MIN_FILES = 9990 -NUM_FILES = 10 +NUM_FILES = 10 async def fetch(url, session): """Fetch a url, using specified ClientSession.""" @@ -35,7 +36,7 @@ async def fetch(url, session): # print('{0:5.2f} {1:30}{2:5.2} '.format(now, url, elapsed)) return article -async def fetch_all(dois, max_rate=1.0, limit_per_host=3.0): +async def fetch_all(dois, max_rate=MIN_DELAY, limit_per_host=3.0): """Launch requests for all web pages.""" tasks = [] fetch.start_time = dict() # dictionary of start times for each url @@ -66,7 +67,7 @@ def sequential_fetch(doi): "Fetch individual web pages as part of a sequence" url = URL_TMP.format(doi) response = requests.get(url) - time.sleep(1) + time.sleep(MIN_DELAY) article = Article.from_bytes(response.text.encode('utf-8'), directory=ASYNC_DIRECTORY, write=True) From f85990dcf2de29c0c0b521e711411189e479507a Mon Sep 17 00:00:00 2001 From: M Pacer Date: Wed, 29 Nov 2017 13:33:33 -0800 Subject: [PATCH 07/11] move fetch test inside async_utils package --- allofplos/async_utils/__init__.py | 0 fetch_test.py => allofplos/async_utils/fetch_test.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 allofplos/async_utils/__init__.py rename fetch_test.py => allofplos/async_utils/fetch_test.py (100%) diff --git a/allofplos/async_utils/__init__.py b/allofplos/async_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fetch_test.py 
b/allofplos/async_utils/fetch_test.py similarity index 100% rename from fetch_test.py rename to allofplos/async_utils/fetch_test.py From 07ffabcd44d03315fc91a4983d3e73029b5c0f09 Mon Sep 17 00:00:00 2001 From: M Pacer Date: Wed, 29 Nov 2017 13:39:39 -0800 Subject: [PATCH 08/11] open correct directories with makedirs --- allofplos/plos_corpus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/allofplos/plos_corpus.py b/allofplos/plos_corpus.py index c5717197..1093da6b 100644 --- a/allofplos/plos_corpus.py +++ b/allofplos/plos_corpus.py @@ -857,7 +857,7 @@ def create_local_plos_corpus(corpusdir=corpusdir, rm_metadata=True): :param rm_metadata: COMPLETE HERE :return: None """ - os.makedirs(tempdir, exist_ok=True) + os.makedirs(corpusdir, exist_ok=True) zip_date, zip_size, metadata_path = get_zip_metadata() zip_path = download_file_from_google_drive(zip_id, local_zip, file_size=zip_size) unzip_articles(file_path=zip_path) @@ -874,7 +874,7 @@ def create_test_plos_corpus(corpusdir=corpusdir): :param corpusdir: directory where the corpus is to be downloaded and extracted :return: None """ - os.makedirs(tempdir, exist_ok=True) + os.makedirs(corpusdir, exist_ok=True) zip_path = download_file_from_google_drive(test_zip_id, local_test_zip) unzip_articles(file_path=zip_path, extract_directory=corpusdir) From 7e38b3ec85c4d558203ae6586c27e3555ea9fddb Mon Sep 17 00:00:00 2001 From: M Pacer Date: Thu, 30 Nov 2017 18:15:25 -0800 Subject: [PATCH 09/11] build two directories so that they can be compared --- allofplos/async_utils/fetch_test.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/allofplos/async_utils/fetch_test.py b/allofplos/async_utils/fetch_test.py index 652ba521..8ffca305 100644 --- a/allofplos/async_utils/fetch_test.py +++ b/allofplos/async_utils/fetch_test.py @@ -18,7 +18,8 @@ begin_time = default_timer() MIN_DELAY = 1.0 -ASYNC_DIRECTORY = os.path.join(ALLOFPLOS_DIR_PATH, "async_test") +ASYNC_DIRECTORY = os.path.join(ALLOFPLOS_DIR_PATH, "async_test_dir") +SYNC_DIRECTORY = os.path.join(ALLOFPLOS_DIR_PATH, "sync_test_dir") MIN_FILES = 9990 NUM_FILES = 10 @@ -75,7 +76,7 @@ def sequential_fetch(doi): def demo_sequential(dois): """Fetch list of web pages sequentially.""" - handle_dir() + recreate_dir(SYNC_DIRECTORY) start_time = default_timer() for doi in dois: start_time_url = default_timer() @@ -93,7 +94,7 @@ def demo_sequential(dois): def demo_async(dois): - handle_dir() + recreate_dir(ASYNC_DIRECTORY) start_time = default_timer() loop = asyncio.get_event_loop() # event loop future = asyncio.ensure_future(fetch_all(dois)) # tasks to do @@ -103,17 +104,19 @@ def demo_async(dois): tot_elapsed = default_timer() - start_time print(' TOTAL SECONDS: '.rjust(30, '-') + '{0:5.2f} '. \ format(tot_elapsed, '\n')) + +def recreate_dir(directory): + """Removes and recreates the directory. 
+ """ + if os.path.isdir(directory): + shutil.rmtree(directory) + os.makedirs(directory, exist_ok=True) def main(): dois = get_all_local_dois(corpusdir)[MIN_FILES:MIN_FILES+NUM_FILES] - demo_sequential(dois) demo_async(dois) -def handle_dir(): - if os.path.isdir(ASYNC_DIRECTORY): - shutil.rmtree(ASYNC_DIRECTORY) - os.makedirs(ASYNC_DIRECTORY, exist_ok=True) if __name__ == '__main__': main() From 4b18b03db2c20e3bd0fe0d6a571d0e8deaa12648 Mon Sep 17 00:00:00 2001 From: M Pacer Date: Thu, 30 Nov 2017 18:18:04 -0800 Subject: [PATCH 10/11] make the inner functions accept dois (not urls) --- allofplos/async_utils/fetch_test.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/allofplos/async_utils/fetch_test.py b/allofplos/async_utils/fetch_test.py index 8ffca305..3a712e71 100644 --- a/allofplos/async_utils/fetch_test.py +++ b/allofplos/async_utils/fetch_test.py @@ -23,9 +23,10 @@ MIN_FILES = 9990 NUM_FILES = 10 -async def fetch(url, session): """Fetch a url, using specified ClientSession.""" fetch.start_time[url] = default_timer() +async def fetch(doi, session): + url = URL_TMP.format(doi) async with session.get(url) as response: resp = await response.read() article = Article.from_bytes(resp, @@ -45,8 +46,7 @@ async def fetch_all(dois, max_rate=MIN_DELAY, limit_per_host=3.0): async with aiohttp.ClientSession(connector=conn) as session: for doi in dois: await asyncio.sleep(max_rate) # ensures no more requests than max_rate per second - task = asyncio.ensure_future( - fetch(URL_TMP.format(doi), session)) + task = asyncio.ensure_future(fetch(doi, session)) tasks.append(task) # create list of tasks first_batch = await asyncio.gather(*tasks) # gather task responses @@ -55,8 +55,7 @@ async def fetch_all(dois, max_rate=MIN_DELAY, limit_per_host=3.0): if article.type_=="correction"] for doi in corrected_dois: await asyncio.sleep(max_rate) # ensures no more requests than max_rate per second - task = asyncio.ensure_future( - fetch(URL_TMP.format(doi), session)) + task = asyncio.ensure_future(fetch(doi, session)) tasks.append(task) # create list of tasks second_batch = await asyncio.gather(*tasks) # gather task responses From a4a946c4c54e8e3731dc0ff712c52f14a1725fd5 Mon Sep 17 00:00:00 2001 From: M Pacer Date: Thu, 30 Nov 2017 18:26:15 -0800 Subject: [PATCH 11/11] doc improvements and a bunch of cleanup --- allofplos/async_utils/fetch_test.py | 62 +++++++++++++++++++---------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/allofplos/async_utils/fetch_test.py b/allofplos/async_utils/fetch_test.py index 3a712e71..c21a200d 100644 --- a/allofplos/async_utils/fetch_test.py +++ b/allofplos/async_utils/fetch_test.py @@ -15,17 +15,22 @@ from allofplos.samples.corpus_analysis import get_all_local_dois from allofplos import Article -begin_time = default_timer() +MIN_DELAY = 1.0 # minimum for wait before beginning the next http-request (in s) +MIN_FILES = 9990 # index of the files to start with +NUM_FILES = 10 # how many files do you process -MIN_DELAY = 1.0 ASYNC_DIRECTORY = os.path.join(ALLOFPLOS_DIR_PATH, "async_test_dir") SYNC_DIRECTORY = os.path.join(ALLOFPLOS_DIR_PATH, "sync_test_dir") -MIN_FILES = 9990 -NUM_FILES = 10 - """Fetch a url, using specified ClientSession.""" - fetch.start_time[url] = default_timer() async def fetch(doi, session): + """Given a doi, fetch the associated url, using the given asynchronous + session (a ClientSession) as a context manager. + + Returns the article created by transforming the content of the response. 
+
+    NB: This needs to do better error handling if the url fails or points to an
+    invalid xml file.
+    """
     url = URL_TMP.format(doi)
     async with session.get(url) as response:
         resp = await response.read()
@@ -33,15 +38,18 @@ async def fetch(doi, session):
                                      directory=ASYNC_DIRECTORY,
                                      write=True,
                                      overwrite=True)
-    now = default_timer()
-    elapsed = now - fetch.start_time[url]
-    # print('{0:5.2f} {1:30}{2:5.2} '.format(now, url, elapsed))
     return article
 
 async def fetch_all(dois, max_rate=MIN_DELAY, limit_per_host=3.0):
-    """Launch requests for all web pages."""
+    """Launch requests for each doi.
+
+    This first fetches all of the dois passed in.
+
+    Then it checks for the existence of dois that are corrected articles that
+    should also be downloaded.
+    """
+
     tasks = []
-    fetch.start_time = dict() # dictionary of start times for each url
     conn = aiohttp.TCPConnector(limit_per_host=limit_per_host)
     async with aiohttp.ClientSession(connector=conn) as session:
         for doi in dois:
@@ -61,10 +69,17 @@ async def fetch_all(dois, max_rate=MIN_DELAY, limit_per_host=3.0):
 
     second_batch = await asyncio.gather(*tasks) # gather task responses
 
-
 # -------------- TOTAL SECONDS: 178.59
 
 def sequential_fetch(doi):
-    "Fetch individual web pages as part of a sequence"
+    """
+    Fetch urls on the basis of the doi being passed in as part of a sequential
+    process.
+
+    Returns the article created by transforming the content of the response.
+
+    NB: This needs to do better error handling if the url fails or points to an
+    invalid xml file.
+    """
     url = URL_TMP.format(doi)
     response = requests.get(url)
     time.sleep(MIN_DELAY)
@@ -74,31 +89,36 @@ def sequential_fetch(doi):
     return article
 
 def demo_sequential(dois):
-    """Fetch list of web pages sequentially."""
+    """Organises the process of downloading articles associated with dois
+    to SYNC_DIRECTORY sequentially.
+
+    Side-effect: prints a timer to indicate how long it took.
+    """
     recreate_dir(SYNC_DIRECTORY)
     start_time = default_timer()
     for doi in dois:
         start_time_url = default_timer()
        article = sequential_fetch(doi)
-        now = default_timer()
-        elapsed = now - start_time_url
         if article.type_ == "correction":
             new_article = sequential_fetch(article.related_doi)
-
-        # print('{0:5.2f} {1:30}{2:5.2f} '.format(now, url, elapsed))
-
+
     tot_elapsed = default_timer() - start_time
     print(' TOTAL SECONDS: '.rjust(30, '-') + '{0:5.2f} '. \
           format(tot_elapsed, '\n'))
 
 
 def demo_async(dois):
+    """Organises the process of downloading articles associated with the dois
+    to ASYNC_DIRECTORY using asynchronous functionality.
+
+    Side-effect: prints a timer to indicate how long it took.
+    """
     recreate_dir(ASYNC_DIRECTORY)
     start_time = default_timer()
     loop = asyncio.get_event_loop() # event loop
     future = asyncio.ensure_future(fetch_all(dois)) # tasks to do
     loop.run_until_complete(future) # loop until done
-    loop.run_until_complete(asyncio.sleep(0))
+    loop.run_until_complete(asyncio.sleep(0))
     loop.close()
     tot_elapsed = default_timer() - start_time
     print(' TOTAL SECONDS: '.rjust(30, '-') + '{0:5.2f} '. \
           format(tot_elapsed, '\n'))
-
+
 def recreate_dir(directory):
     """Removes and recreates the directory.
     """
     if os.path.isdir(directory):
         shutil.rmtree(directory)
     os.makedirs(directory, exist_ok=True)
 
 def main():
+    """Main loop for running and comparing the different approaches.
+    """
 
     dois = get_all_local_dois(corpusdir)[MIN_FILES:MIN_FILES+NUM_FILES]
     demo_sequential(dois)