Skip to content

Commit 4e06905

Browse files
committed
Make minimal changes
1 parent a5e69f8 commit 4e06905

File tree

1 file changed

+87
-65
lines changed

1 file changed

+87
-65
lines changed

pybliometrics/utils/get_content.py

Lines changed: 87 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
from typing import Literal, Optional, Type
2-
from random import shuffle
32
from requests import Session
43
from requests.adapters import HTTPAdapter
54
from requests.exceptions import JSONDecodeError
6-
from time import sleep, time
75
from urllib3.util import Retry
86

97
from pybliometrics import __version__
@@ -33,12 +31,66 @@ def get_session() -> Type[Session]:
3331
return session
3432

3533

36-
def prepare_headers_and_tokens(params):
37-
"""Prepare headers and tokens for the request."""
34+
def get_content(url: str,
35+
api: str,
36+
params: Optional[dict],
37+
method: Literal['GET', 'PUT'] = 'GET',
38+
**kwds):
39+
"""Helper function to download a file and return its content.
40+
41+
Parameters
42+
----------
43+
url : str
44+
The URL to be parsed.
45+
46+
api : str
47+
The Scopus API to be accessed.
48+
49+
params : dict (optional)
50+
Dictionary containing query parameters. For required keys
51+
and accepted values see e.g.
52+
https://api.elsevier.com/documentation/AuthorRetrievalAPI.wadl
53+
54+
**kwds : key-value pairings, optional
55+
Keywords passed on as query parameters. Must contain fields
56+
and values specified in the respective API specification.
57+
58+
Raises
59+
------
60+
ScopusHtmlError or HTTPError
61+
If the status of the response is not ok.
62+
63+
ValueError
64+
If the accept parameter is not one of the accepted values.
65+
66+
Returns
67+
-------
68+
resp : byte-like object
69+
The content of the file, which needs to be serialized.
70+
"""
71+
from random import shuffle
72+
from time import sleep, time
73+
74+
# Get needed resources for query
75+
config = get_config()
76+
3877
keys = get_keys()
39-
insttokens = list(zip(keys, get_insttokens()))
78+
79+
# Get tokens and zip with keys
80+
insttokens = get_insttokens()
81+
insttokens = list(zip(keys, insttokens))
82+
83+
# Keep keys that are not insttokens
4084
keys = keys[len(insttokens):]
4185

86+
session = get_session()
87+
88+
params = params or {}
89+
params.update(**kwds)
90+
proxies = dict(config._sections.get("Proxy", {}))
91+
timeout = config.getint("Requests", "Timeout", fallback=20)
92+
93+
# Get keys/tokens and create header
4294
token_key, insttoken = None, None
4395
if "insttoken" in params:
4496
token_key = params.pop("apikey")
@@ -50,66 +102,20 @@ def prepare_headers_and_tokens(params):
50102
else:
51103
key = keys.pop(0)
52104

53-
header = {
54-
'Accept': 'application/json',
55-
'User-Agent': user_agent,
56-
'X-ELS-APIKey': token_key or key
57-
}
105+
header = {'Accept': 'application/json',
106+
'User-Agent': user_agent,
107+
'X-ELS-APIKey': token_key or key}
58108

59-
if insttoken:
60-
header['X-ELS-Insttoken'] = insttoken
61-
62-
return header, insttokens, keys
63-
64-
65-
def handle_throttling(api):
66-
"""Handle throttling based on API limits."""
109+
# Wait if necessary because of throttling
67110
if len(_throttling_params[api]) == _throttling_params[api].maxlen:
68111
try:
69112
sleep(1 - (time() - _throttling_params[api][0]))
70113
except (IndexError, ValueError):
71114
pass
72115

73-
74-
def handle_response(resp):
75-
"""Handle the response and raise appropriate errors."""
76-
try:
77-
error_type = errors[resp.status_code]
78-
try:
79-
reason = resp.json()['service-error']['status']['statusText']
80-
except KeyError:
81-
try:
82-
reason = resp.json()['message']
83-
except KeyError:
84-
try:
85-
reason = resp.json()['error-response']['error-message']
86-
except KeyError:
87-
reason = ""
88-
raise error_type(reason)
89-
except (JSONDecodeError, KeyError):
90-
resp.raise_for_status()
91-
92-
93-
def get_content(url: str,
94-
api: str,
95-
params: Optional[dict],
96-
method: Literal['GET', 'PUT'] = 'GET',
97-
**kwds):
98-
"""Helper function to download a file and return its content."""
99-
config = get_config()
100-
101-
session = get_session()
102-
103-
params = params or {}
104-
params.update(**kwds)
105-
proxies = dict(config._sections.get("Proxy", {}))
106-
timeout = config.getint("Requests", "Timeout", fallback=20)
107-
108-
header, insttokens, keys = prepare_headers_and_tokens(params)
109-
handle_throttling(api)
110-
111116
# Use insttoken if available
112-
if 'X-ELS-Insttoken' in header:
117+
if insttoken:
118+
header['X-ELS-Insttoken'] = insttoken
113119
if method == 'GET':
114120
resp = session.get(url, headers=header, params=params, timeout=timeout)
115121
else:
@@ -120,9 +126,8 @@ def get_content(url: str,
120126
else:
121127
resp = session.put(url, headers=header, json=params, timeout=timeout, proxies=proxies)
122128

123-
124-
# Retry logic for 429 or 401
125-
while resp.status_code in (429, 401):
129+
# If 429 or 401 try other tokens
130+
while (resp.status_code == 429) or (resp.status_code == 401):
126131
try:
127132
token_key, token = insttokens.pop(0) # Get and remove current key
128133
header['X-ELS-APIKey'] = token_key
@@ -135,7 +140,12 @@ def get_content(url: str,
135140
except IndexError: # All tokens depleted
136141
break
137142

138-
while resp.status_code in (429, 401):
143+
# Remove Insttoken from header (if present)
144+
if 'X-ELS-Insttoken' in header:
145+
del header['X-ELS-Insttoken']
146+
147+
# If 429 or 401 try other keys
148+
while (resp.status_code == 429) or (resp.status_code == 401):
139149
try:
140150
key = keys.pop(0) # Remove current key
141151
header['X-ELS-APIKey'] = key
@@ -147,12 +157,24 @@ def get_content(url: str,
147157
except IndexError: # All keys depleted
148158
break
149159

150-
if 'X-ELS-Insttoken' in header:
151-
del header['X-ELS-Insttoken']
152-
153160
_throttling_params[api].append(time())
154161

155-
handle_response(resp)
162+
# Raise an error if necessary, using the supplied error message where possible
163+
try:
164+
error_type = errors[resp.status_code]
165+
try:
166+
reason = resp.json()['service-error']['status']['statusText']
167+
except KeyError:
168+
try:
169+
reason = resp.json()['message']
170+
except KeyError:
171+
try:
172+
reason = resp.json()['error-response']['error-message']
173+
except KeyError:
174+
reason = ""
175+
raise error_type(reason)
176+
except (JSONDecodeError, KeyError):
177+
resp.raise_for_status()
156178
return resp
157179

158180

0 commit comments

Comments
 (0)