1
1
from typing import Literal , Optional , Type
2
- from random import shuffle
3
2
from requests import Session
4
3
from requests .adapters import HTTPAdapter
5
4
from requests .exceptions import JSONDecodeError
6
- from time import sleep , time
7
5
from urllib3 .util import Retry
8
6
9
7
from pybliometrics import __version__
@@ -33,12 +31,66 @@ def get_session() -> Type[Session]:
33
31
return session
34
32
35
33
36
- def prepare_headers_and_tokens (params ):
37
- """Prepare headers and tokens for the request."""
34
+ def get_content (url : str ,
35
+ api : str ,
36
+ params : Optional [dict ],
37
+ method : Literal ['GET' , 'PUT' ] = 'GET' ,
38
+ ** kwds ):
39
+ """Helper function to download a file and return its content.
40
+
41
+ Parameters
42
+ ----------
43
+ url : str
44
+ The URL to be parsed.
45
+
46
+ api : str
47
+ The Scopus API to be accessed.
48
+
49
+ params : dict (optional)
50
+ Dictionary containing query parameters. For required keys
51
+ and accepted values see e.g.
52
+ https://api.elsevier.com/documentation/AuthorRetrievalAPI.wadl
53
+
54
+ **kwds : key-value pairings, optional
55
+ Keywords passed on as query parameters. Must contain fields
56
+ and values specified in the respective API specification.
57
+
58
+ Raises
59
+ ------
60
+ ScopusHtmlError or HTTPError
61
+ If the status of the response is not ok.
62
+
63
+ ValueError
64
+ If the accept parameter is not one of the accepted values.
65
+
66
+ Returns
67
+ -------
68
+ resp : byte-like object
69
+ The content of the file, which needs to be serialized.
70
+ """
71
+ from random import shuffle
72
+ from time import sleep , time
73
+
74
+ # Get needed resources for query
75
+ config = get_config ()
76
+
38
77
keys = get_keys ()
39
- insttokens = list (zip (keys , get_insttokens ()))
78
+
79
+ # Get tokens and zip with keys
80
+ insttokens = get_insttokens ()
81
+ insttokens = list (zip (keys , insttokens ))
82
+
83
+ # Keep keys that are not insttokens
40
84
keys = keys [len (insttokens ):]
41
85
86
+ session = get_session ()
87
+
88
+ params = params or {}
89
+ params .update (** kwds )
90
+ proxies = dict (config ._sections .get ("Proxy" , {}))
91
+ timeout = config .getint ("Requests" , "Timeout" , fallback = 20 )
92
+
93
+ # Get keys/tokens and create header
42
94
token_key , insttoken = None , None
43
95
if "insttoken" in params :
44
96
token_key = params .pop ("apikey" )
@@ -50,66 +102,20 @@ def prepare_headers_and_tokens(params):
50
102
else :
51
103
key = keys .pop (0 )
52
104
53
- header = {
54
- 'Accept' : 'application/json' ,
55
- 'User-Agent' : user_agent ,
56
- 'X-ELS-APIKey' : token_key or key
57
- }
105
+ header = {'Accept' : 'application/json' ,
106
+ 'User-Agent' : user_agent ,
107
+ 'X-ELS-APIKey' : token_key or key }
58
108
59
- if insttoken :
60
- header ['X-ELS-Insttoken' ] = insttoken
61
-
62
- return header , insttokens , keys
63
-
64
-
65
- def handle_throttling (api ):
66
- """Handle throttling based on API limits."""
109
+ # Wait if necessary because of throttling
67
110
if len (_throttling_params [api ]) == _throttling_params [api ].maxlen :
68
111
try :
69
112
sleep (1 - (time () - _throttling_params [api ][0 ]))
70
113
except (IndexError , ValueError ):
71
114
pass
72
115
73
-
74
- def handle_response (resp ):
75
- """Handle the response and raise appropriate errors."""
76
- try :
77
- error_type = errors [resp .status_code ]
78
- try :
79
- reason = resp .json ()['service-error' ]['status' ]['statusText' ]
80
- except KeyError :
81
- try :
82
- reason = resp .json ()['message' ]
83
- except KeyError :
84
- try :
85
- reason = resp .json ()['error-response' ]['error-message' ]
86
- except KeyError :
87
- reason = ""
88
- raise error_type (reason )
89
- except (JSONDecodeError , KeyError ):
90
- resp .raise_for_status ()
91
-
92
-
93
- def get_content (url : str ,
94
- api : str ,
95
- params : Optional [dict ],
96
- method : Literal ['GET' , 'PUT' ] = 'GET' ,
97
- ** kwds ):
98
- """Helper function to download a file and return its content."""
99
- config = get_config ()
100
-
101
- session = get_session ()
102
-
103
- params = params or {}
104
- params .update (** kwds )
105
- proxies = dict (config ._sections .get ("Proxy" , {}))
106
- timeout = config .getint ("Requests" , "Timeout" , fallback = 20 )
107
-
108
- header , insttokens , keys = prepare_headers_and_tokens (params )
109
- handle_throttling (api )
110
-
111
116
# Use insttoken if available
112
- if 'X-ELS-Insttoken' in header :
117
+ if insttoken :
118
+ header ['X-ELS-Insttoken' ] = insttoken
113
119
if method == 'GET' :
114
120
resp = session .get (url , headers = header , params = params , timeout = timeout )
115
121
else :
@@ -120,9 +126,8 @@ def get_content(url: str,
120
126
else :
121
127
resp = session .put (url , headers = header , json = params , timeout = timeout , proxies = proxies )
122
128
123
-
124
- # Retry logic for 429 or 401
125
- while resp .status_code in (429 , 401 ):
129
+ # If 429 or 401 try other tokens
130
+ while (resp .status_code == 429 ) or (resp .status_code == 401 ):
126
131
try :
127
132
token_key , token = insttokens .pop (0 ) # Get and remove current key
128
133
header ['X-ELS-APIKey' ] = token_key
@@ -135,7 +140,12 @@ def get_content(url: str,
135
140
except IndexError : # All tokens depleted
136
141
break
137
142
138
- while resp .status_code in (429 , 401 ):
143
+ # Remove Insttoken from header (if present)
144
+ if 'X-ELS-Insttoken' in header :
145
+ del header ['X-ELS-Insttoken' ]
146
+
147
+ # If 429 or 401 try other keys
148
+ while (resp .status_code == 429 ) or (resp .status_code == 401 ):
139
149
try :
140
150
key = keys .pop (0 ) # Remove current key
141
151
header ['X-ELS-APIKey' ] = key
@@ -147,12 +157,24 @@ def get_content(url: str,
147
157
except IndexError : # All keys depleted
148
158
break
149
159
150
- if 'X-ELS-Insttoken' in header :
151
- del header ['X-ELS-Insttoken' ]
152
-
153
160
_throttling_params [api ].append (time ())
154
161
155
- handle_response (resp )
162
+ # Raise an error if applicable, using the supplied error message where possible
163
+ try :
164
+ error_type = errors [resp .status_code ]
165
+ try :
166
+ reason = resp .json ()['service-error' ]['status' ]['statusText' ]
167
+ except KeyError :
168
+ try :
169
+ reason = resp .json ()['message' ]
170
+ except KeyError :
171
+ try :
172
+ reason = resp .json ()['error-response' ]['error-message' ]
173
+ except KeyError :
174
+ reason = ""
175
+ raise error_type (reason )
176
+ except (JSONDecodeError , KeyError ):
177
+ resp .raise_for_status ()
156
178
return resp
157
179
158
180
0 commit comments