import logging
import re
from pathlib import Path
from urllib.parse import unquote, urljoin, urlsplit

import aiohttp
import requests

from .configs import *
from .classes.requests import Get, Put, Post, Patch, Delete, Download, All
from .db.response_handler import ResponseHandler
@@ -132,9 +133,10 @@ def _request_handler(self, api_url: str, method: str, params: dict = None, paylo
132133 def make_request ():
133134 try :
134135 if method == "DOWNLOAD" :
135- response_obj = self ._download_content (api_url , params )
136+
137+ output_path , response_obj = self ._download_content (api_url , params )
136138 self .rate_limiter .update_rate_limits (endpoint , response_obj .headers )
137- return response_obj
139+ return output_path , response_obj
138140
139141 if return_all is False :
140142 # Single request
@@ -174,17 +176,108 @@ def make_request():
174176
175177 return make_request ()
176178
179+ # def _download_content(self, url: str, params: dict = None):
180+ # """
181+ # Makes the actual HTTP request based on the method.
182+ # """
183+ # print("Downloading COntent")
184+ # headers = {
185+ # "Authorization": f"Bearer {self.access_token}",
186+ # "Content-Type": "application/pdf",
187+ # }
188+ # def get_base_url(url: str) -> str:
189+ # base = url.rstrip('/').rsplit('/', 1)[0]
190+ # return base + '.json'
191+
192+ # base_params = {'fields':'name'}
193+ # api_base_url = get_base_url(url)
194+ # print(api_base_url)
195+ # response = requests.get(api_base_url, headers=headers, params=base_params)
196+ # print(response.json())
197+
198+ # try:
199+ # response = requests.get(url, headers=headers, params=params)
200+
201+ # if response.status_code == 429:
202+ # retry_after = int(response.headers.get("Retry-After", 1))
203+ # raise requests.exceptions.RequestException(
204+ # f"Rate limited. Retry after {retry_after} seconds."
205+ # )
206+
207+ # if response.status_code != 200:
208+ # raise requests.exceptions.RequestException(f"HTTP {response.status_code}: {response.text}")
209+
210+ # return response
211+
212+ # except requests.exceptions.RequestException as e:
213+ # raise RuntimeError(f"HTTP request failed: {e}") from e
214+
177215 def _download_content (self , url : str , params : dict = None ):
178216 """
179- Makes the actual HTTP request based on the method.
217+ Downloads the file from Clio and saves it to the current working directory.
218+ If no filename is found, fallback to using the document ID with the correct extension.
180219 """
181220
182- headers = {
183- "Authorization" : f"Bearer { self .access_token } " ,
184- "Content-Type" : "application/pdf" ,
221+ # ------------ Internal Helper Functions ------------
222+ def _safe_filename (name : str ) -> str :
223+ name = name .strip ().replace ("/" , "-" ).replace ("\\ " , "-" )
224+ return re .sub (r'[\x00-\x1f<>:"|?*]+' , "-" , name )[:255 ] or "downloaded_file"
225+
226+ def _filename_from_content_disposition (cd : str | None ) -> str | None :
227+ if not cd :
228+ return None
229+ match = re .search (r'filename\*?=(?:UTF-8\'\')?"?([^";]+)"?' , cd )
230+ return _safe_filename (unquote (match .group (1 ))) if match else None
231+
232+ def get_base_url (url : str ) -> str :
233+ base = url .rstrip ('/' ).rsplit ('/' , 1 )[0 ]
234+ return base + '.json'
235+
236+ def get_doc_id_from_url (url : str ) -> str :
237+ parts = urlsplit (url ).path .strip ("/" ).split ("/" )
238+ for i , part in enumerate (parts ):
239+ if part == "documents" and i + 1 < len (parts ):
240+ return parts [i + 1 ]
241+ return "unknown_id"
242+
243+ def guess_extension (content_type : str ) -> str :
244+ mime_map = {
245+ "application/pdf" : ".pdf" ,
246+ "application/msword" : ".doc" ,
247+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" : ".docx" ,
248+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" : ".xlsx" ,
249+ "application/zip" : ".zip" ,
250+ "image/jpeg" : ".jpg" ,
251+ "image/png" : ".png" ,
185252 }
253+ return mime_map .get (content_type .split (";" )[0 ].strip (), "" )
254+
255+ # ------------ Step 1: Fetch Metadata (.json) ------------
256+ meta_url = get_base_url (url )
257+ print (f"Meta URL: { meta_url } " )
258+
259+ meta_headers = {
260+ "Authorization" : f"Bearer { self .access_token } " ,
261+ "Accept" : "application/json"
262+ }
263+
186264 try :
187- response = requests .get (url , headers = headers , params = params )
265+ response = requests .get (meta_url , headers = meta_headers , params = {'fields' : 'name' })
266+ response .raise_for_status ()
267+ meta = response .json ()
268+
269+ filename_from_json = _safe_filename (meta .get ("data" , {}).get ("name" , "" ))
270+ except Exception :
271+ print ("Failed to fetch metadata, continuing without it." )
272+ filename_from_json = ""
273+
274+ download_headers = {
275+ "Authorization" : f"Bearer { self .access_token } " ,
276+ "Accept" : "*/*"
277+ }
278+
279+ try :
280+ response = requests .get (url , headers = download_headers , params = params , stream = True , allow_redirects = True )
188281
189282 if response .status_code == 429 :
190283 retry_after = int (response .headers .get ("Retry-After" , 1 ))
@@ -195,11 +288,23 @@ def _download_content(self, url: str, params: dict = None):
195288 if response .status_code != 200 :
196289 raise requests .exceptions .RequestException (f"HTTP { response .status_code } : { response .text } " )
197290
198- return response
199-
291+ content_disposition = response .headers .get ("Content-Disposition" )
292+ content_type = response .headers .get ("Content-Type" , "" )
293+ doc_id = get_doc_id_from_url (url )
294+ fallback_ext = guess_extension (content_type )
295+ filename = (
296+ _filename_from_content_disposition (content_disposition )
297+ or filename_from_json
298+ or f"{ doc_id } { fallback_ext } "
299+ )
300+
301+ output_path = self ._save_response_to_file (response , filename )
302+
303+ return output_path , response
304+
200305 except requests .exceptions .RequestException as e :
201306 raise RuntimeError (f"HTTP request failed: { e } " ) from e
202-
307+
203308 #Asyncronous Requests
204309 async def _make_async_request (self , url : str , method : str , params : dict = None , payload : dict = None ):
205310 """
@@ -323,6 +428,38 @@ async def _download_content_async(self, url: str, params: dict = None):
323428 except aiohttp .ClientError as e :
324429 raise RuntimeError (f"HTTP request failed: { e } " ) from e
325430
431+
432+ def _save_response_to_file (self , response : requests .Response , filename : str , subdir : str = "downloads" ) -> Path :
433+ """
434+ Saves the streamed response to a file.
435+ If file already exists, appends (1), (2), etc.
436+ Returns the final Path used.
437+ """
438+ output_dir = Path .cwd () / subdir
439+ output_dir .mkdir (parents = True , exist_ok = True )
440+
441+ base = output_dir / filename
442+ final_path = base
443+
444+ # Split filename into name and extension
445+ stem = final_path .stem
446+ suffix = final_path .suffix
447+ counter = 1
448+
449+ # Generate a non-colliding filename
450+ while final_path .exists ():
451+ final_path = output_dir / f"{ stem } ({ counter } ){ suffix } "
452+ counter += 1
453+
454+ # Save content
455+ with open (final_path , "wb" ) as f :
456+ for chunk in response .iter_content (chunk_size = 8192 ):
457+ if chunk :
458+ f .write (chunk )
459+
460+ print (f"✅ File saved to: { final_path } " )
461+ return final_path
462+
326463 def set_bearer_token (self , new_token : str ):
327464 self .access_token = new_token
328465
0 commit comments