99from ingestors .support .cache import CacheSupport
1010from ingestors .support .temp import TempFileSupport
1111from ingestors .exc import ProcessingException
12+ from ingestors import settings
1213
1314log = logging .getLogger (__name__ )
1415
15- TIMEOUT = 3600 # seconds
16- CONVERT_RETRIES = 5
1716
1817PDF_CACHE_ACCESSED = Counter (
1918 "ingestfile_pdf_cache_accessed" ,
@@ -45,7 +44,9 @@ def document_to_pdf(self, unique_tmpdir, file_path, entity):
4544 self .tags .set (key , content_hash )
4645 return pdf_file
4746
48- def _document_to_pdf (self , unique_tmpdir , file_path , entity , timeout = TIMEOUT ):
47+ def _document_to_pdf (
48+ self , unique_tmpdir , file_path , entity , timeout = settings .CONVERT_TIMEOUT
49+ ):
4950 """Converts an office document to PDF."""
5051 file_name = entity_filename (entity )
5152 log .info ("Converting [%s] to PDF" , entity )
@@ -72,30 +73,20 @@ def _document_to_pdf(self, unique_tmpdir, file_path, entity, timeout=TIMEOUT):
7273 file_path ,
7374 ]
7475 try :
75- for attempt in range (1 , CONVERT_RETRIES ):
76- log .info (
77- f"Starting LibreOffice: %s with timeout %s attempt #{ attempt } /{ CONVERT_RETRIES } " ,
78- cmd ,
79- timeout ,
80- )
81- try :
82- subprocess .run (cmd , timeout = timeout , check = True )
83- except Exception as e :
84- log .info (
85- f"Could not be converted to PDF (attempt { attempt } /{ CONVERT_RETRIES } ): { e } "
86- )
87- continue
76+ log .info (f"Starting LibreOffice: { cmd } with timeout { timeout } " )
77+ try :
78+ subprocess .run (cmd , timeout = timeout , check = True )
79+ except Exception as e :
80+ raise ProcessingException ("Could not be converted to PDF" ) from e
8881
89- for file_name in os .listdir (pdf_output_dir ):
90- if not file_name .endswith (".pdf" ):
91- continue
92- out_file = os .path .join (pdf_output_dir , file_name )
93- if os .stat (out_file ).st_size == 0 :
94- continue
95- log .info (f"Successfully converted { out_file } " )
96- return out_file
97- raise ProcessingException (
98- f"Could not be converted to PDF (attempt #{ attempt } /{ CONVERT_RETRIES } )"
99- )
82+ for file_name in os .listdir (pdf_output_dir ):
83+ if not file_name .endswith (".pdf" ):
84+ continue
85+ out_file = os .path .join (pdf_output_dir , file_name )
86+ if os .stat (out_file ).st_size == 0 :
87+ continue
88+ log .info (f"Successfully converted { out_file } " )
89+ return out_file
90+ raise ProcessingException ("Could not be converted to PDF" )
10091 except Exception as e :
10192 raise ProcessingException ("Could not be converted to PDF" ) from e
0 commit comments