Skip to content

Commit 7d068f8

Browse files
committed
Remove the CONVERT_RETRIES setting
The custom logic for retrying document conversions is no longer necessary. We should be able to rely solely on worker retries.
1 parent f5cf4dd commit 7d068f8

File tree

2 files changed

+14
-23
lines changed

2 files changed

+14
-23
lines changed

ingestors/settings.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
TESTING = False
66

77
CONVERT_TIMEOUT = env.to_int("INGESTORS_CONVERT_TIMEOUT", 300) # seconds
8-
CONVERT_RETRIES = env.to_int("INGESTORS_CONVERT_RETRIES", 3)
98

109
# Enable (expensive!) Google Cloud API
1110
OCR_VISION_API = env.to_bool("INGESTORS_OCR_VISION_API", False)

ingestors/support/convert.py

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -73,28 +73,20 @@ def _document_to_pdf(
7373
file_path,
7474
]
7575
try:
76-
for attempt in range(1, settings.CONVERT_RETRIES):
77-
log.info(
78-
f"Starting LibreOffice: {cmd} with timeout {timeout} attempt #{attempt}/{settings.CONVERT_RETRIES}",
79-
)
80-
try:
81-
subprocess.run(cmd, timeout=timeout, check=True)
82-
except Exception as e:
83-
log.info(
84-
f"Could not be converted to PDF (attempt {attempt}/{settings.CONVERT_RETRIES}): {e}"
85-
)
86-
continue
76+
log.info(f"Starting LibreOffice: {cmd} with timeout {timeout}")
77+
try:
78+
subprocess.run(cmd, timeout=timeout, check=True)
79+
except Exception as e:
80+
raise ProcessingException("Could not be converted to PDF") from e
8781

88-
for file_name in os.listdir(pdf_output_dir):
89-
if not file_name.endswith(".pdf"):
90-
continue
91-
out_file = os.path.join(pdf_output_dir, file_name)
92-
if os.stat(out_file).st_size == 0:
93-
continue
94-
log.info(f"Successfully converted {out_file}")
95-
return out_file
96-
raise ProcessingException(
97-
f"Could not be converted to PDF (attempt #{attempt}/{settings.CONVERT_RETRIES})"
98-
)
82+
for file_name in os.listdir(pdf_output_dir):
83+
if not file_name.endswith(".pdf"):
84+
continue
85+
out_file = os.path.join(pdf_output_dir, file_name)
86+
if os.stat(out_file).st_size == 0:
87+
continue
88+
log.info(f"Successfully converted {out_file}")
89+
return out_file
90+
raise ProcessingException("Could not be converted to PDF")
9991
except Exception as e:
10092
raise ProcessingException("Could not be converted to PDF") from e

0 commit comments

Comments
 (0)