Skip to content

Commit f3203cf

Browse files
committed
Improve pypdfium2 integration
1 parent: 4127933 · commit: f3203cf

File tree

3 files changed

+3
-13
lines changed

3 files changed

+3
-13
lines changed

paddlex/inference/models/formula_recognition/result.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -283,10 +283,7 @@ def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
283283
for page in pdfDoc:
284284
rotate = int(0)
285285
zoom = 2
286-
img = page.render(scale=zoom, rotation=rotate).to_pil()
287-
img = img.convert("RGB")
288-
img = np.array(img)
289-
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
286+
img = page.render(scale=zoom, rotation=rotate).to_numpy()
290287
xywh = crop_white_area(img)
291288

292289
if xywh is not None:

paddlex/inference/serving/infra/utils.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -195,10 +195,7 @@ def read_pdf(
195195
# TODO: Do not always use zoom=2.0
196196
zoom = 2.0
197197
deg = 0
198-
image = page.render(scale=zoom, rotation=deg).to_pil()
199-
image = image.convert("RGB")
200-
image = np.array(image)
201-
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
198+
image = page.render(scale=zoom, rotation=deg).to_numpy()
202199
images.append(image)
203200
page_info = PDFPageInfo(
204201
width=image.shape[1],

paddlex/inference/utils/io/readers.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -293,11 +293,7 @@ def read_file(self, in_path):
293293
doc = pdfium.PdfDocument(in_path)
294294
try:
295295
for page in doc:
296-
image = page.render(scale=self._scale, rotation=self._rotation).to_pil()
297-
image = image.convert("RGB")
298-
img_cv = np.array(image)
299-
img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
300-
yield img_cv
296+
yield page.render(scale=self._scale, rotation=self._rotation).to_numpy()
301297
finally:
302298
doc.close()
303299

0 commit comments

Comments
 (0)