- 
                Notifications
    You must be signed in to change notification settings 
- Fork 653
How to Extract Fonts from a PDF
        dothinking edited this page Apr 16, 2021 
        ·
        4 revisions
      
    This script extracts every font referenced by any page of a PDF and writes each extractable font to its own file; a font shared by several pages is extracted only once.
from __future__ import print_function
import fitz
# Extract every font referenced by any page of "some.pdf" and write each
# extractable one to the current directory as "<basename>-<xref>.<ext>".
# NOTE(review): getFontList() / extractFont() are the camelCase PyMuPDF names
# this script was written against; newer releases are believed to spell them
# get_fonts() / extract_font() -- confirm against the installed version.

# Open the PDF
doc = fitz.open("some.pdf")
xref_visited = set()  # font xrefs already processed (set: O(1) membership test)
num = 0  # count the extracted fonts
for page in doc:
    for f in page.getFontList():  # list of fonts of page
        xref = f[0]  # xref of font
        if xref in xref_visited:
            continue  # skip if already processed
        xref_visited.add(xref)  # do not process a second time
        # extract font buffer
        basename, ext, _, buffer = doc.extractFont(xref)
        if ext == "n/a":  # the font is not extractable
            continue
        num += 1
        foutname = "%s-%i.%s" % (basename, xref, ext)  # build the filename
        # 'with' guarantees the file is closed even if the write fails
        with open(foutname, "wb") as fout:
            fout.write(buffer)
        print("extracted", foutname)
footer = "extracted %i font files from %s." % (num, doc.name)
footer_line = "-" * len(footer)  # separator exactly as wide as the summary
# output some protocol
print(footer_line)
print(footer)
print(footer_line)
# Close the PDF
doc.close()
HOWTO Button annots with JavaScript
HOWTO work with PDF embedded files
HOWTO extract text from inside rectangles
HOWTO extract text in natural reading order
HOWTO create or extract graphics
HOWTO create your own PDF Drawing
Rectangle inclusion & intersection
Metadata & bookmark maintenance