|
# Core package metadata (PEP 621 `[project]` table).
[project]
name = "ingestors"
version = "3.22.0"
description = "Ingestors extract the contents of mixed unstructured documents into structured (followthemoney) data."
authors = [
    { name = "Friedrich Lindenberg", email = "friedrich@pudo.org" },
    { name = "OCCRP Data Team", email = "data@occrp.org" },
    { name = "ID.IO", email = "hi@investigativedata.org" },
]
readme = "README.md"
# NOTE(review): the SPDX licence list marks "AGPL-3.0" as deprecated in favour
# of "AGPL-3.0-only" / "AGPL-3.0-or-later" -- confirm which one applies before
# changing the value.
license = "AGPL-3.0"
# Python 3.11 or newer, capped below the next major release.
requires-python = ">=3.11,<4.0"
# Runtime requirements as PEP 508 strings. Most entries are pinned to one exact
# version; the few ranges and the direct reference are called out below.
dependencies = [
    "banal (==1.0.6)",
    "normality (==2.5.0)",
    "pantomime (==0.6.1)",
    "followthemoney (==3.5.9)",
    "followthemoney-store[postgresql] (>=3.1.0,<3.2.0)",
    # Direct Git reference to the `main` branch: an install picks up whatever
    # that branch points at at the time.
    # NOTE(review): consider pinning to a tag or commit for reproducible builds.
    "servicelayer @ git+https://github.com/investigativedata/servicelayer.git@main",
    "languagecodes (==1.1.1)",
    "countrytagger (==0.1.2)",
    "pyicu (==2.12)",
    "google-cloud-vision (==3.7.2)",
    "tesserocr (==2.7.1)",
    "spacy (==3.6.1)",
    # Upper bound only, which keeps numpy on the 1.x series.
    # NOTE(review): presumably required by the pinned binary packages above --
    # confirm before relaxing.
    "numpy (<2.0)",
    "fingerprints (==1.2.3)",
    "fasttext (==0.9.2)",
    "pika (==1.3.2)",
    "nomenklatura (==3.15.2)",
    "dbf (==0.99.9)",
    "pymediainfo (==6.1.0)",
    "python-magic (==0.4.27)",
    "rarfile (==4.2)",
    "xlrd (==2.0.1)",
    "openpyxl (==3.1.2)",
    "odfpy (==1.4.1)",
    "faust-cchardet (==2.1.19)",
    "lxml (==5.0.0)",
    "olefile (==0.47)",
    "Pillow (==10.1.0)",
    "vobject (==0.9.6.1)",
    "msglite (==0.30.0)",
    "icalendar (==5.0.12)",
    "cryptography (==41.0.7)",
    "requests[security] (==2.31.0)",
    "pymupdf (==1.21.1)",
    "prometheus-client (==0.17.1)",
    "sentry_sdk (==2.0.1)",
    # Requirements of the servicelayer extras.
    "boto3 (>=1.11.9,<2.0.0)",
    "grpcio (>=1.32.0,<2.0.0)",
    "google-cloud-storage (>=1.31.0,<3.0.0)",
]
| 55 | + |
# Command-line entry point: installs an `ingestors` executable that calls
# `cli` from the `ingestors.cli` module.
#
# The same target is intentionally not repeated under `[project.gui-scripts]`:
# both tables would produce an executable named `ingestors`, and on Windows a
# GUI script is started without a console, leaving a command-line tool with
# nowhere to write its output.
[project.scripts]
ingestors = "ingestors.cli:cli"
| 61 | + |
# Entry-point group `ingestors`: each key is an entry-point name and each value
# a `module:attribute` reference. Comments below label entries by the
# sub-package they point into; the original order is kept as-is.
[project.entry-points.ingestors]
ignore = "ingestors.ignore:IgnoreIngestor"

# ingestors.documents
html = "ingestors.documents.html:HTMLIngestor"
xml = "ingestors.documents.xml:XMLIngestor"
plain = "ingestors.documents.plain:PlainTextIngestor"
office = "ingestors.documents.office:DocumentIngestor"
opendoc = "ingestors.documents.opendoc:OpenDocumentIngestor"
ooxml = "ingestors.documents.ooxml:OfficeOpenXMLIngestor"
djvu = "ingestors.documents.djvu:DjVuIngestor"
pdf = "ingestors.documents.pdf:PDFIngestor"

# ingestors.packages (the last three reference the package itself, not a submodule)
rar = "ingestors.packages.rar:RARIngestor"
zip = "ingestors.packages.zip:ZipIngestor"
tar = "ingestors.packages.tar:TarIngestor"
7z = "ingestors.packages:SevenZipIngestor"
gz = "ingestors.packages:GzipIngestor"
bz2 = "ingestors.packages:BZ2Ingestor"

# ingestors.email
pst = "ingestors.email.outlookpst:OutlookPSTIngestor"
olm = "ingestors.email.olm:OutlookOLMArchiveIngestor"
opfmsg = "ingestors.email.olm:OutlookOLMMessageIngestor"
olemsg = "ingestors.email.outlookmsg:OutlookMsgIngestor"
msg = "ingestors.email.msg:RFC822Ingestor"
emlx = "ingestors.email.emlx:AppleEmlxIngestor"
vcard = "ingestors.email.vcard:VCardIngestor"
calendar = "ingestors.email.calendar:CalendarIngestor"

# ingestors.tabular
csv = "ingestors.tabular.csv:CSVIngestor"
access = "ingestors.tabular.access:AccessIngestor"
sqlite = "ingestors.tabular.sqlite:SQLiteIngestor"
xls = "ingestors.tabular.xls:ExcelIngestor"
xlsx = "ingestors.tabular.xlsx:ExcelXMLIngestor"
ods = "ingestors.tabular.ods:OpenOfficeSpreadsheetIngestor"

# One more email entry and one more tabular entry, listed apart from their groups.
mbox = "ingestors.email.mbox:MboxFileIngestor"
dbf = "ingestors.tabular.dbf:DBFIngestor"

# ingestors.media
image = "ingestors.media.image:ImageIngestor"
tiff = "ingestors.media.tiff:TIFFIngestor"
svg = "ingestors.media.svg:SVGIngestor"
audio = "ingestors.media.audio:AudioIngestor"
video = "ingestors.media.video:VideoIngestor"

# ingestors.misc
json = "ingestors.misc.jsonfile:JSONIngestor"
| 100 | + |
# Build backend declaration (PEP 517/518).
# Package metadata in this file lives in a PEP 621 `[project]` table, which
# poetry-core only reads from 2.0 onwards, hence the explicit lower bound.
# The upper bound keeps a future major release from silently changing the build.
[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"
| 104 | + |
# Poetry dependency group `dev`: packages for development and testing that are
# not part of the runtime `dependencies` list.
# NOTE(review): the console script targets `ingestors.cli:cli`; if that module
# imports click, confirm click is also available at runtime.
[tool.poetry.group.dev.dependencies]
click = "8.1.7"
pytest = "8.2.0"
pytest-cov = "5.0.0"
0 commit comments