Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions CODE_OF_CONDUCT.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

## Submitting a new issue

* Search through existing Issues and Discussions, in case your issue already exists and a solution is being developed.
* Ensure you read & follow the template form.
* Consider you may be the best person to investigate and fix.
- Search through existing Issues and Discussions, in case your issue already exists and a solution is being developed.
- Ensure you read & follow the template form.
- Consider you may be the best person to investigate and fix.

## Contributing to an existing Issue

* Read the entire thread.
* Ensure your comment is contributing something new/useful. Remember you can simply react to other comments.
* Be concise:
- Read the entire thread.
- Ensure your comment is contributing something new/useful. Remember you can simply react to other comments.
- Be concise:
  - use the formatting options
  - if replying to a big comment, instead of quoting it, link to it
1 change: 0 additions & 1 deletion MANIFEST.in

This file was deleted.

173 changes: 132 additions & 41 deletions fastmrz/fastmrz.py

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Build backend declaration: the distribution is built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Core metadata for the fastmrz distribution.
[project]
name = "fastmrz"
version = "2.1"
description = "Extracts the Machine Readable Zone (MRZ) data from document images"
readme = "README.md"
# The Python version classifiers below list 3.8 through 3.12, matching this floor.
requires-python = ">=3.8"
license = { text = "AGPLv3" }
authors = [
{ name = "Sivakumar Mahalingam" }
]
# NOTE(review): both the "v3" and the "v3 or later" AGPL classifiers are listed;
# confirm which licensing terms are intended and keep only the matching one.
classifiers = [
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU Affero General Public License v3",
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Operating System :: MacOS",
"Operating System :: POSIX :: Linux",
"Operating System :: Microsoft :: Windows :: Windows 10",
"Operating System :: Microsoft :: Windows :: Windows 11",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Image Processing",
"Topic :: Scientific/Engineering :: Image Recognition",
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Software Development",
]
keywords = [
"fastmrz",
"mrz",
"image processing",
"image recognition",
"ocr",
"computer vision",
"text recognition",
"text detection",
"artificial intelligence",
"onnx"
]
# Runtime dependencies installed alongside the package.
# NOTE(review): tests/test.py imports numpy directly, but numpy is not declared
# here; presumably it arrives transitively via opencv-python-headless — confirm,
# and declare it explicitly if the package itself imports it.
dependencies = [
"opencv-python-headless>=4.11.0.86",
"pytesseract>=0.3.10",
]

# Project links published as part of the package metadata.
[project.urls]
Homepage = "https://github.com/sivakumar-mahalingam/fastmrz/"
Source = "https://github.com/sivakumar-mahalingam/fastmrz"
Tracker = "https://github.com/sivakumar-mahalingam/fastmrz/issues"

# Wheel contents: ship the fastmrz package and name its .onnx files explicitly.
# NOTE(review): files under fastmrz/ are presumably already selected by
# `packages`; confirm whether the extra include pattern is still required.
[tool.hatch.build.targets.wheel]
packages = ["fastmrz"]
include = [
"fastmrz/**/*.onnx",
]

# Source distribution contents: the package itself plus tests, docs, the
# tessdata and data directories, and the top-level README, LICENSE and
# CODE_OF_CONDUCT files.
[tool.hatch.build.targets.sdist]
include = [
"fastmrz",
"tests",
"docs",
"tessdata",
"data",
"README.md",
"LICENSE",
"CODE_OF_CONDUCT.md",
"fastmrz/**/*.onnx",
]

# Developer shortcut for the test suite: `hatch run test`.
# This is deliberately not a [project.scripts] entry. That table installs a
# console command into every end user's environment, and "unittest:main"
# launched from such a wrapper only inspects the wrapper's own __main__ module,
# so it never executed the tests under tests/.
# tests/test.py ends with an `if __name__ == "__main__": unittest.main()` guard,
# so running the file directly executes the whole suite.
[tool.hatch.envs.default.scripts]
test = "python tests/test.py"
2 changes: 0 additions & 2 deletions requirements.txt

This file was deleted.

49 changes: 0 additions & 49 deletions setup.py

This file was deleted.

58 changes: 37 additions & 21 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
import unittest
from pathlib import Path

import numpy as np
import os

from fastmrz import FastMRZ

fast_mrz = FastMRZ()
BASE_DIR = Path(__file__).parent.parent
DATA_DIR = BASE_DIR / "data"

fast_mrz = FastMRZ(
tessdata_path=BASE_DIR / "tessdata",
)


class TestFastMRZMethods(unittest.TestCase):
def test_process_image(self):
image_path = os.path.abspath("../data/td3.jpg")
image_path = DATA_DIR / "td3.jpg"
processed_image = fast_mrz._process_image(image_path)
self.assertIsInstance(processed_image, np.ndarray)
self.assertEqual(processed_image.shape, (1, 256, 256, 3))

def test_get_roi(self):
output_data = np.random.rand(1, 256, 256, 1)
image_path = os.path.abspath("../data/td3.jpg")
image_path = DATA_DIR / "td3.jpg"
roi = fast_mrz._get_roi(output_data, image_path)
self.assertIsInstance(roi, str)

Expand All @@ -25,7 +32,9 @@ def test_cleanse_roi(self):
self.assertIsInstance(cleansed_text, str)

def test_get_final_check_digit(self):
input_string = "I<UTOERIKSSON<<ANNA<MARIA<<<<<<<<<<<\nD231458907UTO7408122F1204159<<<<<<<6"
input_string = (
"I<UTOERIKSSON<<ANNA<MARIA<<<<<<<<<<<\nD231458907UTO7408122F1204159<<<<<<<6"
)
input_type = "TD2"
final_check_digit = fast_mrz._get_final_checkdigit(input_string, input_type)
self.assertIsInstance(final_check_digit, str)
Expand All @@ -41,53 +50,60 @@ def test_format_date(self):
self.assertIsInstance(formatted_date, str)

def test_read_raw_mrz(self):
image_path = os.path.abspath("../data/td2.jpg")
image_path = DATA_DIR / "td2.jpg"
raw_mrz = fast_mrz.get_details(image_path, ignore_parse=True)
self.assertIsInstance(raw_mrz, str)

def test_read_mrz(self):
image_path = os.path.abspath("../data/td3.jpg")
image_path = DATA_DIR / "td3.jpg"
mrz_data = fast_mrz.get_details(image_path)
self.assertIsInstance(mrz_data, dict)
self.assertIn("status", mrz_data.keys())

def test_read_mrz_nomrz(self):
image_path = os.path.abspath("../data/nomrz.jpg")
image_path = DATA_DIR / "nomrz.jpg"
mrz_data = fast_mrz.get_details(image_path)
self.assertIsInstance(mrz_data, dict)
self.assertIn("status", mrz_data.keys())

def test_read_mrz_mrva(self):
image_path = os.path.abspath("../data/mrva.jpg")
image_path = DATA_DIR / "mrva.jpg"
mrz_data = fast_mrz.get_details(image_path)
self.assertIsInstance(mrz_data, dict)
self.assertIn("status", mrz_data.keys())

def test_read_mrz_mrvb(self):
image_path = os.path.abspath("../data/mrvb.jpg")
image_path = DATA_DIR / "mrvb.jpg"
mrz_data = fast_mrz.get_details(image_path)
self.assertIsInstance(mrz_data, dict)
self.assertIn("status", mrz_data.keys())

def test_validate_mrz(self):
result = fast_mrz.validate_mrz("P<GBRPUDARSAN<<HENERT<<<<<<<<<<<<<<<<<<<<<<<\n"
"7077979792GBR9505209M1704224<<<<<<<<<<<<<<00")
expected = {"is_valid": True, "message": "The given mrz is valid"}
result = fast_mrz.validate_mrz(
"P<GBRPUDARSAN<<HENERT<<<<<<<<<<<<<<<<<<<<<<<\n"
"7077979792GBR9505209M1704224<<<<<<<<<<<<<<00"
)
expected = {"is_valid": True, "status_message": "The given mrz is valid"}
self.assertEqual(result, expected)

def test_validate_mrz_invalid_format(self):
result = fast_mrz.validate_mrz("INVALIDTEXT<<HENERT<<<<<<<<<<<<<<<<<<<<<<<\n"
"7077979792GBR9505209M1704224<<<<<<<<<<<<<<00")
result = fast_mrz.validate_mrz(
"INVALIDTEXT<<HENERT<<<<<<<<<<<<<<<<<<<<<<<\n"
"7077979792GBR9505209M1704224<<<<<<<<<<<<<<00"
)
self.assertFalse(result["is_valid"])
self.assertIn("message", result)
self.assertIsInstance(result["message"], str)
self.assertIn("status_message", result)
self.assertIsInstance(result["status_message"], str)

def test_validate_mrz_invalid_check_digit(self):
result = fast_mrz.validate_mrz("P<GBRPUDARSAN<<HENERT<<<<<<<<<<<<<<<<<<<<<<<\n"
"7077979792GBR9505209M1704224<<<<<<<<<<<<<<01")
result = fast_mrz.validate_mrz(
"P<GBRPUDARSAN<<HENERT<<<<<<<<<<<<<<<<<<<<<<<\n"
"7077979792GBR9505209M1704224<<<<<<<<<<<<<<01"
)
self.assertFalse(result["is_valid"])
self.assertIn("message", result)
self.assertIsInstance(result["message"], str)
self.assertIn("status_message", result)
self.assertIsInstance(result["status_message"], str)


if __name__ == "__main__":
unittest.main()
Loading