16 commits
b20b595
chore: Code comments
nikos-livathinos Nov 12, 2025
a379cae
feat: WIP: Migrating the PixelLayoutEvaluator
nikos-livathinos Nov 12, 2025
a324d6f
fix: PixelLayoutEvaluator: Fix the matrix categories mappings. Add un…
nikos-livathinos Nov 13, 2025
dc5206c
feat: PixelLayoutEvaluator: Introduce pydantic types as transfer obje…
nikos-livathinos Nov 13, 2025
0a2e728
feat: WIP: Implementing the PixelLayoutEvaluator::save_evaluations().
nikos-livathinos Nov 13, 2025
54917ad
chore: Move all pixel layout evaluation pydantic transfer objects in …
nikos-livathinos Nov 13, 2025
7630760
feat: PixelLayoutEvaluator seems to work on DPBench. Matrices are gen…
nikos-livathinos Nov 13, 2025
249645e
feat: Add class_names in MultiLabelMatrixMetrics
nikos-livathinos Nov 13, 2025
ed869fc
chore: Improve tests for PixelLayoutEvaluator
nikos-livathinos Nov 14, 2025
9fc7e8c
feat: Make the DoclingPredictionProvider dump the full pipeline optio…
nikos-livathinos Nov 14, 2025
5aea324
feat: Extend PixelLayoutEvaluator to detect the name of the layout mo…
nikos-livathinos Nov 14, 2025
94c2376
feat: Extend PixelLayoutEvaluator to include DatasetStatistics fields…
nikos-livathinos Nov 14, 2025
ae0ad5d
fix: PixelLayoutEvaluator: Convert num_pixels to uint64
nikos-livathinos Nov 14, 2025
6a4a6f3
chore: Fix typos. Code comments
nikos-livathinos Nov 14, 2025
c75e59f
Merge branch 'main' into nli/pixel_layout_evaluation
nikos-livathinos Nov 14, 2025
8893406
fix: Use plain int python type for num_pixels
nikos-livathinos Nov 14, 2025
36 changes: 36 additions & 0 deletions docling_eval/cli/main.py
@@ -96,6 +96,8 @@
    OCREvaluator,
    OCRVisualizer,
)
from docling_eval.evaluators.pixel.pixel_types import DatasetPixelLayoutEvaluation
from docling_eval.evaluators.pixel_layout_evaluator import PixelLayoutEvaluator
from docling_eval.evaluators.readingorder_evaluator import (
    DatasetReadingOrderEvaluation,
    ReadingOrderEvaluator,
@@ -668,6 +670,18 @@ def evaluate(
        with open(save_fn, "w") as fd:
            json.dump(evaluation.model_dump(), fd, indent=2, sort_keys=True)

        # Evaluate with the pixel-wise layout evaluation
        pixel_layout_evaluator = PixelLayoutEvaluator()
        pixel_ds_evaluation: DatasetPixelLayoutEvaluation = pixel_layout_evaluator(
            idir, split=split
        )
        pixel_save_root: Path = save_fn.parent
        pixel_layout_evaluator.save_evaluations(
            benchmark,
            pixel_ds_evaluation,
            pixel_save_root,
        )

    elif modality == EvaluationModality.TABLE_STRUCTURE:
        table_evaluator = TableEvaluator()
        evaluation = table_evaluator(  # type: ignore
@@ -891,6 +905,28 @@ def visualize(
            _log.info(content)
            with open(log_filename, "a") as fd:
                fd.write(content)

            # Process stats from the pixel_layout_evaluator
            pixel_eval_fns = PixelLayoutEvaluator.evaluation_filenames(benchmark, odir)
            pixel_json_fn = pixel_eval_fns["json"]
            with open(pixel_json_fn, "r") as fd:
                pixel_layout_evaluation = (
                    DatasetPixelLayoutEvaluation.model_validate_json(fd.read())
                )
            log_and_save_stats(
                odir,
                benchmark,
                modality,
                "pixel_all_classes_f1",
                pixel_layout_evaluation.f1_all_classes_stats,
            )
            log_and_save_stats(
                odir,
                benchmark,
                modality,
                "pixel_collapsed_classes_f1",
                pixel_layout_evaluation.f1_collapsed_classes_stats,
            )
        except Exception as e:
            _log.error(f"Error processing layout evaluation: {str(e)}")

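Note on the new code paths above: evaluate() writes the pixel-layout results to a JSON artifact that visualize() later reads back via evaluation_filenames(). Below is a minimal sketch of that round trip; the dataset/output paths are placeholders, and the benchmark identifier is left as a stand-in for whatever value evaluate() receives (none of these are verified against this PR).

from pathlib import Path

from docling_eval.evaluators.pixel.pixel_types import DatasetPixelLayoutEvaluation
from docling_eval.evaluators.pixel_layout_evaluator import PixelLayoutEvaluator

idir = Path("benchmarks/dpbench/converted")    # placeholder: a converted dataset dir
odir = Path("benchmarks/dpbench/evaluations")  # placeholder: evaluation output dir
benchmark = ...  # placeholder: the same benchmark identifier evaluate() receives

# Run the pixel-wise layout evaluation over one split (as in evaluate()).
evaluator = PixelLayoutEvaluator()
ds_evaluation: DatasetPixelLayoutEvaluation = evaluator(idir, split="test")
evaluator.save_evaluations(benchmark, ds_evaluation, odir)

# Load the JSON artifact back (as in visualize()).
pixel_json_fn = PixelLayoutEvaluator.evaluation_filenames(benchmark, odir)["json"]
with open(pixel_json_fn, "r") as fd:
    loaded = DatasetPixelLayoutEvaluation.model_validate_json(fd.read())
print(loaded.f1_all_classes_stats)
print(loaded.f1_collapsed_classes_stats)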
5 changes: 5 additions & 0 deletions docling_eval/evaluators/layout_evaluator.py
@@ -939,6 +939,11 @@ def _extract_layout_data(
        _log.debug(f"GT pages: {sorted(gt_pages)}, Pred pages: {sorted(pred_pages)}")

        # Process pages in sorted order to ensure consistent alignment.
        # Each list is a List[Tuple[page_no, Dict[str, torch.Tensor]]]; the dict holds:
        #   "boxes": torch.tensor(bboxes, dtype=torch.float32),
        #   "labels": torch.tensor(labels, dtype=torch.long),
        #   "scores": torch.tensor(scores, dtype=torch.float32)  # only for the predictions
        # The bboxes have a top-left origin, are in x1y1x2y2 format, normalized and scaled to 100.
        ground_truths: List[Tuple[int, Dict[str, torch.Tensor]]] = []
        predictions: List[Tuple[int, Dict[str, torch.Tensor]]] = []

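For context on the format documented in the comment above, here is a minimal sketch of one ground-truth / prediction pair in that shape. The page size, boxes, labels, and the to_eval_box helper are illustrative assumptions, not taken from this PR.

from typing import Dict, List, Tuple

import torch

def to_eval_box(bbox, page_w, page_h):
    """Normalize a top-left-origin (x1, y1, x2, y2) box to the [0, 100] range."""
    x1, y1, x2, y2 = bbox
    return [100.0 * x1 / page_w, 100.0 * y1 / page_h,
            100.0 * x2 / page_w, 100.0 * y2 / page_h]

page_w, page_h = 800.0, 1000.0  # illustrative page size

# One ground-truth page: two boxes with integer class labels.
gt_entry: Tuple[int, Dict[str, torch.Tensor]] = (
    0,  # page_no
    {
        "boxes": torch.tensor(
            [to_eval_box((40, 50, 760, 120), page_w, page_h),
             to_eval_box((40, 140, 760, 900), page_w, page_h)],
            dtype=torch.float32,
        ),
        "labels": torch.tensor([0, 1], dtype=torch.long),
    },
)

# The matching prediction additionally carries confidence scores.
pred_entry: Tuple[int, Dict[str, torch.Tensor]] = (
    0,  # page_no
    {
        "boxes": torch.tensor([to_eval_box((42, 48, 758, 118), page_w, page_h)],
                              dtype=torch.float32),
        "labels": torch.tensor([0], dtype=torch.long),
        "scores": torch.tensor([0.97], dtype=torch.float32),
    },
)

ground_truths: List[Tuple[int, Dict[str, torch.Tensor]]] = [gt_entry]
predictions: List[Tuple[int, Dict[str, torch.Tensor]]] = [pred_entry]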