docling-project · cau-git · Nov 11, 2025 · Nov 11, 2025 · Nov 11, 2025 · Nov 11, 2025
diff --git a/docling_eval/campaign_tools/README_review_bundle.md b/docling_eval/campaign_tools/README_review_bundle.md
@@ -7,7 +7,7 @@ Follow these steps whenever you need to review a submission’s visualization ou
 You need to start from a submission evaluation directory, e.g. `submission-4_10-11-2025_v0`, as created through the `cvat_evaluation_pipeline.py`.
 It is required to contain a `combined_evaluation.csv` (export it from `combined_evaluation.xlsx` if missing).
 
-Run the bundle builder from the submission root. This reads `combined_evaluation.csv` and writes a timestamped bundle folder next to it. Both `need_review` and `need_task2_review` are included automatically in every bundle, so you can switch between them later without extra arguments.
+Run the bundle builder from the submission root. This reads `combined_evaluation.csv` and writes a timestamped bundle folder next to it.
 
 ```bash
 uv run python -m docling_eval.campaign_tools.review_bundle_builder \
@@ -16,17 +16,6 @@ uv run python -m docling_eval.campaign_tools.review_bundle_builder \
 
 The command prints the bundle path, e.g. `submission-4_10-11-2025_v0/review_bundle_20251110_162408`.
 
-### Update an existing bundle in-place
-
-If you already have progress in `review_state.json`, regenerate the manifest and static assets without touching that file by passing `--bundle-dir`. When `--bundle-dir` is supplied you can omit the positional `submission_dir`; the tool automatically resolves it to the bundle’s parent directory:
-
-```bash
-uv run python -m docling_eval.campaign_tools.review_bundle_builder \
-  --bundle-dir /path/to/submission-root/review_bundle_20251110_162408
-```
-
-This keeps `review_state.json` intact while refreshing the HTML/JS/CSS.
-
 ## 2. Host the Submission Root
 
 Serve the submission directory so the bundle and original visualization HTMLs share the same origin:
@@ -38,7 +27,7 @@ python -m http.server 8765
 
 Open `http://localhost:8765/review_bundle_*/index.html` in your browser.
 
-## 3. Connect `review_state.json`
+## 3. Connect to the `review_state.json` database
 
 At the top of the page click **Connect bundle for saving**. Choose the same `review_bundle_*` folder. **This is critical** to ensure the browser has permissions to save into the `review_state.json`, which acts as a database. Once connected, the button switches to “Change bundle connection” and the status line on the bottom states that saves go directly to `review_state.json`. Decisions are still mirrored in browser storage as a safety net.
 
@@ -48,9 +37,9 @@ You can import prior logs via **Import log** (JSON or CSV). Imports do not chang
 
 The left sidebar lists documents sorted by the configured column. Selecting an entry loads its visualization(s). You will find the tab buttons for key-value and layout if both are available. The decision panel shows separate controls for **User A** and **User B**:
 
-1. Use the **Priority** dropdown above the list to switch between `need_review`, `need_task2_review`, or any other column you exposed during bundle creation.
-2. Click **Correct** or **Need changes** for each user as needed (buttons stay lit when active).
-3. Optionally add a shared comment.
-4. Press **Save decision**. The status line confirms whether you saved user verdicts, comments only, or cleared the entry.
+1. Click **Correct** or **Need changes** for each user as needed (buttons stay lit when active).
+2. Optionally add a shared comment.
+3. Press **Save decision**. The status line confirms whether you saved user verdicts, comments only, or cleared the entry.
 
 Edits remain locked to the current sample until you save or discard them; this prevents accidental navigation losses. Use **Export CSV** at any time for an external report.
+
diff --git a/docling_eval/campaign_tools/cvat_create_annotation_tasks_from_folders.py b/docling_eval/campaign_tools/cvat_create_annotation_tasks_from_folders.py
@@ -32,6 +32,7 @@
 
 from docling_eval.cli.main import create_cvat, create_eval, create_gt
 from docling_eval.datamodels.types import BenchMarkNames, PredictionProviderType
+from docling_eval.utils.utils import count_pages_in_file
 
 app = typer.Typer(add_completion=False)
 
@@ -67,14 +68,19 @@ def process_subdirectories(
     for subdir in subdirs:
         subdir_name = subdir.name
 
-        # Count total files in subdirectory
-        total_files = 0
+        # Collect all files and count pages
+        all_files: list[Path] = []
         for ext in ["pdf", "tif", "tiff", "jpg", "jpeg", "png", "bmp", "gif", "json"]:
-            total_files += len(list(subdir.glob(f"*.{ext}")))
-            total_files += len(list(subdir.glob(f"*.{ext.upper()}")))
+            all_files.extend(subdir.glob(f"*.{ext}"))
+            all_files.extend(subdir.glob(f"*.{ext.upper()}"))
+        all_files.sort()
+
+        total_files = len(all_files)
+        total_pages = sum(count_pages_in_file(f) for f in all_files)
 
         typer.echo(f"\nProcessing: {subdir_name}")
         typer.echo(f"  Total files found: {total_files}")
+        typer.echo(f"  Total pages found: {total_pages}")
 
         # Calculate number of chunks needed
         num_chunks = (total_files + max_files_per_chunk - 1) // max_files_per_chunk