fix(deploy): Fix discrepancy between lightning and standalone inferencers (#2843)

ashwinvaidya17 · web-flow · commit 3412e9da00bc · 2025-07-30T17:04:07.000+02:00
* Add test + modify anomalib module

Signed-off-by: Ashwin Vaidya <ashwin.vaidya@intel.com>
diff --git a/src/anomalib/models/components/base/anomalib_module.py b/src/anomalib/models/components/base/anomalib_module.py
@@ -208,8 +208,9 @@ def predict_step(
     ) -> STEP_OUTPUT:
         """Perform prediction step.
 
-        This method is called during the predict stage of training. By default,
-        it calls the validation step.
+        This method is called during the predict stage of training. It calls
+        the model's forward method to ensure consistency with exported model behavior,
+        then merges the predictions into the batch for post-processing.
 
         Args:
             batch (Batch): Input batch
@@ -218,17 +219,19 @@ def predict_step(
                 Defaults to ``0``.
 
         Returns:
-            STEP_OUTPUT: Model predictions
+            STEP_OUTPUT: Updated batch with model predictions
         """
-        del dataloader_idx  # These variables are not used.
+        del dataloader_idx, batch_idx  # These variables are not used.
 
-        return self.validation_step(batch, batch_idx)
+        predictions = self.model(batch.image)
+        return batch.update(**predictions._asdict())
 
     def test_step(self, batch: Batch, batch_idx: int, *args, **kwargs) -> STEP_OUTPUT:
         """Perform test step.
 
-        This method is called during the test stage of training. By default,
-        it calls the predict step.
+        This method is called during the test stage of training. It calls
+        the model's forward method to ensure consistency with exported model behavior,
+        then merges the predictions into the batch for post-processing.
 
         Args:
             batch (Batch): Input batch
@@ -237,11 +240,12 @@ def test_step(self, batch: Batch, batch_idx: int, *args, **kwargs) -> STEP_OUTPU
             **kwargs: Additional keyword arguments (unused)
 
         Returns:
-            STEP_OUTPUT: Model predictions
+            STEP_OUTPUT: Updated batch with model predictions
         """
-        del args, kwargs  # These variables are not used.
+        del args, kwargs, batch_idx  # These variables are not used.
 
-        return self.predict_step(batch, batch_idx)
+        predictions = self.model(batch.image)
+        return batch.update(**predictions._asdict())
 
     @property
     @abstractmethod
diff --git a/src/anomalib/models/image/fre/lightning_model.py b/src/anomalib/models/image/fre/lightning_model.py
@@ -185,10 +185,9 @@ def trainer_arguments(self) -> dict[str, Any]:
         Returns:
             dict[str, Any]: Dictionary of trainer arguments:
                 - ``gradient_clip_val``: ``0``
-                - ``max_epochs``: ``220``
                 - ``num_sanity_val_steps``: ``0``
         """
-        return {"gradient_clip_val": 0, "max_epochs": 220, "num_sanity_val_steps": 0}
+        return {"gradient_clip_val": 0, "num_sanity_val_steps": 0}
 
     @property
     def learning_type(self) -> LearningType:
diff --git a/src/anomalib/models/image/vlm_ad/lightning_model.py b/src/anomalib/models/image/vlm_ad/lightning_model.py
@@ -168,6 +168,14 @@ def validation_step(self, batch: ImageBatch, *args, **kwargs) -> ImageBatch:
         batch.pred_label = torch.tensor([1.0 if r.startswith("Y") else 0.0 for r in responses], device=self.device)
         return batch
 
+    def test_step(self, batch: ImageBatch, *args, **kwargs) -> ImageBatch:  # type: ignore[override]
+        """Handle a test batch by delegating to ``validation_step`` with the same arguments."""
+        return self.validation_step(batch, *args, **kwargs)
+
+    def predict_step(self, batch: ImageBatch, *args, **kwargs) -> ImageBatch:  # type: ignore[override]
+        """Handle a predict batch by delegating to ``validation_step`` with the same arguments."""
+        return self.validation_step(batch, *args, **kwargs)
+
     @property
     def learning_type(self) -> LearningType:
         """Get the learning type of the model.
diff --git a/src/anomalib/post_processing/post_processor.py b/src/anomalib/post_processing/post_processor.py
@@ -210,7 +210,11 @@ def forward(self, predictions: InferenceBatch) -> InferenceBatch:
         if predictions.pred_score is None and predictions.anomaly_map is None:
             msg = "At least one of pred_score or anomaly_map must be provided."
             raise ValueError(msg)
-        pred_score = predictions.pred_score or torch.amax(predictions.anomaly_map, dim=(-2, -1))
+        pred_score = (
+            predictions.pred_score
+            if predictions.pred_score is not None
+            else torch.amax(predictions.anomaly_map, dim=(-2, -1))
+        )
 
         if self.enable_normalization:
             pred_score = self._normalize(pred_score, self.image_min, self.image_max, self.image_threshold)
diff --git a/tests/integration/test_task_types.py b/tests/integration/test_task_types.py
@@ -9,10 +9,11 @@
 
 import pytest
 import torch
+from torch import nn
 from torchmetrics import Metric
 
 from anomalib import LearningType
-from anomalib.data import AnomalibDataModule, Batch, Folder, ImageDataFormat
+from anomalib.data import AnomalibDataModule, Batch, Folder, ImageDataFormat, InferenceBatch
 from anomalib.engine import Engine
 from anomalib.metrics import AnomalibMetric, Evaluator
 from anomalib.models import AnomalibModule
@@ -21,13 +22,29 @@
 from tests.helpers.data import DummyImageDatasetGenerator
 
 
+class _DummyModel(nn.Module):
+    """Dummy model for testing."""
+
+    @staticmethod
+    def forward(image_tensor: torch.Tensor) -> InferenceBatch:
+        """Dummy forward pass."""
+        return InferenceBatch(
+            pred_score=torch.rand(image_tensor.shape[0], device=image_tensor.device),
+            anomaly_map=torch.rand(image_tensor.shape[0], *image_tensor.shape[-2:], device=image_tensor.device),
+        )
+
+
 class DummyBaseModel(AnomalibModule):
     """Dummy model for testing.
 
     No training, and all auxiliary components default to None. This allows testing of the different components
     in isolation.
     """
 
+    def __init__(self, *args, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        self.model = _DummyModel()
+
     def training_step(self, *args, **kwargs) -> None:
         """Dummy training step."""
 
@@ -66,7 +83,7 @@ class DummyClassificationModel(DummyBaseModel):
     def validation_step(self, batch: Batch, *args, **kwargs) -> Batch:
         """Validation steps that returns random image-level scores."""
         del args, kwargs
-        batch.pred_score = torch.rand(batch.batch_size, device=self.device)
+        batch.pred_score = self.model(batch.image).pred_score
         return batch
 
 
@@ -79,8 +96,9 @@ class DummySegmentationModel(DummyBaseModel):
     def validation_step(self, batch: Batch, *args, **kwargs) -> Batch:
         """Validation steps that returns random image- and pixel-level scores."""
         del args, kwargs
-        batch.pred_score = torch.rand(batch.batch_size, device=self.device)
-        batch.anomaly_map = torch.rand(batch.batch_size, *batch.image.shape[-2:], device=self.device)
+        result = self.model(batch.image)
+        batch.pred_score = result.pred_score
+        batch.anomaly_map = result.anomaly_map
         return batch
 
 
diff --git a/tests/unit/deploy/test_inferencer.py b/tests/unit/deploy/test_inferencer.py
@@ -9,7 +9,10 @@
 import numpy as np
 import pytest
 import torch
+from PIL import Image
+from torch.utils.data import DataLoader
 
+from anomalib.data import ImageBatch, NumpyImageBatch, PredictDataset
 from anomalib.deploy import ExportType, OpenVINOInferencer, TorchInferencer
 from anomalib.engine import Engine
 from anomalib.models import Padim
@@ -108,3 +111,82 @@ def test_openvino_inference(ckpt_path: Callable[[str], Path]) -> None:
     for image in openvino_dataloader():
         prediction = openvino_inferencer.predict(image)
         assert 0.0 <= prediction.pred_score <= 1.0  # confirm if predicted scores are normalized
+
+
+def compare_predictions(
+    pred1: ImageBatch | NumpyImageBatch,
+    pred2: ImageBatch | NumpyImageBatch,
+    tolerance: float = 1e-3,
+) -> None:
+    """Compare predictions from two different inference methods."""
+    score1 = pred1.pred_score if hasattr(pred1, "pred_score") else None
+    score2 = pred2.pred_score if hasattr(pred2, "pred_score") else None
+
+    map1 = pred1.anomaly_map if hasattr(pred1, "anomaly_map") else None
+    map2 = pred2.anomaly_map if hasattr(pred2, "anomaly_map") else None
+
+    if isinstance(map1, torch.Tensor):
+        map1 = map1.cpu().numpy()
+    if isinstance(map2, torch.Tensor):
+        map2 = map2.cpu().numpy()
+
+    if score1 is None and score2 is None and map1 is None and map2 is None:
+        pytest.fail("No predictions found")
+
+    if score1 is not None and score2 is not None:
+        if isinstance(score1, torch.Tensor):
+            score1 = score1.cpu().item()
+        if isinstance(score2, torch.Tensor):
+            score2 = score2.cpu().item()
+
+    if score1 is not None and score2 is not None:
+        score_diff = abs(score1 - score2)
+        if score_diff > tolerance:
+            pytest.fail(f"Anomaly score absolute difference: {score_diff:.3f}")
+
+    if map1 is not None and map2 is not None:
+        map_diff = np.abs(map1 - map2)
+        if np.mean(map_diff) > tolerance:
+            pytest.fail(f"Anomaly map mean absolute difference: {np.mean(map_diff):.3f}")
+
+
+def test_inference_similarity(
+    ckpt_path: Callable[[str], Path],
+    project_path: Path,
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Test inference result."""
+    # Set TRUST_REMOTE_CODE environment variable for the test
+    monkeypatch.setenv("TRUST_REMOTE_CODE", "1")
+
+    rng = np.random.default_rng()
+    image = rng.integers(0, 255, (256, 256, 3), dtype=np.uint8)
+    image = Image.fromarray(image)
+    test_image_path = tmp_path / "test_image.png"
+    image.save(test_image_path)
+
+    model = Padim()
+    engine = Engine(logger=False, default_root_dir=project_path, devices=1)
+
+    predict_dataset = PredictDataset(test_image_path)
+    predict_dataloader = DataLoader(
+        predict_dataset,
+        batch_size=1,
+        collate_fn=predict_dataset.collate_fn,
+        pin_memory=True,
+    )
+    engine_pred: list[ImageBatch] = engine.predict(model, dataloaders=predict_dataloader, ckpt_path=ckpt_path("Padim"))
+    engine_pred = engine_pred[0]
+
+    torch_path = engine.export(model, export_type=ExportType.TORCH, export_root=project_path)
+    torch_inferencer = TorchInferencer(torch_path, device="cpu")
+    torch_pred = torch_inferencer.predict(test_image_path)
+
+    openvino_path = engine.export(model, export_type=ExportType.OPENVINO, export_root=project_path)
+    openvino_inferencer = OpenVINOInferencer(openvino_path, device="CPU")
+    openvino_pred = openvino_inferencer.predict(test_image_path)
+
+    compare_predictions(engine_pred, torch_pred)
+    compare_predictions(engine_pred, openvino_pred)
+    compare_predictions(torch_pred, openvino_pred)