Fix GitHub issues (inference normalization, TTA memory use, prediction save dtype, demo config names)

Donglai Wei · Donglai Wei · commit 5d7854a12061 · 2025-11-23T18:20:00.000-05:00
diff --git a/connectomics/config/hydra_config.py b/connectomics/config/hydra_config.py
@@ -845,6 +845,9 @@ class InferenceDataConfig:
         "predictions.h5"  # Output filename (auto-pathed to inference/{checkpoint}/{output_name})
     )
 
+    # Image transformation (applied to test images during inference)
+    image_transform: ImageTransformConfig = field(default_factory=ImageTransformConfig)
+
     # 2D data support
     do_2d: bool = False  # Enable 2D data processing for inference
 
@@ -885,6 +888,9 @@ class TestTimeAugmentationConfig:
     save_predictions: bool = (
         False  # Save intermediate TTA predictions (before decoding) to disk (default: False)
     )
+    save_dtype: Optional[str] = (
+        None  # Data type for saving predictions: "float16", "float32", "uint8", "uint16", or None (keep original)
+    )
 
 
 @dataclass
diff --git a/connectomics/data/augment/build.py b/connectomics/data/augment/build.py
@@ -321,13 +321,19 @@ def _build_eval_transforms_impl(
     # else: mode == "test" -> no cropping for sliding window inference
 
     # Normalization - use smart normalization
-    if cfg.data.image_transform.normalize != "none":
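+    # Test mode prefers inference.data.image_transform and falls back to data.image_transform (NOTE(review): the fallback is likely unreachable once InferenceDataConfig always defines image_transform; confirm)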
+    if mode == "test" and hasattr(cfg, "inference") and hasattr(cfg.inference, "data") and hasattr(cfg.inference.data, "image_transform"):
+        image_transform = cfg.inference.data.image_transform
+    else:
+        image_transform = cfg.data.image_transform
+
+    if image_transform.normalize != "none":
         transforms.append(
             SmartNormalizeIntensityd(
                 keys=["image"],
-                mode=cfg.data.image_transform.normalize,
-                clip_percentile_low=cfg.data.image_transform.clip_percentile_low,
-                clip_percentile_high=cfg.data.image_transform.clip_percentile_high,
+                mode=image_transform.normalize,
+                clip_percentile_low=getattr(image_transform, 'clip_percentile_low', 0.0),
+                clip_percentile_high=getattr(image_transform, 'clip_percentile_high', 1.0),
             )
         )
 
diff --git a/connectomics/data/augment/monai_transforms.py b/connectomics/data/augment/monai_transforms.py
@@ -1010,12 +1010,13 @@ class SmartNormalizeIntensityd(MapTransform):
     - "none": No normalization
     - "normal": Z-score normalization (x - mean) / std
     - "0-1": Min-max scaling to [0, 1] (default)
+    - "divide-K": Simple divide by K (e.g., "divide-255" for uint8 images)
 
     Percentile clipping is applied BEFORE normalization when low > 0.0 or high < 1.0.
 
     Args:
         keys: Keys to normalize
-        mode: Normalization mode ("none", "normal", "0-1")
+        mode: Normalization mode ("none", "normal", "0-1", or "divide-K")
         clip_percentile_low: Lower percentile (0.0 = no clip, 0.05 = 5th percentile)
         clip_percentile_high: Upper percentile (1.0 = no clip, 0.95 = 95th percentile)
         allow_missing_keys: Whether to allow missing keys
@@ -1044,9 +1045,20 @@ def __init__(
         allow_missing_keys: bool = False,
     ) -> None:
         super().__init__(keys, allow_missing_keys)
-        if mode not in ["none", "normal", "0-1"]:
-            raise ValueError(f"Invalid mode '{mode}'. Must be 'none', 'normal', or '0-1'")
-        self.mode = mode
+
+        # Parse mode - support "divide-K" format where K is a number
+        self.divide_value = None
+        if mode.startswith("divide-"):
+            try:
+                self.divide_value = float(mode.split("-", 1)[1])
+                self.mode = "divide"
+            except ValueError:
+                raise ValueError(f"Invalid divide mode '{mode}'. Format should be 'divide-K' where K is a number (e.g., 'divide-255')")
+        elif mode not in ["none", "normal", "0-1"]:
+            raise ValueError(f"Invalid mode '{mode}'. Must be 'none', 'normal', '0-1', or 'divide-K'")
+        else:
+            self.mode = mode
+
         self.clip_percentile_low = clip_percentile_low
         self.clip_percentile_high = clip_percentile_high
 
@@ -1088,6 +1100,9 @@ def _normalize(
             max_val = volume.max()
             if max_val > min_val:
                 volume = (volume - min_val) / (max_val - min_val)
+        elif self.mode == "divide":
+            # Simple divide by K (e.g., divide-255 for uint8 images)
+            volume = volume / self.divide_value
 
         return volume if is_numpy else torch.from_numpy(volume)
 
diff --git a/connectomics/data/process/monai_transforms.py b/connectomics/data/process/monai_transforms.py
@@ -67,7 +67,17 @@ def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         d = dict(data)
         for key in self.key_iterator(d):
             if key in d:
-                d[key] = seg_to_affinity(d[key], self.offsets)
+                label = d[key]
+                # Convert tensor to numpy if needed
+                if isinstance(label, torch.Tensor):
+                    label = label.detach().cpu().numpy()
+                # Handle channel dimension: input may be [C, D, H, W] or [D, H, W]
+                if label.ndim == 4 and label.shape[0] == 1:
+                    label = label[0]  # Remove channel dim: [1, D, H, W] -> [D, H, W]
+                elif label.ndim == 3 and label.shape[0] == 1:
+                    # 2D case: [1, H, W] -> keep as is for 2D affinity
+                    pass
+                d[key] = seg_to_affinity(label, self.offsets)
         return d
 
 
@@ -700,6 +710,17 @@ def _prepare_label(self, label: Any) -> Tuple[np.ndarray, bool]:
         return np.asarray(label), False
 
     def _to_tensor(self, array: np.ndarray, *, add_batch_dim: bool) -> torch.Tensor:
+        # Ensure array is a proper numpy array (not a numpy scalar type like numpy.uint8)
+        # torch.as_tensor cannot infer dtype from numpy scalar types
+        if not isinstance(array, np.ndarray):
+            array = np.asarray(array)
+        # Widen narrow integer dtypes before conversion (uint8/uint16 -> float32, int8 -> int32); torch lacks some numpy dtypes
+        if array.dtype == np.uint8:
+            array = array.astype(np.float32)
+        elif array.dtype == np.uint16:
+            array = array.astype(np.float32)
+        elif array.dtype == np.int8:
+            array = array.astype(np.int32)
         tensor = torch.as_tensor(array)
         if self.output_dtype is not None:
             tensor = tensor.to(self.output_dtype)
diff --git a/connectomics/lightning/inference.py b/connectomics/lightning/inference.py
@@ -176,8 +176,9 @@ def extract_main_output(
 
     def sliding_window_predict(self, inputs: torch.Tensor) -> torch.Tensor:
         """Wrapper used by MONAI inferer to obtain primary model predictions."""
-        outputs = self.forward_fn(inputs)
-        return self.extract_main_output(outputs)
+        with torch.no_grad():
+            outputs = self.forward_fn(inputs)
+            return self.extract_main_output(outputs)
 
     def apply_tta_preprocessing(self, tensor: torch.Tensor) -> torch.Tensor:
         """
@@ -220,6 +221,10 @@ def apply_tta_preprocessing(self, tensor: torch.Tensor) -> torch.Tensor:
 
                 if act == "sigmoid":
                     channel_tensor = torch.sigmoid(channel_tensor)
+                elif act == "scale_sigmoid":
+                    # Scaled sigmoid for BANIS: sigmoid(0.2 * x)
+                    # This avoids numerical issues with high-confidence fp16 predictions
+                    channel_tensor = torch.sigmoid(0.2 * channel_tensor)
                 elif act == "tanh":
                     channel_tensor = torch.tanh(channel_tensor)
                 elif act == "softmax":
@@ -237,7 +242,7 @@ def apply_tta_preprocessing(self, tensor: torch.Tensor) -> torch.Tensor:
                 else:
                     raise ValueError(
                         f"Unknown activation '{act}' for channels {start_ch}:{end_ch}. "
-                        f"Supported: 'sigmoid', 'softmax', 'tanh', None"
+                        f"Supported: 'sigmoid', 'scale_sigmoid', 'softmax', 'tanh', None"
                     )
 
                 activated_channels.append(channel_tensor)
@@ -334,13 +339,14 @@ def predict_with_tta(
         # Handle different tta_flip_axes configurations
         if tta_flip_axes_config is None:
             # null: No augmentation, but still apply tta_act and tta_channel (no ensemble)
-            if self.sliding_inferer is not None:
-                pred = self.sliding_inferer(inputs=images, network=self.sliding_window_predict)
-            else:
-                pred = self.sliding_window_predict(images)
+            with torch.no_grad():
+                if self.sliding_inferer is not None:
+                    pred = self.sliding_inferer(inputs=images, network=self.sliding_window_predict)
+                else:
+                    pred = self.sliding_window_predict(images)
 
-            # Apply TTA preprocessing (activation + channel selection) even without augmentation
-            ensemble_result = self.apply_tta_preprocessing(pred)
+                # Apply TTA preprocessing (activation + channel selection) even without augmentation
+                ensemble_result = self.apply_tta_preprocessing(pred)
         else:
             if tta_flip_axes_config == "all" or tta_flip_axes_config == []:
                 # "all" or []: All flips (all combinations of spatial axes)
@@ -369,7 +375,13 @@ def predict_with_tta(
                 )
 
             # Apply TTA with flips, preprocessing, and ensembling
-            predictions = []
+            # Keep a running ensemble (mean, min, or max) instead of accumulating all predictions, to reduce memory usage
+            ensemble_mode = getattr(
+                self.cfg.inference.test_time_augmentation, "ensemble_mode", "mean"
+            )
+
+            ensemble_result = None
+            num_predictions = 0
 
             for flip_axes in tta_flip_axes:
                 # Apply flip augmentation
@@ -379,40 +391,52 @@ def predict_with_tta(
                     x_aug = images
 
                 # Inference with sliding window
-                if self.sliding_inferer is not None:
-                    pred = self.sliding_inferer(
-                        inputs=x_aug,
-                        network=self.sliding_window_predict,
-                    )
-                else:
-                    pred = self.sliding_window_predict(x_aug)
+                with torch.no_grad():
+                    if self.sliding_inferer is not None:
+                        pred = self.sliding_inferer(
+                            inputs=x_aug,
+                            network=self.sliding_window_predict,
+                        )
+                    else:
+                        pred = self.sliding_window_predict(x_aug)
 
-                # Invert flip for prediction
-                if flip_axes:
-                    pred = Flip(spatial_axis=flip_axes)(pred)
+                    # Invert flip for prediction
+                    if flip_axes:
+                        pred = Flip(spatial_axis=flip_axes)(pred)
 
-                # Apply TTA preprocessing (activation + channel selection) if configured
-                # Note: This is applied BEFORE ensembling for probability-space averaging
-                pred_processed = self.apply_tta_preprocessing(pred)
+                    # Apply TTA preprocessing (activation + channel selection) if configured
+                    # Note: This is applied BEFORE ensembling for probability-space averaging
+                    pred_processed = self.apply_tta_preprocessing(pred)
 
-                predictions.append(pred_processed)
+                    # Free intermediate memory
+                    del pred
+                    if flip_axes:
+                        del x_aug
 
-            # Ensemble predictions based on configured mode
-            ensemble_mode = getattr(
-                self.cfg.inference.test_time_augmentation, "ensemble_mode", "mean"
-            )
-            stacked_preds = torch.stack(predictions, dim=0)
-
-            if ensemble_mode == "mean":
-                ensemble_result = stacked_preds.mean(dim=0)
-            elif ensemble_mode == "min":
-                ensemble_result = stacked_preds.min(dim=0)[0]  # min returns (values, indices)
-            elif ensemble_mode == "max":
-                ensemble_result = stacked_preds.max(dim=0)[0]  # max returns (values, indices)
-            else:
-                raise ValueError(
-                    f"Unknown TTA ensemble mode: {ensemble_mode}. Use 'mean', 'min', or 'max'."
-                )
+                    # Update running ensemble to reduce memory usage
+                    if ensemble_result is None:
+                        ensemble_result = pred_processed.clone()
+                    else:
+                        if ensemble_mode == "mean":
+                            # Running average: new_avg = old_avg + (new_val - old_avg) / n
+                            ensemble_result = ensemble_result + (pred_processed - ensemble_result) / (num_predictions + 1)
+                        elif ensemble_mode == "min":
+                            ensemble_result = torch.minimum(ensemble_result, pred_processed)
+                        elif ensemble_mode == "max":
+                            ensemble_result = torch.maximum(ensemble_result, pred_processed)
+                        else:
+                            raise ValueError(
+                                f"Unknown TTA ensemble mode: {ensemble_mode}. Use 'mean', 'min', or 'max'."
+                            )
+
+                    num_predictions += 1
+
+                    # Free processed prediction memory
+                    del pred_processed
+
+                    # Force CUDA cache clear periodically to prevent OOM
+                    if torch.cuda.is_available() and num_predictions % 4 == 0:
+                        torch.cuda.empty_cache()
 
         # Apply mask after ensemble if requested
         apply_mask = getattr(self.cfg.inference.test_time_augmentation, "apply_mask", False)
@@ -868,9 +892,40 @@ def write_outputs(
         # Squeeze singleton dimensions (e.g., (1, 1, D, H, W) -> (D, H, W))
         sample = np.squeeze(sample)
 
+        # Convert to specified dtype if save_dtype is set
+        save_dtype = None
+        if hasattr(cfg.inference, "test_time_augmentation"):
+            save_dtype = getattr(cfg.inference.test_time_augmentation, "save_dtype", None)
+
+        if save_dtype is not None:
+            original_dtype = sample.dtype
+            if save_dtype == "float16":
+                sample = sample.astype(np.float16)
+            elif save_dtype == "float32":
+                sample = sample.astype(np.float32)
+            elif save_dtype == "uint8":
+                # For uint8, detect value range and scale appropriately
+                if sample.min() < 0:
+                    # [-1, 1] to [0, 255]
+                    sample = ((sample + 1) * 127.5).clip(0, 255).astype(np.uint8)
+                elif sample.max() <= 1.0:
+                    # [0, 1] to [0, 255]
+                    sample = (sample * 255).clip(0, 255).astype(np.uint8)
+                else:
+                    sample = sample.clip(0, 255).astype(np.uint8)
+            elif save_dtype == "uint16":
+                # For uint16, scale [0, 1] data to [0, 65535]; data with larger values is only clipped
+                if sample.max() <= 1.0:
+                    sample = (sample * 65535).clip(0, 65535).astype(np.uint16)
+                else:
+                    sample = sample.clip(0, 65535).astype(np.uint16)
+            else:
+                print(f"  WARNING: Unknown save_dtype '{save_dtype}', keeping original dtype")
+
         # Write HDF5 file
         try:
             write_hdf5(str(output_path), sample, dataset="main")
-            print(f"  Saved prediction: {output_path} (shape: {sample.shape})")
+            dtype_info = f", dtype: {sample.dtype}" if save_dtype else ""
+            print(f"  Saved prediction: {output_path} (shape: {sample.shape}{dtype_info})")
         except Exception as e:
             print(f"  ERROR: write_outputs - failed to write {output_path}: {e}")
diff --git a/connectomics/lightning/lit_model.py b/connectomics/lightning/lit_model.py
@@ -337,6 +337,10 @@ def _apply_tta_preprocessing(self, tensor: torch.Tensor) -> torch.Tensor:
 
                 if act == 'sigmoid':
                     channel_tensor = torch.sigmoid(channel_tensor)
+                elif act == 'scale_sigmoid':
+                    # Scaled sigmoid for BANIS: sigmoid(0.2 * x)
+                    # This avoids numerical issues with high-confidence fp16 predictions
+                    channel_tensor = torch.sigmoid(0.2 * channel_tensor)
                 elif act == 'tanh':
                     channel_tensor = torch.tanh(channel_tensor)
                 elif act == 'softmax':
@@ -354,7 +358,7 @@ def _apply_tta_preprocessing(self, tensor: torch.Tensor) -> torch.Tensor:
                 else:
                     raise ValueError(
                         f"Unknown activation '{act}' for channels {start_ch}:{end_ch}. "
-                        f"Supported: 'sigmoid', 'softmax', 'tanh', None"
+                        f"Supported: 'sigmoid', 'scale_sigmoid', 'softmax', 'tanh', None"
                     )
 
                 activated_channels.append(channel_tensor)
diff --git a/connectomics/utils/demo.py b/connectomics/utils/demo.py
@@ -80,8 +80,8 @@ def create_demo_config():
     from connectomics.config import Config
     from connectomics.config.hydra_config import (
         SystemConfig,
-        TrainingSystemConfig,
-        InferenceSystemConfig,
+        SystemTrainingConfig,
+        SystemInferenceConfig,
         ModelConfig,
         DataConfig,
         OptimizationConfig,
@@ -97,13 +97,13 @@ def create_demo_config():
     cfg = Config(
         system=SystemConfig(
             seed=42,
-            training=TrainingSystemConfig(
+            training=SystemTrainingConfig(
                 num_gpus=1 if torch.cuda.is_available() else 0,
                 num_cpus=2,
                 batch_size=2,
                 num_workers=0,  # 0 for demo to avoid multiprocessing issues
             ),
-            inference=InferenceSystemConfig(
+            inference=SystemInferenceConfig(
                 num_gpus=1 if torch.cuda.is_available() else 0,
                 num_cpus=2,
                 batch_size=2,
diff --git a/tutorials/mednext_mitoverse.yaml b/tutorials/mednext_mitoverse.yaml