Merged
25 commits
de2b6bd
feat: add optional gradient checkpointing to unet
Sep 3, 2025
66edcb5
fix: small ruff issue
Sep 3, 2025
e66e357
Update monai/networks/nets/unet.py
ferreirafabio80 Sep 4, 2025
feefcaa
docs: update docstrings
Sep 4, 2025
e112457
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 4, 2025
f673ca1
fix: avoid BatchNorm subblocks
Sep 4, 2025
69540ff
fix: revert batch norm changes
Sep 4, 2025
42ec757
refactor: creates a subclass of UNet and overrides the get connection…
Oct 1, 2025
a2e8474
chore: remove use checkpointing from doc string
Oct 1, 2025
4c4782e
fix: linting issues
Oct 2, 2025
515c659
feat: add activation checkpointing to down and up paths to be more ef…
Oct 8, 2025
da5a3a4
refactor: move activation checkpointing wrapper to blocks
Nov 4, 2025
43dec88
chore: add docstrings to checkpointed unet
Nov 4, 2025
84c0f48
test: add checkpoint unet test
Nov 7, 2025
5805515
fix: change test name
Nov 7, 2025
1aa8e3c
fix: simplify test and make sure that checkpoint unet runs well in tr…
Nov 7, 2025
447d9f2
fix: set seed
Nov 7, 2025
b20a19e
fix: fix testing bugs
Nov 7, 2025
41f000f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 7, 2025
a068c0e
chore: add test docstrings
Nov 10, 2025
26668cd
DCO Remediation Commit for Fabio Ferreira <f.ferreira@qureight.com>
Nov 10, 2025
814fa80
fix: remove test script save
Nov 13, 2025
c45ee48
fix: tighten tolerance for numerical equivalence
Nov 13, 2025
4349d3f
chore: update doc strings
Nov 14, 2025
885993b
Merge branch 'dev' into feat/add_activation_checkpointing_to_unet
KumoLiu Nov 14, 2025
24 changes: 24 additions & 0 deletions monai/networks/nets/unet.py
@@ -13,9 +13,11 @@

import warnings
from collections.abc import Sequence
from typing import cast

import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint

from monai.networks.blocks.convolutions import Convolution, ResidualUnit
from monai.networks.layers.factories import Act, Norm
@@ -24,6 +26,22 @@
__all__ = ["UNet", "Unet"]


class _ActivationCheckpointWrapper(nn.Module):
    """Apply activation checkpointing to the wrapped module during training."""

    def __init__(self, module: nn.Module) -> None:
        super().__init__()
        self.module = module

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Checkpoint only when it can pay off: training mode, gradients enabled,
        # and an input that participates in autograd.
        if self.training and torch.is_grad_enabled() and x.requires_grad:
            try:
                return cast(torch.Tensor, checkpoint(self.module, x, use_reentrant=False))
            except TypeError:
                # Fallback for older PyTorch without `use_reentrant`.
                return cast(torch.Tensor, checkpoint(self.module, x))
        return cast(torch.Tensor, self.module(x))
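
As a minimal sketch of the wrapper's behavior (illustrative only, not part of unet.py; `block` and `x` are made-up names, and the `use_reentrant=False` path assumes PyTorch >= 1.11):

import torch
import torch.nn as nn

block = nn.Sequential(nn.Conv2d(4, 4, 3, padding=1), nn.ReLU())
wrapped = _ActivationCheckpointWrapper(block)

x = torch.randn(2, 4, 8, 8, requires_grad=True)

wrapped.train()
wrapped(x).sum().backward()  # activations are recomputed during backward rather than stored

wrapped.eval()
with torch.no_grad():
    _ = wrapped(x)  # checkpointing is bypassed: a plain self.module(x) call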


class UNet(nn.Module):
"""
Enhanced version of UNet which has residual units implemented with the ResidualUnit class.
@@ -69,6 +87,8 @@ class UNet(nn.Module):
            if a conv layer is directly followed by a batch norm layer, bias should be False.
        adn_ordering: a string representing the ordering of activation (A), normalization (N), and dropout (D).
            Defaults to "NDA". See also: :py:class:`monai.networks.blocks.ADN`.
        use_checkpointing: if True, apply activation checkpointing to internal sub-blocks during training to reduce
            memory usage at the cost of extra compute. Checkpointing is bypassed in eval mode and when gradients
            are disabled. Defaults to False.

    Examples::

@@ -118,6 +138,7 @@ def __init__(
        dropout: float = 0.0,
        bias: bool = True,
        adn_ordering: str = "NDA",
        use_checkpointing: bool = False,
    ) -> None:
        super().__init__()

Expand Down Expand Up @@ -146,6 +167,7 @@ def __init__(
        self.dropout = dropout
        self.bias = bias
        self.adn_ordering = adn_ordering
        self.use_checkpointing = use_checkpointing

        def _create_block(
            inc: int, outc: int, channels: Sequence[int], strides: Sequence[int], is_top: bool
@@ -192,6 +214,8 @@ def _get_connection_block(self, down_path: nn.Module, up_path: nn.Module, subblo
            subblock: block defining the next layer in the network.
        Returns: block for this layer: `nn.Sequential(down_path, SkipConnection(subblock), up_path)`
        """
        if self.use_checkpointing:
            subblock = _ActivationCheckpointWrapper(subblock)
        return nn.Sequential(down_path, SkipConnection(subblock), up_path)

    def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_top: bool) -> nn.Module:
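
Putting the pieces together, a hedged usage sketch of the new flag; the remaining constructor arguments follow the existing UNet API, and the shapes, channel, and stride values here are arbitrary:

import torch
from monai.networks.nets import UNet

model = UNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=2,
    channels=(16, 32, 64, 128),
    strides=(2, 2, 2),
    num_res_units=2,
    use_checkpointing=True,  # the flag added by this PR
)

model.train()
x = torch.randn(1, 1, 64, 64, 64)
model(x).sum().backward()  # wrapped sub-blocks recompute activations during backward

model.eval()
with torch.no_grad():
    y = model(x)  # eval path falls through to a plain forward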