
Commit e1cec97

Merge pull request #5 from mctigger/tanh_normal-properties
This pull request fixes a critical bug in `TensorIndependent`'s shape calculation and significantly improves the `TensorTanhNormal` and `SamplingDistribution` classes. Here's a breakdown of the key changes:

### Key Fixes and Improvements

* **`TensorIndependent` Shape Bug:** A bug where `reinterpreted_batch_ndims=0` resulted in an incorrect shape has been fixed. The shape calculation now correctly returns the full shape of the base distribution in this case.
* **`TensorTanhNormal` Simplification:** The `reinterpreted_batch_ndims` parameter has been removed from `TensorTanhNormal` to enforce a clearer separation of concerns. To achieve this functionality, users should now wrap `TensorTanhNormal` with `TensorIndependent`. The class also gains new statistical properties: **mean**, **variance**, and **standard deviation**.
* **`SamplingDistribution` Enhancements:** This class has been rewritten for better performance and reliability. It now uses `__slots__` for memory efficiency, caches samples and statistical properties, improves error handling, and adds input validation.

### Impact and Migration

This update introduces a **breaking change**: the `reinterpreted_batch_ndims` parameter is no longer available in `TensorTanhNormal`. To migrate, you must now explicitly wrap `TensorTanhNormal` with `TensorIndependent` to reinterpret batch dimensions, as shown in the sketch below. The pull request includes a migration guide with an example of the new usage.
2 parents 80763ca + da7252c commit e1cec97
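A minimal migration sketch. The import paths are assumptions inferred from the file layout in this diff (`src/tensorcontainer/tensor_distribution/...`), and `action_loc`/`action_scale` are illustrative tensors, not names from the PR:

```python
import torch

# Assumed import paths, inferred from src/tensorcontainer/tensor_distribution/;
# adjust if the package re-exports these classes elsewhere.
from tensorcontainer.tensor_distribution.independent import TensorIndependent
from tensorcontainer.tensor_distribution.tanh_normal import TensorTanhNormal

action_loc = torch.zeros(4, 3)   # batch of 4, event size 3 (illustrative)
action_scale = torch.ones(4, 3)

# Before (0.6.1): reinterpreted_batch_ndims was an argument of TensorTanhNormal.
# dist = TensorTanhNormal(action_loc, action_scale, reinterpreted_batch_ndims=1)

# After (0.6.2): wrap with TensorIndependent to reinterpret the trailing batch
# dimension as an event dimension.
dist = TensorIndependent(
    TensorTanhNormal(action_loc, action_scale),
    reinterpreted_batch_ndims=1,
)
```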

File tree

7 files changed (+458 / -74 lines changed)

pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tensorcontainer"
-version = "0.6.1"
+version = "0.6.2"
 description = "TensorDict-like functionality for PyTorch with PyTree compatibility and torch.compile support"
 authors = [{name="Tim Joseph", email="tim@mctigger.com"}]
 license = {text = "MIT"}
```
src/tensorcontainer/distributions/sampling.py

Lines changed: 112 additions & 27 deletions

```diff
@@ -1,50 +1,135 @@
 import torch
-from torch.distributions import Distribution
+from torch.distributions import Distribution, constraints
+from functools import cached_property
+from typing import Any, Optional
 
 
 class SamplingDistribution(Distribution):
-    def __init__(self, base_distribution: Distribution, n=100):
+    """
+    A wrapper for a PyTorch distribution that calculates statistics via sampling.
+
+    This distribution is useful when the analytical statistics of a base
+    distribution are not available or not desired. Instead, it computes
+    properties like mean, stddev, variance, and mode by drawing samples from the
+    base distribution.
+
+    To improve efficiency, it caches the generated samples and the computed
+    statistics, ensuring that repeated access to these properties does not
+    trigger redundant computations.
+
+    Args:
+        base_distribution (Distribution): The underlying distribution to sample from.
+        n (int, optional): The number of samples to draw for calculating
+            statistics. Defaults to 100.
+    """
+
+    __slots__ = ["base_dist", "n"]
+
+    def __init__(self, base_distribution: Distribution, *, n: int = 100):
+        if not isinstance(base_distribution, Distribution):
+            raise TypeError(
+                "base_distribution must be a torch.distributions.Distribution"
+            )
+        if not isinstance(n, int) or n <= 0:
+            raise ValueError("n must be a positive integer")
+
         self.base_dist = base_distribution
         self.n = n
 
-    def __getattr__(self, name):
+        super().__init__(
+            batch_shape=self.base_dist.batch_shape,
+            event_shape=self.base_dist.event_shape,
+            validate_args=False,  # We defer validation to the base distribution
+        )
+
+    def __repr__(self) -> str:
+        return f"SamplingDistribution(base_dist={self.base_dist}, n={self.n})"
+
+    def __getattr__(self, name: str) -> Any:
+        """Delegates attribute access to the base distribution."""
         return getattr(self.base_dist, name)
 
+    @cached_property
+    def _samples(self) -> torch.Tensor:
+        """
+        Cached samples from the base distribution.
+
+        Uses rsample if available for reparameterization-friendly gradients,
+        otherwise falls back to sample.
+        """
+        sample_shape = torch.Size((self.n,))
+        if self.base_dist.has_rsample:
+            return self.base_dist.rsample(sample_shape)
+        return self.base_dist.sample(sample_shape)
+
     @property
-    def has_rsample(self):
+    def has_rsample(self) -> bool:
         return self.base_dist.has_rsample
 
-    def rsample(self, sample_shape=torch.Size()):
+    def rsample(self, sample_shape: Any = torch.Size()) -> torch.Tensor:
+        """Delegates rsample to the base distribution."""
         return self.base_dist.rsample(sample_shape)
 
-    def sample(self, sample_shape=torch.Size()):
+    def sample(self, sample_shape: Any = torch.Size()) -> torch.Tensor:
+        """Delegates sample to the base distribution."""
         return self.base_dist.sample(sample_shape)
 
-    @property
-    def mean(self):
-        return self.base_dist.rsample((self.n,)).mean(0)
+    @cached_property
+    def mean(self) -> torch.Tensor:  # type: ignore
+        """Mean of the distribution, computed as the mean of cached samples."""
+        return self._samples.float().mean(0)
 
-    @property
-    def stddev(self):
-        return self.base_dist.rsample((self.n,)).std(0)
+    @cached_property
+    def stddev(self) -> torch.Tensor:  # type: ignore
+        """Standard deviation of the distribution, computed from cached samples."""
+        return self._samples.float().std(0)
 
-    @property
-    def variance(self):
-        return self.base_dist.rsample((self.n,)).var(0)
+    @cached_property
+    def variance(self) -> torch.Tensor:  # type: ignore
+        """Variance of the distribution, computed from cached samples."""
+        return self._samples.float().var(0)
 
-    @property
-    def mode(self):
-        samples = self.base_dist.sample((self.n,))
-        log_probs = self.base_dist.log_prob(samples).view(self.n, -1)
-        index = torch.argmax(log_probs, dim=0)
+    @cached_property
+    def mode(self) -> torch.Tensor:  # type: ignore
+        """
+        Mode of the distribution.
+
+        Tries to return the analytical mode if available. Otherwise, it computes
+        the mode via Monte Carlo approximation by finding the sample with the
+        highest log probability.
+        """
+        try:
+            return self.base_dist.mode
+        except (AttributeError, NotImplementedError):
+            pass  # Fall back to sampling
+
+        log_probs = self.base_dist.log_prob(self._samples)
+        max_indices = torch.argmax(log_probs, dim=0)
 
-        selected = torch.gather(samples.view(self.n, -1), 0, index.unsqueeze(0))
-        return selected
+        # Use advanced indexing to gather the modes efficiently
+        return self._samples.gather(
+            0, max_indices.reshape(1, *max_indices.shape, *(1,) * len(self.event_shape))
+        ).squeeze(0)
 
-    def entropy(self):
-        samples = self.base_dist.rsample((self.n,))
-        logprob = self.base_dist.log_prob(samples)
-        return -logprob.mean(0)
+    def entropy(self) -> torch.Tensor:
+        """
+        Entropy of the distribution, estimated via Monte Carlo.
 
-    def log_prob(self, value):
+        Calculates the negative mean of the log probabilities of cached samples.
+        """
+        log_prob = self.base_dist.log_prob(self._samples)
+        return -log_prob.mean(0)
+
+    def log_prob(self, value: torch.Tensor) -> torch.Tensor:
+        """Delegates log probability calculation to the base distribution."""
         return self.base_dist.log_prob(value)
+
+    @property
+    def support(self) -> Optional[constraints.Constraint]:
+        """Delegates support to the base distribution."""
+        return self.base_dist.support
+
+    @property
+    def arg_constraints(self) -> dict:
+        """Delegates argument constraints to the base distribution."""
+        return self.base_dist.arg_constraints
```
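A short usage sketch of the rewritten class. The import path is an assumption inferred from the relative import (`..distributions.sampling`) in tanh_normal.py below:

```python
import torch
from torch.distributions import Normal, TanhTransform, TransformedDistribution

# Assumed import path, inferred from the relative import in tanh_normal.py.
from tensorcontainer.distributions.sampling import SamplingDistribution

# A tanh-transformed Normal has no closed-form mean/stddev, so statistics
# must be estimated from samples.
base = TransformedDistribution(Normal(torch.zeros(3), torch.ones(3)), [TanhTransform()])
dist = SamplingDistribution(base, n=1000)  # n is keyword-only after this change

mean = dist.mean          # draws and caches 1,000 samples on first access
stddev = dist.stddev      # reuses the cached samples; no resampling
entropy = dist.entropy()  # Monte Carlo estimate: -log_prob(samples).mean(0)
```

One design consequence worth noting: because `mean`, `stddev`, `variance`, and `mode` are now `cached_property`s over one shared `_samples` tensor, the reported statistics are mutually consistent, which the old per-property `rsample` calls did not guarantee.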

src/tensorcontainer/tensor_distribution/independent.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -19,7 +19,11 @@ def __init__(
         self.reinterpreted_batch_ndims = reinterpreted_batch_ndims
 
         super().__init__(
-            Size(base_distribution.shape[:-reinterpreted_batch_ndims]),
+            Size(
+                base_distribution.shape[:-reinterpreted_batch_ndims]
+                if reinterpreted_batch_ndims > 0
+                else base_distribution.shape
+            ),
             base_distribution.device,
         )
```
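The root cause is a Python slicing quirk: `-0` is simply `0`, so `shape[:-0]` yields an empty shape instead of the whole shape. A minimal sketch of the fixed edge case:

```python
from torch import Size

shape = Size((4, 3))
print(shape[:-1])  # torch.Size([4]) -- correct for reinterpreted_batch_ndims=1
print(shape[:-0])  # torch.Size([]) -- the old, buggy result for ndims=0
# The fix branches on the zero case and returns the full shape instead.
```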

src/tensorcontainer/tensor_distribution/tanh_normal.py

Lines changed: 62 additions & 21 deletions

```diff
@@ -1,19 +1,20 @@
 from __future__ import annotations
 
-from typing import Any, Dict, Optional, get_args
+from functools import cached_property
+from typing import Any, Dict, get_args
 
 import torch
 from torch import Tensor
 from torch.distributions import (
     Distribution,
-    Independent,
     Normal,
     TransformedDistribution,
     constraints,
 )
 from torch.distributions.utils import broadcast_all
 from torch.types import Number
 
+from ..distributions.sampling import SamplingDistribution
 from .base import TensorDistribution
 
 
@@ -49,25 +50,31 @@ def log_abs_det_jacobian(self, x, y):
 
 
 class TensorTanhNormal(TensorDistribution):
+    """Tensor-aware TanhNormal distribution.
+
+    Creates a transformed Normal distribution where the output is passed through
+    a hyperbolic tangent (tanh) function, constraining values to the interval (-1, 1).
+
+    Args:
+        loc: Location parameter of the underlying normal distribution.
+        scale: Scale parameter of the underlying normal distribution. Must be positive.
+
+    Note:
+        This distribution is commonly used in reinforcement learning for bounded
+        continuous action spaces. Use TensorIndependent to reinterpret batch dimensions
+        as event dimensions if needed.
+    """
+
     _loc: Tensor
     _scale: Tensor
-    _reinterpreted_batch_ndims: int
 
     def __init__(
         self,
         loc: Tensor,
         scale: Tensor,
-        reinterpreted_batch_ndims: Optional[int] = None,
     ):
         self._loc, self._scale = broadcast_all(loc, scale)
 
-        if reinterpreted_batch_ndims is None:
-            self._reinterpreted_batch_ndims = 0
-            if self._loc.ndim > 0:
-                self._reinterpreted_batch_ndims = 1
-        else:
-            self._reinterpreted_batch_ndims = reinterpreted_batch_ndims
-
         if isinstance(loc, get_args(Number)) and isinstance(scale, get_args(Number)):
             shape = tuple()
         else:
@@ -86,25 +93,59 @@ def _unflatten_distribution(
         return cls(
             loc=attributes.get("_loc"),  # type: ignore
             scale=attributes.get("_scale"),  # type: ignore
-            reinterpreted_batch_ndims=attributes.get("_reinterpreted_batch_ndims"),  # type: ignore
         )
 
     def dist(self) -> Distribution:
-        return Independent(
-            TransformedDistribution(
-                Normal(self._loc.float(), self._scale.float(), validate_args=False),
-                [
-                    ClampedTanhTransform(),
-                ],
-                validate_args=False,
-            ),
-            self._reinterpreted_batch_ndims,
+        return TransformedDistribution(
+            Normal(self._loc.float(), self._scale.float(), validate_args=False),
+            [
+                ClampedTanhTransform(),
+            ],
+            validate_args=False,
         )
 
+    def log_prob(self, value: Tensor) -> Tensor:
+        """Compute log probability of value under the distribution."""
+        return self.dist().log_prob(value)
+
     @property
     def loc(self) -> Tensor:
+        """Returns the location parameter of the underlying normal distribution."""
         return self._loc
 
     @property
     def scale(self) -> Tensor:
+        """Returns the scale parameter of the underlying normal distribution."""
         return self._scale
+
+    @cached_property
+    def _sampling_dist(self) -> SamplingDistribution:
+        """Cached sampling distribution for consistent property calculations."""
+        return SamplingDistribution(self.dist(), n=10000)
+
+    @property
+    def mean(self) -> Tensor:
+        """Returns the mean of the distribution.
+
+        Note: For transformed distributions, this is computed via sampling
+        since the analytical mean may not be available.
+        """
+        return self._sampling_dist.mean
+
+    @property
+    def variance(self) -> Tensor:
+        """Returns the variance of the distribution.
+
+        Note: For transformed distributions, this is computed via sampling
+        since the analytical variance may not be available.
+        """
+        return self._sampling_dist.variance
+
+    @property
+    def stddev(self) -> Tensor:
+        """Returns the standard deviation of the distribution.
+
+        Note: For transformed distributions, this is computed via sampling
+        since the analytical standard deviation may not be available.
+        """
+        return self._sampling_dist.stddev
```
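A sketch of the new property surface. The import path is assumed (see the migration sketch near the top of this page), and the printed values are Monte Carlo estimates over the n=10000 cached samples, so exact numbers will vary:

```python
import torch

# Assumed import path; see the migration sketch above.
from tensorcontainer.tensor_distribution.tanh_normal import TensorTanhNormal

dist = TensorTanhNormal(loc=torch.zeros(2), scale=torch.ones(2))

print(dist.mean)      # approximately 0 for a symmetric tanh(Normal(0, 1))
print(dist.stddev)    # sample estimate from the shared cached samples
print(dist.variance)  # likewise; repeated access does not resample
```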

tests/tensor_distribution/test_independent.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -42,6 +42,9 @@ def test_broadcasting_shapes(
         assert td_independent.batch_shape == expected_batch_shape
         assert td_independent.dist().batch_shape == expected_batch_shape
 
+        # Test that shape property matches expected_batch_shape (fixes bug with reinterpreted_batch_ndims=0)
+        assert td_independent.shape == expected_batch_shape
+
 
 class TestTensorIndependentTensorContainerIntegration:
     @pytest.mark.parametrize(
```
