From 050928c8229daf3f7594ad3c348df4aba3566c4e Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Tue, 29 Jul 2025 11:02:37 +0200 Subject: [PATCH 01/25] :fire: Remove baseline code --- docs/source/api.rst | 62 ----- tests/baselines/__init__.py | 0 tests/baselines/test_batched.py | 56 ---- tests/baselines/test_deep_ensembles.py | 18 -- tests/baselines/test_masked.py | 87 ------- tests/baselines/test_mc_dropout.py | 75 ------ tests/baselines/test_mimo.py | 62 ----- tests/baselines/test_packed.py | 129 ---------- tests/baselines/test_standard.py | 165 ------------ torch_uncertainty/baselines/__init__.py | 0 .../baselines/classification/__init__.py | 4 - .../classification/deep_ensembles.py | 58 ----- .../baselines/classification/resnet.py | 240 ------------------ .../baselines/classification/vgg.py | 175 ------------- .../baselines/classification/wideresnet.py | 192 -------------- torch_uncertainty/baselines/depth/__init__.py | 2 - torch_uncertainty/baselines/depth/bts.py | 41 --- .../baselines/regression/__init__.py | 2 - torch_uncertainty/baselines/regression/mlp.py | 65 ----- .../baselines/segmentation/__init__.py | 3 - .../baselines/segmentation/deeplab.py | 47 ---- .../baselines/segmentation/segformer.py | 62 ----- 22 files changed, 1545 deletions(-) delete mode 100644 tests/baselines/__init__.py delete mode 100644 tests/baselines/test_batched.py delete mode 100644 tests/baselines/test_deep_ensembles.py delete mode 100644 tests/baselines/test_masked.py delete mode 100644 tests/baselines/test_mc_dropout.py delete mode 100644 tests/baselines/test_mimo.py delete mode 100644 tests/baselines/test_packed.py delete mode 100644 tests/baselines/test_standard.py delete mode 100644 torch_uncertainty/baselines/__init__.py delete mode 100644 torch_uncertainty/baselines/classification/__init__.py delete mode 100644 torch_uncertainty/baselines/classification/deep_ensembles.py delete mode 100644 torch_uncertainty/baselines/classification/resnet.py delete mode 100644 torch_uncertainty/baselines/classification/vgg.py delete mode 100644 torch_uncertainty/baselines/classification/wideresnet.py delete mode 100644 torch_uncertainty/baselines/depth/__init__.py delete mode 100644 torch_uncertainty/baselines/depth/bts.py delete mode 100644 torch_uncertainty/baselines/regression/__init__.py delete mode 100644 torch_uncertainty/baselines/regression/mlp.py delete mode 100644 torch_uncertainty/baselines/segmentation/__init__.py delete mode 100644 torch_uncertainty/baselines/segmentation/deeplab.py delete mode 100644 torch_uncertainty/baselines/segmentation/segformer.py diff --git a/docs/source/api.rst b/docs/source/api.rst index ea39d418..6c463509 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -52,68 +52,6 @@ Pixelwise Regression PixelRegressionRoutine -Baselines ---------- - -.. warning:: - - The baselines will soon be removed from the library to avoid confusion with the routines. - -TorchUncertainty provide lightning-based models that can be easily trained and evaluated. -These models inherit from the routines and are specifically designed to benchmark -different methods in similar settings, here with constant architectures. - -.. currentmodule:: torch_uncertainty.baselines.classification - -Classification -^^^^^^^^^^^^^^ - -.. autosummary:: - :toctree: generated/ - :nosignatures: - :template: class.rst - - ResNetBaseline - VGGBaseline - WideResNetBaseline - -.. currentmodule:: torch_uncertainty.baselines.regression - -Regression -^^^^^^^^^^ - -.. 
autosummary:: - :toctree: generated/ - :nosignatures: - :template: class.rst - - MLPBaseline - -.. currentmodule:: torch_uncertainty.baselines.segmentation - -Segmentation -^^^^^^^^^^^^ - -.. autosummary:: - :toctree: generated/ - :nosignatures: - :template: class.rst - - DeepLabBaseline - SegFormerBaseline - -.. currentmodule:: torch_uncertainty.baselines.depth - -Monocular Depth Estimation -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. autosummary:: - :toctree: generated/ - :nosignatures: - :template: class.rst - - BTSBaseline - Layers ------ diff --git a/tests/baselines/__init__.py b/tests/baselines/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/baselines/test_batched.py b/tests/baselines/test_batched.py deleted file mode 100644 index c1c55dcb..00000000 --- a/tests/baselines/test_batched.py +++ /dev/null @@ -1,56 +0,0 @@ -import torch -from torch import nn - -from torch_uncertainty.baselines.classification import ( - ResNetBaseline, - WideResNetBaseline, -) - - -class TestBatchedBaseline: - """Testing the BatchedResNet baseline class.""" - - def test_batched_18(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="batched", - arch=18, - style="cifar", - num_estimators=4, - groups=1, - ) - - _ = net(torch.rand(1, 3, 32, 32)) - - def test_batched_50(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="batched", - arch=50, - style="imagenet", - num_estimators=4, - groups=1, - ) - - _ = net(torch.rand(1, 3, 40, 40)) - - -class TestBatchedWideBaseline: - """Testing the BatchedWideResNet baseline class.""" - - def test_batched(self) -> None: - net = WideResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="batched", - style="cifar", - num_estimators=4, - groups=1, - ) - - _ = net(torch.rand(1, 3, 32, 32)) diff --git a/tests/baselines/test_deep_ensembles.py b/tests/baselines/test_deep_ensembles.py deleted file mode 100644 index 2cf474cc..00000000 --- a/tests/baselines/test_deep_ensembles.py +++ /dev/null @@ -1,18 +0,0 @@ -import pytest - -from torch_uncertainty.baselines.classification.deep_ensembles import ( - DeepEnsemblesBaseline, -) - - -class TestDeepEnsembles: - """Testing the Deep Ensembles baseline class.""" - - def test_failure(self) -> None: - with pytest.raises(ValueError, match="Models must not be an empty list."): - DeepEnsemblesBaseline( - log_path=".", - checkpoint_ids=[], - backbone="resnet", - num_classes=10, - ) diff --git a/tests/baselines/test_masked.py b/tests/baselines/test_masked.py deleted file mode 100644 index ad2b6761..00000000 --- a/tests/baselines/test_masked.py +++ /dev/null @@ -1,87 +0,0 @@ -import pytest -import torch -from torch import nn - -from torch_uncertainty.baselines.classification import ( - ResNetBaseline, - WideResNetBaseline, -) - - -class TestMaskedBaseline: - """Testing the MaskedResNet baseline class.""" - - def test_masked_18(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="masked", - arch=18, - style="cifar", - num_estimators=4, - scale=2, - groups=1, - ) - - _ = net(torch.rand(1, 3, 32, 32)) - - def test_masked_50(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="masked", - arch=50, - style="imagenet", - num_estimators=4, - scale=2, - groups=1, - ) - - _ = net(torch.rand(1, 3, 40, 40)) - - def test_masked_errors(self) -> None: - with 
pytest.raises(ValueError): - _ = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="masked", - arch=18, - style="cifar", - num_estimators=4, - scale=0.5, - groups=1, - ) - - with pytest.raises(ValueError): - _ = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="masked", - arch=18, - style="cifar", - num_estimators=4, - scale=2, - groups=0, - ) - - -class TestMaskedWideBaseline: - """Testing the MaskedWideResNet baseline class.""" - - def test_masked(self) -> None: - net = WideResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="masked", - style="cifar", - num_estimators=4, - scale=2, - groups=1, - ) - - _ = net(torch.rand(1, 3, 32, 32)) diff --git a/tests/baselines/test_mc_dropout.py b/tests/baselines/test_mc_dropout.py deleted file mode 100644 index 5847b995..00000000 --- a/tests/baselines/test_mc_dropout.py +++ /dev/null @@ -1,75 +0,0 @@ -import torch -from torch import nn - -from torch_uncertainty.baselines.classification import ( - ResNetBaseline, - VGGBaseline, - WideResNetBaseline, -) - - -class TestStandardBaseline: - """Testing the ResNetBaseline baseline class.""" - - def test_standard(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="mc-dropout", - dropout_rate=0.1, - num_estimators=4, - arch=18, - style="cifar", - groups=1, - ) - net(torch.rand(1, 3, 32, 32)) - - -class TestStandardWideBaseline: - """Testing the WideResNetBaseline baseline class.""" - - def test_standard(self) -> None: - net = WideResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="mc-dropout", - dropout_rate=0.1, - num_estimators=4, - style="cifar", - groups=1, - ) - net(torch.rand(1, 3, 32, 32)) - - -class TestStandardVGGBaseline: - """Testing the VGGBaseline baseline class.""" - - def test_standard(self) -> None: - net = VGGBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="mc-dropout", - dropout_rate=0.1, - num_estimators=4, - arch=11, - groups=1, - last_layer_dropout=True, - ) - net(torch.rand(1, 3, 32, 32)) - - net = VGGBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="mc-dropout", - num_estimators=4, - arch=11, - groups=1, - dropout_rate=0.3, - last_layer_dropout=True, - ) - net.eval() - net(torch.rand(1, 3, 32, 32)) diff --git a/tests/baselines/test_mimo.py b/tests/baselines/test_mimo.py deleted file mode 100644 index aafaea9c..00000000 --- a/tests/baselines/test_mimo.py +++ /dev/null @@ -1,62 +0,0 @@ -import torch -from torch import nn - -from torch_uncertainty.baselines.classification import ( - ResNetBaseline, - WideResNetBaseline, -) - - -class TestMIMOBaseline: - """Testing the MIMOResNet baseline class.""" - - def test_mimo_50(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="mimo", - arch=50, - style="cifar", - num_estimators=4, - rho=0.5, - batch_repeat=4, - groups=1, - ).eval() - - _ = net(torch.rand(1, 3, 32, 32)) - - def test_mimo_18(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="mimo", - arch=18, - style="imagenet", - num_estimators=4, - rho=0.5, - batch_repeat=4, - groups=2, - ).eval() - - _ = net(torch.rand(1, 3, 40, 40)) - - -class TestMIMOWideBaseline: - """Testing the PackedWideResNet baseline class.""" - - def test_mimo(self) -> None: - net = WideResNetBaseline( - 
num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="mimo", - style="cifar", - num_estimators=4, - rho=0.5, - batch_repeat=4, - groups=1, - ).eval() - - _ = net(torch.rand(1, 3, 32, 32)) diff --git a/tests/baselines/test_packed.py b/tests/baselines/test_packed.py deleted file mode 100644 index 32bbb8f8..00000000 --- a/tests/baselines/test_packed.py +++ /dev/null @@ -1,129 +0,0 @@ -import pytest -import torch -from torch import nn - -from torch_uncertainty.baselines.classification import ( - ResNetBaseline, - VGGBaseline, - WideResNetBaseline, -) -from torch_uncertainty.baselines.regression import MLPBaseline - - -class TestPackedBaseline: - """Testing the PackedResNet baseline class.""" - - def test_packed_50(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="packed", - arch=50, - style="cifar", - num_estimators=4, - alpha=2, - gamma=1, - groups=1, - ) - - _ = net(torch.rand(1, 3, 32, 32)) - - def test_packed_18(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="packed", - arch=18, - style="imagenet", - num_estimators=4, - alpha=2, - gamma=2, - groups=2, - ) - - _ = net(torch.rand(1, 3, 40, 40)) - - def test_packed_exception(self) -> None: - with pytest.raises(ValueError): - _ = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="packed", - arch=50, - style="cifar", - num_estimators=4, - alpha=0, - gamma=1, - groups=1, - ) - - with pytest.raises(ValueError): - _ = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="packed", - arch=50, - style="cifar", - num_estimators=4, - alpha=2, - gamma=0, - groups=1, - ) - - -class TestPackedWideBaseline: - """Testing the PackedWideResNet baseline class.""" - - def test_packed(self) -> None: - net = WideResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="packed", - style="cifar", - num_estimators=4, - alpha=2, - gamma=1, - groups=1, - ) - - _ = net(torch.rand(1, 3, 32, 32)) - - -class TestPackedVGGBaseline: - """Testing the PackedWideResNet baseline class.""" - - def test_packed(self) -> None: - net = VGGBaseline( - num_classes=10, - in_channels=3, - arch=13, - loss=nn.CrossEntropyLoss(), - version="packed", - num_estimators=4, - alpha=2, - gamma=1, - groups=1, - ) - _ = net(torch.rand(2, 3, 32, 32)) - - -class TestPackedMLPBaseline: - """Testing the Packed MLP baseline class.""" - - def test_packed(self) -> None: - net = MLPBaseline( - in_features=3, - output_dim=10, - loss=nn.MSELoss(), - version="packed", - hidden_dims=[1], - num_estimators=2, - alpha=2, - gamma=1, - ) - _ = net(torch.rand(1, 3)) diff --git a/tests/baselines/test_standard.py b/tests/baselines/test_standard.py deleted file mode 100644 index cd3aacb0..00000000 --- a/tests/baselines/test_standard.py +++ /dev/null @@ -1,165 +0,0 @@ -import pytest -import torch -from torch import nn - -from torch_uncertainty.baselines.classification import ( - ResNetBaseline, - VGGBaseline, - WideResNetBaseline, -) -from torch_uncertainty.baselines.regression import MLPBaseline -from torch_uncertainty.baselines.segmentation import ( - DeepLabBaseline, - SegFormerBaseline, -) - - -class TestStandardBaseline: - """Testing the ResNetBaseline baseline class.""" - - def test_standard(self) -> None: - net = ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="std", - arch=18, - style="cifar", - groups=1, - ) 
- _ = net(torch.rand(1, 3, 32, 32)) - - def test_errors(self) -> None: - with pytest.raises(ValueError): - ResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="test", - arch=18, - style="cifar", - groups=1, - ) - - -class TestStandardWideBaseline: - """Testing the WideResNetBaseline baseline class.""" - - def test_standard(self) -> None: - net = WideResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="std", - style="cifar", - groups=1, - ) - _ = net(torch.rand(1, 3, 32, 32)) - - def test_errors(self) -> None: - with pytest.raises(ValueError): - WideResNetBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="test", - style="cifar", - groups=1, - ) - - -class TestStandardVGGBaseline: - """Testing the VGGBaseline baseline class.""" - - def test_standard(self) -> None: - net = VGGBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="std", - arch=11, - groups=1, - ) - _ = net(torch.rand(1, 3, 32, 32)) - - def test_errors(self) -> None: - with pytest.raises(ValueError): - VGGBaseline( - num_classes=10, - in_channels=3, - loss=nn.CrossEntropyLoss(), - version="test", - arch=11, - groups=1, - ) - - -class TestStandardMLPBaseline: - """Testing the MLP baseline class.""" - - def test_standard(self) -> None: - net = MLPBaseline( - in_features=3, - output_dim=10, - loss=nn.MSELoss(), - version="std", - hidden_dims=[1], - ) - _ = net(torch.rand(1, 3)) - for dist_family in ["normal", "laplace", "nig"]: - MLPBaseline( - in_features=3, - output_dim=10, - loss=nn.MSELoss(), - version="std", - hidden_dims=[1], - dist_family=dist_family, - ) - - def test_errors(self) -> None: - with pytest.raises(ValueError): - MLPBaseline( - in_features=3, - output_dim=10, - loss=nn.MSELoss(), - version="test", - hidden_dims=[1], - ) - - -class TestStandardSegFormerBaseline: - """Testing the SegFormer baseline class.""" - - def test_standard(self) -> None: - net = SegFormerBaseline( - num_classes=10, - loss=nn.CrossEntropyLoss(), - version="std", - arch=0, - ) - _ = net(torch.rand(1, 3, 32, 32)) - - def test_errors(self) -> None: - with pytest.raises(ValueError): - SegFormerBaseline( - num_classes=10, - loss=nn.CrossEntropyLoss(), - version="test", - arch=0, - ) - - -class TestStandardDeepLabBaseline: - """Testing the DeepLab baseline class.""" - - def test_standard(self) -> None: - net = DeepLabBaseline( - num_classes=10, - loss=nn.CrossEntropyLoss(), - version="std", - style="v3", - output_stride=16, - arch=50, - separable=True, - ).eval() - _ = net(torch.rand(1, 3, 32, 32)) diff --git a/torch_uncertainty/baselines/__init__.py b/torch_uncertainty/baselines/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/torch_uncertainty/baselines/classification/__init__.py b/torch_uncertainty/baselines/classification/__init__.py deleted file mode 100644 index e080ee4e..00000000 --- a/torch_uncertainty/baselines/classification/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# ruff: noqa: F401 -from .resnet import ResNetBaseline -from .vgg import VGGBaseline -from .wideresnet import WideResNetBaseline diff --git a/torch_uncertainty/baselines/classification/deep_ensembles.py b/torch_uncertainty/baselines/classification/deep_ensembles.py deleted file mode 100644 index 4471dff9..00000000 --- a/torch_uncertainty/baselines/classification/deep_ensembles.py +++ /dev/null @@ -1,58 +0,0 @@ -from pathlib import Path -from typing import Literal - -from torch_uncertainty.models import 
deep_ensembles -from torch_uncertainty.ood_criteria import TUOODCriterion -from torch_uncertainty.routines.classification import ClassificationRoutine -from torch_uncertainty.utils import get_version - -from . import ResNetBaseline, VGGBaseline, WideResNetBaseline - - -class DeepEnsemblesBaseline(ClassificationRoutine): - backbones = { - "resnet": ResNetBaseline, - "vgg": VGGBaseline, - "wideresnet": WideResNetBaseline, - } - - def __init__( - self, - num_classes: int, - log_path: str | Path, - checkpoint_ids: list[int], - backbone: Literal["resnet", "vgg", "wideresnet"], - eval_ood: bool = False, - eval_shift: bool = False, - eval_grouping_loss: bool = False, - ood_criterion: TUOODCriterion | str = "msp", - log_plots: bool = False, - ) -> None: - log_path = Path(log_path) - - backbone_cls = self.backbones[backbone] - - models = [] - for version in checkpoint_ids: # coverage: ignore - ckpt_file, hparams_file = get_version(root=log_path, version=version) - trained_model = backbone_cls.load_from_checkpoint( - checkpoint_path=ckpt_file, - hparams_file=hparams_file, - loss=None, - optim_recipe=None, - ).eval() - models.append(trained_model.model) - de = deep_ensembles(models=models) - - super().__init__( # coverage: ignore - num_classes=num_classes, - model=de, - loss=None, - is_ensemble=de.num_estimators > 1, - eval_ood=eval_ood, - eval_shift=eval_shift, - eval_grouping_loss=eval_grouping_loss, - ood_criterion=ood_criterion, - log_plots=log_plots, - ) - self.save_hyperparameters() # coverage: ignore diff --git a/torch_uncertainty/baselines/classification/resnet.py b/torch_uncertainty/baselines/classification/resnet.py deleted file mode 100644 index cb3c254b..00000000 --- a/torch_uncertainty/baselines/classification/resnet.py +++ /dev/null @@ -1,240 +0,0 @@ -from typing import Literal - -from torch import nn -from torch.optim import Optimizer - -from torch_uncertainty.models import mc_dropout -from torch_uncertainty.models.classification import ( - batched_resnet, - lpbnn_resnet, - masked_resnet, - mimo_resnet, - packed_resnet, - resnet, -) -from torch_uncertainty.ood_criteria import TUOODCriterion -from torch_uncertainty.routines.classification import ClassificationRoutine -from torch_uncertainty.transforms import MIMOBatchFormat, RepeatTarget - -ENSEMBLE_METHODS = [ - "packed", - "batched", - "lpbnn", - "masked", - "mc-dropout", - "mimo", -] - - -class ResNetBaseline(ClassificationRoutine): - versions = { - "std": resnet, - "packed": packed_resnet, - "batched": batched_resnet, - "lpbnn": lpbnn_resnet, - "masked": masked_resnet, - "mimo": mimo_resnet, - "mc-dropout": resnet, - } - archs = [18, 20, 34, 44, 50, 56, 101, 110, 152, 1202] - - def __init__( - self, - num_classes: int, - in_channels: int, - loss: nn.Module, - version: Literal[ - "std", - "mc-dropout", - "packed", - "batched", - "lpbnn", - "masked", - "mimo", - ], - arch: int, - style: str = "imagenet", - normalization_layer: type[nn.Module] = nn.BatchNorm2d, - num_estimators: int = 1, - dropout_rate: float = 0.0, - optim_recipe: dict | Optimizer | None = None, - mixup_params: dict | None = None, - last_layer_dropout: bool = False, - width_multiplier: float = 1.0, - groups: int = 1, - conv_bias: bool = False, - scale: float | None = None, - alpha: int | None = None, - gamma: int = 1, - rho: float = 1.0, - batch_repeat: int = 1, - ood_criterion: TUOODCriterion | str = "msp", - log_plots: bool = False, - save_in_csv: bool = False, - eval_ood: bool = False, - eval_shift: bool = False, - eval_grouping_loss: bool = False, - 
num_bins_cal_err: int = 15, - pretrained: bool = False, - ) -> None: - r"""ResNet backbone baseline for classification providing support for - various versions and architectures. - - Args: - num_classes (int): Number of classes to predict. - in_channels (int): Number of input channels. - loss (nn.Module): Training loss. - optim_recipe (Any): optimization recipe, corresponds to - what expect the `LightningModule.configure_optimizers() - `_ - method. - version (str): Determines which ResNet version to use: - - - ``"std"``: original ResNet - - ``"packed"``: Packed-Ensembles ResNet - - ``"batched"``: BatchEnsemble ResNet - - ``"masked"``: Masksemble ResNet - - ``"mimo"``: MIMO ResNet - - ``"mc-dropout"``: Monte-Carlo Dropout ResNet - - arch (int): Determines which ResNet architecture to use, one of: - - - ``18``: ResNet-18 - - ``32``: ResNet-32 - - ``50``: ResNet-50 - - ``101``: ResNet-101 - - ``152``: ResNet-152 - - style (str, optional): Which ResNet style to use. Defaults to ``imagenet``. - normalization_layer (type[nn.Module], optional): Normalization layer - to use. Defaults to ``nn.BatchNorm2d``. - num_estimators (int, optional): Number of estimators in the ensemble. - Only used if :attr:`version` is either ``"packed"``, ``"batched"``, - ``"masked"`` or ``"mc-dropout"`` Defaults to ``None``. - dropout_rate (float, optional): Dropout rate. Defaults to ``0.0``. - mixup_params (dict, optional): Mixup parameters. Can include mixtype, - mixmode, dist_sim, kernel_tau_max, kernel_tau_std, - mixup_alpha, and cutmix_alpha. If None, no augmentations. - Defaults to ``None``. - width_multiplier (float, optional): Expansion factor affecting the width - of the estimators. Defaults to ``1.0`` - groups (int, optional): Number of groups in convolutions. Defaults - to ``1``. - scale (float, optional): Expansion factor affecting the width of - the estimators. Only used if :attr:`version` is ``"masked"``. - Defaults to ``None``. - last_layer_dropout (bool): whether to apply dropout to the last layer only. - groups (int, optional): Number of groups in convolutions. Defaults to - ``1``. - conv_bias (bool, optional): Whether to include bias in the convolutional - layers. Defaults to ``False``. - scale (float, optional): Expansion factor affecting the width of the - estimators. Only used if :attr:`version` is ``"masked"``. Defaults - to ``None``. - alpha (float, optional): Expansion factor affecting the width of the - estimators. Only used if :attr:`version` is ``"packed"``. Defaults - to ``None``. - gamma (int, optional): Number of groups within each estimator. Only - used if :attr:`version` is ``"packed"`` and scales with - :attr:`groups`. Defaults to ``1``. - rho (float, optional): Probability that all estimators share the same - input. Only used if :attr:`version` is ``"mimo"``. Defaults to - ``1``. - batch_repeat (int, optional): Number of times to repeat the batch. Only - used if :attr:`version` is ``"mimo"``. Defaults to ``1``. - ood_criterion (TUOODCriterion, optional): Criterion for the binary OOD detection task. - Defaults to None which amounts to the maximum softmax probability score (MSP). - log_plots (bool, optional): Indicates whether to log the plots or not. - Defaults to ``False``. - save_in_csv (bool, optional): Indicates whether to save the results in - a csv file or not. Defaults to ``False``. - eval_ood (bool, optional): Indicates whether to evaluate the - OOD detection or not. Defaults to ``False``. - eval_shift (bool): Whether to evaluate on shifted data. Defaults to ``False``. 
- eval_grouping_loss (bool, optional): Indicates whether to evaluate the - grouping loss or not. Defaults to ``False``. - num_bins_cal_err (int, optional): Number of calibration bins. - Defaults to ``15``. - pretrained (bool, optional): Indicates whether to use the pretrained - weights or not. Only used if :attr:`version` is ``"packed"``. - Defaults to ``False``. - - Raises: - ValueError: If :attr:`version` is not either ``"std"``, - ``"packed"``, ``"batched"``, ``"masked"`` or ``"mc-dropout"``. - - Returns: - LightningModule: ResNet baseline ready for training and evaluation. - """ - params = { - "arch": arch, - "conv_bias": conv_bias, - "dropout_rate": dropout_rate, - "groups": groups, - "width_multiplier": width_multiplier, - "in_channels": in_channels, - "num_classes": num_classes, - "style": style, - "normalization_layer": normalization_layer, - } - - format_batch_fn = nn.Identity() - - if version not in self.versions: - raise ValueError(f"Unknown version: {version}") - - if version in ENSEMBLE_METHODS: - params |= { - "num_estimators": num_estimators, - } - - if version != "mc-dropout": - format_batch_fn = RepeatTarget(num_repeats=num_estimators) - - if version == "packed": - params |= { - "alpha": alpha, - "gamma": gamma, - "pretrained": pretrained, - } - - elif version == "masked": - params |= { - "scale": scale, - } - - elif version == "mimo": - format_batch_fn = MIMOBatchFormat( - num_estimators=num_estimators, - rho=rho, - batch_repeat=batch_repeat, - ) - - if version == "mc-dropout": # std ResNets don't have `num_estimators` - del params["num_estimators"] - - model = self.versions[version](**params) - if version == "mc-dropout": - model = mc_dropout( - model=model, - num_estimators=num_estimators, - last_layer=last_layer_dropout, - ) - - super().__init__( - num_classes=num_classes, - model=model, - loss=loss, - is_ensemble=version in ENSEMBLE_METHODS, - optim_recipe=optim_recipe, - format_batch_fn=format_batch_fn, - mixup_params=mixup_params, - eval_ood=eval_ood, - eval_shift=eval_shift, - eval_grouping_loss=eval_grouping_loss, - ood_criterion=ood_criterion, - log_plots=log_plots, - save_in_csv=save_in_csv, - num_bins_cal_err=num_bins_cal_err, - ) - self.save_hyperparameters(ignore=["loss"]) diff --git a/torch_uncertainty/baselines/classification/vgg.py b/torch_uncertainty/baselines/classification/vgg.py deleted file mode 100644 index 40c40db5..00000000 --- a/torch_uncertainty/baselines/classification/vgg.py +++ /dev/null @@ -1,175 +0,0 @@ -from typing import Literal - -from torch import nn -from torch.optim import Optimizer - -from torch_uncertainty.models import mc_dropout -from torch_uncertainty.models.classification import ( - packed_vgg, - vgg, -) -from torch_uncertainty.ood_criteria import TUOODCriterion -from torch_uncertainty.routines.classification import ClassificationRoutine -from torch_uncertainty.transforms import RepeatTarget - -ENSEMBLE_METHODS = ["mc-dropout", "packed"] - - -class VGGBaseline(ClassificationRoutine): - versions = { - "std": vgg, - "mc-dropout": vgg, - "packed": packed_vgg, - } - archs = [11, 13, 16, 19] - - def __init__( - self, - num_classes: int, - in_channels: int, - loss: nn.Module, - version: Literal["std", "mc-dropout", "packed"], - arch: int, - style: str = "imagenet", - num_estimators: int = 1, - dropout_rate: float = 0.0, - last_layer_dropout: bool = False, - optim_recipe: dict | Optimizer | None = None, - mixup_params: dict | None = None, - groups: int = 1, - alpha: int | None = None, - gamma: int = 1, - ood_criterion: 
type[TUOODCriterion] | str = "msp", - log_plots: bool = False, - save_in_csv: bool = False, - eval_ood: bool = False, - eval_shift: bool = False, - eval_grouping_loss: bool = False, - ) -> None: - r"""VGG backbone baseline for classification providing support for - various versions and architectures. - - Args: - num_classes (int): Number of classes to predict. - in_channels (int): Number of input channels. - loss (nn.Module): Training loss. - optim_recipe (Any): optimization recipe, corresponds to - what expect the `LightningModule.configure_optimizers() - `_ - method. - version (str): Determines which VGG version to use: - - - ``"std"``: original VGG - - ``"mc-dropout"``: Monte Carlo Dropout VGG - - ``"packed"``: Packed-Ensembles VGG - - arch (int): Determines which VGG architecture to use: - - - ``11``: VGG-11 - - ``13``: VGG-13 - - ``16``: VGG-16 - - ``19``: VGG-19 - - style (str, optional): Which VGG style to use. Defaults to ``imagenet``. - num_estimators (int, optional): Number of estimators in the ensemble. - Only used if :attr:`version` is either ``"packed"``, ``"batched"`` - or ``"masked"`` Defaults to ``None``. - dropout_rate (float, optional): Dropout rate. Defaults to ``0.0``. - mixup_params (dict, optional): Mixup parameters. Can include mixtype, - mixmode, dist_sim, kernel_tau_max, kernel_tau_std, - mixup_alpha, and cutmix_alpha. If None, no augmentations. - Defaults to ``None``. - last_layer_dropout (bool): whether to apply dropout to the last layer only. - groups (int, optional): Number of groups in convolutions. Defaults to - ``1``. - alpha (float, optional): Expansion factor affecting the width of the - estimators. Only used if :attr:`version` is ``"packed"``. Defaults - to ``None``. - gamma (int, optional): Number of groups within each estimator. Only - used if :attr:`version` is ``"packed"`` and scales with - :attr:`groups`. Defaults to ``1s``. - ood_criterion (TUOODCriterion, optional): Criterion for the binary OOD detection task. - Defaults to None which amounts to the maximum softmax probability score (MSP). - log_plots (bool, optional): Indicates whether to log the plots or not. - Defaults to ``False``. - save_in_csv (bool, optional): Indicates whether to save the results in - a csv file or not. Defaults to ``False``. - eval_ood (bool, optional): Indicates whether to evaluate the - OOD detection or not. Defaults to ``False``. - eval_shift (bool): Whether to evaluate on shifted data. Defaults to - ``False``. - eval_grouping_loss (bool, optional): Indicates whether to evaluate the - grouping loss or not. Defaults to ``False``. - - Raises: - ValueError: If :attr:`version` is not either ``"std"``, - ``"packed"``, ``"batched"`` or ``"masked"``. - - Returns: - LightningModule: VGG baseline ready for training and evaluation. 
- """ - params = { - "dropout_rate": dropout_rate, - "in_channels": in_channels, - "num_classes": num_classes, - "style": style, - "groups": groups, - "arch": arch, - } - - if version not in self.versions: - raise ValueError(f"Unknown version: {version}") - - format_batch_fn = nn.Identity() - - if version == "std": - params |= { - "dropout_rate": dropout_rate, - } - - elif version == "mc-dropout": - params |= { - "dropout_rate": dropout_rate, - "num_estimators": num_estimators, - } - - if version in ENSEMBLE_METHODS: - params |= { - "num_estimators": num_estimators, - } - - if version != "mc-dropout": - format_batch_fn = RepeatTarget(num_repeats=num_estimators) - - if version == "packed": - params |= { - "alpha": alpha, - "style": style, - "gamma": gamma, - } - - if version == "mc-dropout": # std VGGs don't have `num_estimators` - del params["num_estimators"] - model = self.versions[version](**params) - if version == "mc-dropout": - model = mc_dropout( - model=model, - num_estimators=num_estimators, - last_layer=last_layer_dropout, - ) - super().__init__( - num_classes=num_classes, - model=model, - loss=loss, - is_ensemble=version in ENSEMBLE_METHODS, - format_batch_fn=format_batch_fn, - optim_recipe=optim_recipe, - mixup_params=mixup_params, - eval_ood=eval_ood, - eval_shift=eval_shift, - ood_criterion=ood_criterion, - log_plots=log_plots, - save_in_csv=save_in_csv, - eval_grouping_loss=eval_grouping_loss, - ) - self.save_hyperparameters(ignore=["loss"]) diff --git a/torch_uncertainty/baselines/classification/wideresnet.py b/torch_uncertainty/baselines/classification/wideresnet.py deleted file mode 100644 index c37ea80f..00000000 --- a/torch_uncertainty/baselines/classification/wideresnet.py +++ /dev/null @@ -1,192 +0,0 @@ -from typing import Literal - -from torch import nn -from torch.optim import Optimizer - -from torch_uncertainty.models import mc_dropout -from torch_uncertainty.models.classification import ( - batched_wideresnet28x10, - masked_wideresnet28x10, - mimo_wideresnet28x10, - packed_wideresnet28x10, - wideresnet28x10, -) -from torch_uncertainty.ood_criteria import TUOODCriterion -from torch_uncertainty.routines.classification import ( - ClassificationRoutine, -) -from torch_uncertainty.transforms import MIMOBatchFormat, RepeatTarget - -ENSEMBLE_METHODS = ["packed", "batched", "masked", "mimo", "mc-dropout"] - - -class WideResNetBaseline(ClassificationRoutine): - versions = { - "std": [wideresnet28x10], - "mc-dropout": [wideresnet28x10], - "packed": [packed_wideresnet28x10], - "batched": [batched_wideresnet28x10], - "masked": [masked_wideresnet28x10], - "mimo": [mimo_wideresnet28x10], - } - - def __init__( - self, - num_classes: int, - in_channels: int, - loss: nn.Module, - version: Literal["std", "mc-dropout", "packed", "batched", "masked", "mimo"], - style: str = "imagenet", - num_estimators: int = 1, - dropout_rate: float = 0.0, - optim_recipe: dict | Optimizer | None = None, - mixup_params: dict | None = None, - groups: int = 1, - last_layer_dropout: bool = False, - scale: float | None = None, - alpha: int | None = None, - gamma: int = 1, - rho: float = 1.0, - batch_repeat: int = 1, - ood_criterion: type[TUOODCriterion] | str = "msp", - log_plots: bool = False, - save_in_csv: bool = False, - eval_ood: bool = False, - eval_shift: bool = False, - eval_grouping_loss: bool = False, - ) -> None: - r"""Wide-ResNet28x10 backbone baseline for classification providing support - for various versions. - - Args: - num_classes (int): Number of classes to predict. 
- in_channels (int): Number of input channels. - loss (nn.Module): Training loss. - optim_recipe (Any): optimization recipe, corresponds to - what expect the `LightningModule.configure_optimizers() - `_ - method. - version (str): Determines which Wide-ResNet version to use: - - - ``"std"``: original Wide-ResNet - - ``"mc-dropout"``: Monte Carlo Dropout Wide-ResNet - - ``"packed"``: Packed-Ensembles Wide-ResNet - - ``"batched"``: BatchEnsemble Wide-ResNet - - ``"masked"``: Masksemble Wide-ResNet - - ``"mimo"``: MIMO Wide-ResNet - - style (bool, optional): (str, optional): Which ResNet style to use. - Defaults to ``imagenet``. - num_estimators (int, optional): Number of estimators in the ensemble. - Only used if :attr:`version` is either ``"packed"``, ``"batched"`` - or ``"masked"`` Defaults to ``None``. - dropout_rate (float, optional): Dropout rate. Defaults to ``0.0``. - mixup_params (dict, optional): Mixup parameters. Can include mixtype, - mixmode, dist_sim, kernel_tau_max, kernel_tau_std, - mixup_alpha, and cutmix_alpha. If None, no augmentations. - Defaults to ``None``. - last_layer_dropout (bool): whether to apply dropout to the last layer only. - groups (int, optional): Number of groups in convolutions. Defaults to - ``1``. - scale (float, optional): Expansion factor affecting the width of the - estimators. Only used if :attr:`version` is ``"masked"``. Defaults - to ``None``. - alpha (float, optional): Expansion factor affecting the width of the - estimators. Only used if :attr:`version` is ``"packed"``. Defaults - to ``None``. - gamma (int, optional): Number of groups within each estimator. Only - used if :attr:`version` is ``"packed"`` and scales with - :attr:`groups`. Defaults to ``1``. - rho (float, optional): Probability that all estimators share the same - input. Only used if :attr:`version` is ``"mimo"``. Defaults to - ``1``. - batch_repeat (int, optional): Number of times to repeat the batch. Only - used if :attr:`version` is ``"mimo"``. Defaults to ``1``. - ood_criterion (TUOODCriterion, optional): Criterion for the binary OOD detection task. - Defaults to None which amounts to the maximum softmax probability score (MSP). - log_plots (bool, optional): Indicates whether to log the plots or not. - Defaults to ``False``. - save_in_csv (bool, optional): Indicates whether to save the results in - a csv file or not. Defaults to ``False``. - eval_ood (bool, optional): Indicates whether to evaluate the - OOD detection or not. Defaults to ``False``. - eval_shift (bool): Whether to evaluate on shifted data. Defaults to - ``False``. - eval_grouping_loss (bool, optional): Indicates whether to evaluate the - grouping loss or not. Defaults to ``False``. - - Raises: - ValueError: If :attr:`version` is not either ``"std"``, - ``"packed"``, ``"batched"`` or ``"masked"``. - - Returns: - LightningModule: Wide-ResNet baseline ready for training and evaluation. 
- """ - params = { - "conv_bias": False, - "dropout_rate": dropout_rate, - "groups": groups, - "in_channels": in_channels, - "num_classes": num_classes, - "style": style, - } - - format_batch_fn = nn.Identity() - - if version not in self.versions: - raise ValueError(f"Unknown version: {version}") - - if version in ENSEMBLE_METHODS: - params |= { - "num_estimators": num_estimators, - } - - if version != "mc-dropout": - format_batch_fn = RepeatTarget(num_repeats=num_estimators) - - if version == "packed": - params |= { - "alpha": alpha, - "gamma": gamma, - } - - elif version == "masked": - params |= { - "scale": scale, - } - - elif version == "mimo": - format_batch_fn = MIMOBatchFormat( - num_estimators=num_estimators, - rho=rho, - batch_repeat=batch_repeat, - ) - - if version == "mc-dropout": # std wideRn don't have `num_estimators` - del params["num_estimators"] - - model = self.versions[version][0](**params) - - if version == "mc-dropout": - model = mc_dropout( - model=model, - num_estimators=num_estimators, - last_layer=last_layer_dropout, - ) - - super().__init__( - num_classes=num_classes, - model=model, - loss=loss, - is_ensemble=version in ENSEMBLE_METHODS, - format_batch_fn=format_batch_fn, - optim_recipe=optim_recipe, - mixup_params=mixup_params, - eval_ood=eval_ood, - eval_shift=eval_shift, - eval_grouping_loss=eval_grouping_loss, - ood_criterion=ood_criterion, - log_plots=log_plots, - save_in_csv=save_in_csv, - ) - self.save_hyperparameters(ignore=["loss"]) diff --git a/torch_uncertainty/baselines/depth/__init__.py b/torch_uncertainty/baselines/depth/__init__.py deleted file mode 100644 index 6643eab0..00000000 --- a/torch_uncertainty/baselines/depth/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# ruff: noqa: F401 -from .bts import BTSBaseline diff --git a/torch_uncertainty/baselines/depth/bts.py b/torch_uncertainty/baselines/depth/bts.py deleted file mode 100644 index c8bca7ea..00000000 --- a/torch_uncertainty/baselines/depth/bts.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Literal - -from torch import nn - -from torch_uncertainty.models.depth.bts import bts_resnet -from torch_uncertainty.routines import PixelRegressionRoutine - - -class BTSBaseline(PixelRegressionRoutine): - archs = [50, 101] - - def __init__( - self, - loss: nn.Module, - version: Literal["std"], - arch: int, - max_depth: float, - dist_family: str | None = None, - pretrained_backbone: bool = True, - ) -> None: - params = { - "arch": arch, - "dist_family": dist_family, - "max_depth": max_depth, - "pretrained_backbone": pretrained_backbone, - } - - format_batch_fn = nn.Identity() - - if version not in self.versions: - raise ValueError(f"Unknown version {version}") - - model = bts_resnet(**params) - super().__init__( - model=model, - output_dim=1, - loss=loss, - format_batch_fn=format_batch_fn, - dist_family=dist_family, - ) - self.save_hyperparameters(ignore=["loss"]) diff --git a/torch_uncertainty/baselines/regression/__init__.py b/torch_uncertainty/baselines/regression/__init__.py deleted file mode 100644 index b4a1391a..00000000 --- a/torch_uncertainty/baselines/regression/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# ruff: noqa: F401 -from .mlp import MLPBaseline diff --git a/torch_uncertainty/baselines/regression/mlp.py b/torch_uncertainty/baselines/regression/mlp.py deleted file mode 100644 index 82cb8e9a..00000000 --- a/torch_uncertainty/baselines/regression/mlp.py +++ /dev/null @@ -1,65 +0,0 @@ -from typing import Literal - -from torch import nn - -from torch_uncertainty.models.mlp import mlp, 
packed_mlp -from torch_uncertainty.routines.regression import ( - RegressionRoutine, -) -from torch_uncertainty.transforms.batch import RepeatTarget - -ENSEMBLE_METHODS = ["packed"] - - -class MLPBaseline(RegressionRoutine): - versions = {"std": mlp, "packed": packed_mlp} - - def __init__( - self, - output_dim: int, - in_features: int, - loss: nn.Module, - version: Literal["std", "packed"], - hidden_dims: list[int], - num_estimators: int | None = 1, - dropout_rate: float = 0.0, - alpha: float | None = None, - gamma: int = 1, - dist_family: str | None = None, - dist_args: dict | None = None, - ) -> None: - r"""MLP baseline for regression providing support for various versions.""" - params = { - "dropout_rate": dropout_rate, - "in_features": in_features, - "num_outputs": output_dim, - "hidden_dims": hidden_dims, - "dist_family": dist_family, - "dist_args": dist_args, - } - - format_batch_fn = nn.Identity() - - if version not in self.versions: - raise ValueError(f"Unknown version: {version}") - - if version == "packed": - params |= { - "alpha": alpha, - "num_estimators": num_estimators, - "gamma": gamma, - } - format_batch_fn = RepeatTarget(num_repeats=num_estimators) - - model = self.versions[version](**params) - - super().__init__( - output_dim=output_dim, - model=model, - loss=loss, - dist_family=dist_family, - is_ensemble=version in ENSEMBLE_METHODS, - format_batch_fn=format_batch_fn, - save_in_csv=True, - ) - self.save_hyperparameters(ignore=["loss"]) diff --git a/torch_uncertainty/baselines/segmentation/__init__.py b/torch_uncertainty/baselines/segmentation/__init__.py deleted file mode 100644 index 3dbaae4a..00000000 --- a/torch_uncertainty/baselines/segmentation/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# ruff: noqa: F401 -from .deeplab import DeepLabBaseline -from .segformer import SegFormerBaseline diff --git a/torch_uncertainty/baselines/segmentation/deeplab.py b/torch_uncertainty/baselines/segmentation/deeplab.py deleted file mode 100644 index 156849e3..00000000 --- a/torch_uncertainty/baselines/segmentation/deeplab.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import Literal - -from torch import nn - -from torch_uncertainty.models.segmentation import deep_lab_v3_resnet -from torch_uncertainty.routines.segmentation import SegmentationRoutine - - -class DeepLabBaseline(SegmentationRoutine): - archs = [50, 101] - - def __init__( - self, - num_classes: int, - loss: nn.Module, - version: Literal["std"], - arch: int, - style: Literal["v3", "v3+"], - output_stride: int, - separable: bool, - metric_subsampling_rate: float = 1e-2, - log_plots: bool = False, - num_bins_cal_err: int = 15, - pretrained_backbone: bool = True, - ) -> None: - params = { - "num_classes": num_classes, - "arch": arch, - "style": style, - "output_stride": output_stride, - "separable": separable, - "pretrained_backbone": pretrained_backbone, - } - - format_batch_fn = nn.Identity() - - model = deep_lab_v3_resnet(**params) - super().__init__( - num_classes=num_classes, - model=model, - loss=loss, - format_batch_fn=format_batch_fn, - metric_subsampling_rate=metric_subsampling_rate, - log_plots=log_plots, - num_bins_cal_err=num_bins_cal_err, - ) - self.save_hyperparameters(ignore=["loss"]) diff --git a/torch_uncertainty/baselines/segmentation/segformer.py b/torch_uncertainty/baselines/segmentation/segformer.py deleted file mode 100644 index 2bbdaba7..00000000 --- a/torch_uncertainty/baselines/segmentation/segformer.py +++ /dev/null @@ -1,62 +0,0 @@ -from typing import Literal - -from torch import nn - -from 
torch_uncertainty.models.segmentation.segformer import ( - seg_former, -) -from torch_uncertainty.routines.segmentation import SegmentationRoutine - - -class SegFormerBaseline(SegmentationRoutine): - single = ["std"] - versions = { - "std": seg_former, - } - archs = [0, 1, 2, 3, 4, 5] - - def __init__( - self, - num_classes: int, - loss: nn.Module, - version: Literal["std"], - arch: int, - ) -> None: - r"""SegFormer backbone baseline for segmentation providing support for - various versions and architectures. - - Args: - num_classes (int): Number of classes to predict. - loss (type[Module]): Training loss. - version (str): Determines which SegFormer version to use. Options are:``"std"``: original SegFormer - arch (int): Determines which architecture to use. Options are: - - - ``0``: SegFormer-B0 - - ``1``: SegFormer-B1 - - ``2``: SegFormer-B2 - - ``3``: SegFormer-B3 - - ``4``: SegFormer-B4 - - ``5``: SegFormer-B5 - - num_estimators (int, optional): Number of estimators in the - ensemble. Defaults to 1 (single model). - """ - params = { - "num_classes": num_classes, - "arch": arch, - } - - format_batch_fn = nn.Identity() - - if version not in self.versions: - raise ValueError(f"Unknown version {version}") - - model = self.versions[version](**params) - - super().__init__( - num_classes=num_classes, - model=model, - loss=loss, - format_batch_fn=format_batch_fn, - ) - self.save_hyperparameters(ignore=["loss"]) From e6ed38e937fb0683228cf8f7112d3a5fc09f6b3a Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Tue, 29 Jul 2025 11:02:55 +0200 Subject: [PATCH 02/25] :white_check_mark: Adapt test --- tests/test_cli.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 34df2d1e..6061c749 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,7 @@ import sys -from torch_uncertainty.baselines.classification import ResNetBaseline +from torch_uncertainty.models import resnet +from torch_uncertainty.routines import ClassificationRoutine from torch_uncertainty.datamodules import CIFAR10DataModule from torch_uncertainty.utils.cli import TULightningCLI, TUSaveConfigCallback @@ -12,14 +13,16 @@ def test_cli_init(self) -> None: """Test CLI initialization.""" sys.argv = [ "file.py", - "--model.in_channels", + "--model.model.class_path", + "torch_uncertainty.models.resnet", + "--model.model.init_args.in_channels", "3", - "--model.num_classes", + "--model.model.init_args.num_classes", "10", - "--model.version", - "std", - "--model.arch", + "--model.model.init_args.arch", "18", + "--model.num_classes", + "10", "--model.loss.class_path", "torch.nn.CrossEntropyLoss", "--data.root", @@ -30,7 +33,7 @@ def test_cli_init(self) -> None: "--trainer.callbacks.monitor=val/cls/Acc", "--trainer.callbacks.mode=max", ] - cli = TULightningCLI(ResNetBaseline, CIFAR10DataModule, run=False) + cli = TULightningCLI(ClassificationRoutine, CIFAR10DataModule, run=False) assert cli.eval_after_fit_default is False assert cli.save_config_callback == TUSaveConfigCallback assert isinstance(cli.trainer.callbacks[0], TUSaveConfigCallback) From 9af8282d06d3a5515f3c24086568b4d2442d4037 Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Thu, 31 Jul 2025 13:27:19 +0200 Subject: [PATCH 03/25] :book: Add one paper using TU --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fc0994c3..8d011c68 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,7 @@ Check out all our tutorials at 
[torch-uncertainty.github.io/auto_tutorials](http The following projects use TorchUncertainty: +- _Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation_ - [CVPR 2025](https://openaccess.thecvf.com/content/CVPR2025/papers/Franchi_Towards_Understanding_and_Quantifying_Uncertainty_for_Text-to-Image_Generation_CVPR_2025_paper.pdf) - _Towards Understanding Why Label Smoothing Degrades Selective Classification and How to Fix It_ - [ICLR 2025](https://arxiv.org/abs/2403.14715) - _A Symmetry-Aware Exploration of Bayesian Neural Network Posteriors_ - [ICLR 2024](https://arxiv.org/abs/2310.08287) From 7582564679e2d872a8ce250a9dcedff20986c79e Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Thu, 31 Jul 2025 13:27:19 +0200 Subject: [PATCH 04/25] :book: Add one paper using TU --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fc0994c3..8d011c68 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,7 @@ Check out all our tutorials at [torch-uncertainty.github.io/auto_tutorials](http The following projects use TorchUncertainty: +- _Towards Understanding and Quantifying Uncertainty for Text-to-Image Generation_ - [CVPR 2025](https://openaccess.thecvf.com/content/CVPR2025/papers/Franchi_Towards_Understanding_and_Quantifying_Uncertainty_for_Text-to-Image_Generation_CVPR_2025_paper.pdf) - _Towards Understanding Why Label Smoothing Degrades Selective Classification and How to Fix It_ - [ICLR 2025](https://arxiv.org/abs/2403.14715) - _A Symmetry-Aware Exploration of Bayesian Neural Network Posteriors_ - [ICLR 2024](https://arxiv.org/abs/2310.08287) From 552a5c3ce60ecf37bc7507ca63bb9e4ab5d482b2 Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Fri, 1 Aug 2025 01:42:31 +0200 Subject: [PATCH 05/25] :lipstick: Show val NLL in probabilistic regression --- torch_uncertainty/routines/regression.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/torch_uncertainty/routines/regression.py b/torch_uncertainty/routines/regression.py index db954f2c..e952c27f 100644 --- a/torch_uncertainty/routines/regression.py +++ b/torch_uncertainty/routines/regression.py @@ -333,7 +333,15 @@ def on_validation_epoch_end(self) -> None: ) self.val_metrics.reset() if self.probabilistic: - self.log_dict(self.val_prob_metrics.compute(), sync_dist=True) + prob_dict = self.val_prob_metrics.compute() + self.log_dict(prob_dict, logger=True, sync_dist=True) + self.log( + "NLL", + prob_dict["val/reg/NLL"], + prog_bar=True, + logger=False, + sync_dist=True, + ) self.val_prob_metrics.reset() def on_test_epoch_end(self) -> None: From bfc2bcd0e29d37e49b3fd035856cd43e8fe199f4 Mon Sep 17 00:00:00 2001 From: "roko.torbarina" Date: Fri, 1 Aug 2025 02:46:36 +0200 Subject: [PATCH 06/25] =?UTF-8?q?=F0=9F=93=9A=20Remove=20MCDropout=20class?= =?UTF-8?q?=20from=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/source/api.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index ea39d418..fcb61256 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -221,7 +221,6 @@ Classes BatchEnsemble CheckpointCollector EMA - MCDropout StochasticModel SWA SWAG From a528da3ece05e1fb31f71db0ebf8cd53cb15c6c2 Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Sat, 2 Aug 2025 22:26:11 +0200 Subject: [PATCH 07/25] :lipstick: Small improvement for the EMA --- torch_uncertainty/models/wrappers/ema.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff 
--git a/torch_uncertainty/models/wrappers/ema.py b/torch_uncertainty/models/wrappers/ema.py index b3228f27..5275bf44 100644 --- a/torch_uncertainty/models/wrappers/ema.py +++ b/torch_uncertainty/models/wrappers/ema.py @@ -14,19 +14,23 @@ def __init__( The :attr:`model` given as argument is used to compute the gradient during the training. The EMA model is regularly updated with the inner-model and used at evaluation time. - The :attr:`model` given as argument is used to compute the gradient during the training. - The EMA model is regularly updated with the inner-model and used at evaluation time. - Args: - model (nn.Module): The model to train and ensemble. - momentum (float): The momentum of the moving average. + model (nn.Module): The model to train. + momentum (float): The momentum of the moving average. The larger the momentum, + the more stable the model. + + Note: + The momentum value used is often large, such as 0.98. """ super().__init__() _ema_checks(momentum) self.core_model = model self.ema_model = copy.deepcopy(model) self.momentum = momentum - self.remainder = 1 - momentum + + @property + def remainder(self): + return 1 - self.momentum def update_wrapper(self, epoch: int | None = None) -> None: """Update the EMA model. From 8aebd5fc6903c894cf1dbdbb9855757e898369c5 Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Sat, 2 Aug 2025 22:39:24 +0200 Subject: [PATCH 08/25] :shirt: Format --- tests/datamodules/classification/test_ucr_uea.py | 3 +-- tests/layers/test_batch.py | 8 +++++++- torch_uncertainty/models/wrappers/ema.py | 6 +++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/datamodules/classification/test_ucr_uea.py b/tests/datamodules/classification/test_ucr_uea.py index 4871d3fb..53953211 100644 --- a/tests/datamodules/classification/test_ucr_uea.py +++ b/tests/datamodules/classification/test_ucr_uea.py @@ -16,7 +16,7 @@ def test_ucr_uea_main(self) -> None: dm.train_dataloader() dm.val_dataloader() dm.test_dataloader() - + dm = UCRUEADataModule( dataset_name="test", batch_size=128, @@ -25,4 +25,3 @@ def test_ucr_uea_main(self) -> None: dm.dataset = DummyClassificationDataset dm.setup() dm.setup("test") - \ No newline at end of file diff --git a/tests/layers/test_batch.py b/tests/layers/test_batch.py index 28dc1d83..e7b216e5 100644 --- a/tests/layers/test_batch.py +++ b/tests/layers/test_batch.py @@ -1,7 +1,12 @@ import pytest import torch -from torch_uncertainty.layers.batch_ensemble import BatchConv1d, BatchConv2d, BatchConvTranspose2d, BatchLinear +from torch_uncertainty.layers.batch_ensemble import ( + BatchConv1d, + BatchConv2d, + BatchConvTranspose2d, + BatchLinear, +) @pytest.fixture @@ -62,6 +67,7 @@ def test_conv_one_estimator(self, oned_input: torch.Tensor) -> None: layer = BatchConv1d(6, 2, num_estimators=1, kernel_size=1, bias=False) assert layer(oned_input).shape == torch.Size([5, 2, 3]) + class TestBatchConv2d: """Testing the BatchConv2d layer class.""" diff --git a/torch_uncertainty/models/wrappers/ema.py b/torch_uncertainty/models/wrappers/ema.py index 5275bf44..e3815954 100644 --- a/torch_uncertainty/models/wrappers/ema.py +++ b/torch_uncertainty/models/wrappers/ema.py @@ -16,11 +16,11 @@ def __init__( Args: model (nn.Module): The model to train. - momentum (float): The momentum of the moving average. The larger the momentum, + momentum (float): The momentum of the moving average. The larger the momentum, the more stable the model. - Note: - The momentum value used is often large, such as 0.98. 
+ Note: + The momentum value is often large, such as 0.9 or 0.95. """ super().__init__() _ema_checks(momentum) From ecb47290e1508e587441d7d26fcccc254eb70294 Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Sun, 3 Aug 2025 11:17:28 +0200 Subject: [PATCH 09/25] :sparkles: Add support for the Gamma distribution --- torch_uncertainty/layers/distributions.py | 82 +++++++++++++++++++++++ torch_uncertainty/utils/distributions.py | 15 +++-- 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/torch_uncertainty/layers/distributions.py b/torch_uncertainty/layers/distributions.py index 7f2cbc59..c710c436 100644 --- a/torch_uncertainty/layers/distributions.py +++ b/torch_uncertainty/layers/distributions.py @@ -12,6 +12,8 @@ def get_dist_linear_layer(dist_family: str) -> type[nn.Module]: return LaplaceLinear if dist_family == "cauchy": return CauchyLinear + if dist_family == "gamma": + return GammaLinear if dist_family == "student": return StudentTLinear if dist_family == "nig": @@ -28,6 +30,8 @@ def get_dist_conv_layer(dist_family: str) -> type[nn.Module]: return LaplaceConvNd if dist_family == "cauchy": return CauchyConvNd + if dist_family == "gamma": + return GammaConvNd if dist_family == "student": return StudentTConvNd if dist_family == "nig": @@ -302,6 +306,84 @@ class CauchyConvNd(_LocScaleConvNd): """ +class GammaLinear(_ExpandOutputLinear): + """Gamma distribution Linear Density Layer. + + Args: + base_layer (type[nn.Module]): The base layer class. + event_dim (int): The number of event dimensions. + min_scale (float): The minimal value of the scale parameter. + **layer_args: Additional arguments for the base layer. + + Note: + You should avoid null targets when using the Gamma distribution. + """ + + def __init__( + self, + base_layer: type[nn.Module], + event_dim: int, + min_concentration: float = 1e-6, + min_rate: float = 1e-6, + **layer_args, + ) -> None: + super().__init__( + base_layer=base_layer, + event_dim=event_dim, + num_params=2, + **layer_args, + ) + self.min_concentration = min_concentration + self.min_rate = min_rate + + def forward(self, x: Tensor) -> dict[str, Tensor]: + x = super().forward(x) + concentration = torch.clamp( + F.softplus(x[..., : self.event_dim]), min=self.min_concentration + ) + rate = torch.clamp( + F.softplus(x[..., self.event_dim : 2 * self.event_dim]), min=self.min_rate + ) + return {"concentration": concentration, "rate": rate} + + +class GammaConvNd(_ExpandOutputConvNd): + """Gamma distribution Convolutional Density Layer. + + Args: + base_layer (type[nn.Module]): The base layer class. + event_dim (int): The number of event dimensions. + min_scale (float): The minimal value of the scale parameter. + **layer_args: Additional arguments for the base layer. + + Note: + You should avoid null targets when using the Gamma distribution. 
+ """ + + def __init__( + self, + base_layer: type[nn.Module], + event_dim: int, + min_scale: float = 1e-6, + **layer_args, + ) -> None: + super().__init__( + base_layer=base_layer, + event_dim=event_dim, + num_params=2, + **layer_args, + ) + self.min_scale = min_scale + + def forward(self, x: Tensor) -> dict[str, Tensor]: + x = super().forward(x) + loc = x[:, : self.event_dim] + scale = torch.clamp( + F.softplus(x[:, self.event_dim : 2 * self.event_dim]), min=self.min_scale + ) + return {"loc": loc, "scale": scale} + + class StudentTLinear(_ExpandOutputLinear): def __init__( self, diff --git a/torch_uncertainty/utils/distributions.py b/torch_uncertainty/utils/distributions.py index 27d49192..e59ae019 100644 --- a/torch_uncertainty/utils/distributions.py +++ b/torch_uncertainty/utils/distributions.py @@ -6,6 +6,7 @@ from torch.distributions import ( Cauchy, Distribution, + Gamma, Laplace, Normal, StudentT, @@ -34,14 +35,16 @@ def get_dist_class(dist_family: str) -> type[Distribution]: return Normal if dist_family == "laplace": return Laplace - if dist_family == "nig": - return NormalInverseGamma if dist_family == "cauchy": return Cauchy + if dist_family == "gamma": + return Gamma if dist_family == "student": return TUStudentT + if dist_family == "nig": + return NormalInverseGamma raise NotImplementedError( - f"{dist_family} distribution is not supported. Raise an issue if needed." + f"{dist_family} distribution is currently not supported. Raise an issue if needed." ) @@ -60,7 +63,7 @@ def get_dist_estimate(dist: Distribution, dist_estimate: str) -> Tensor: if dist_estimate == "mode": return dist.mode raise NotImplementedError( - f"{dist_estimate} estimate is not supported.Raise an issue if needed." + f"{dist_estimate} estimate is not supported. Raise an issue if needed." ) @@ -68,7 +71,7 @@ class TUStudentT(StudentT): def cdf(self, value: Tensor) -> Tensor: if not scipy_installed: # coverage: ignore raise ImportError( - "Please install torch_uncertainty with the distribution option:" + "Please install torch_uncertainty with the distribution option: " """pip install -U "torch_uncertainty[distribution]".""" ) if self._validate_args: # coverage: ignore @@ -82,7 +85,7 @@ def cdf(self, value: Tensor) -> Tensor: def icdf(self, value: Tensor) -> Tensor: if not scipy_installed: # coverage: ignore raise ImportError( - "Please install torch_uncertainty with the distribution option:" + "Please install torch_uncertainty with the distribution option: " """pip install -U "torch_uncertainty[distribution]".""" ) From a441d2def1830a8501fdc71aca2b2fff14879def Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Tue, 5 Aug 2025 22:02:40 +0200 Subject: [PATCH 10/25] :bug: Add forgotten mlps to all --- torch_uncertainty/metrics/classification/calibration_error.py | 2 +- torch_uncertainty/models/mlp.py | 2 +- torch_uncertainty/routines/regression.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/torch_uncertainty/metrics/classification/calibration_error.py b/torch_uncertainty/metrics/classification/calibration_error.py index a5ca9131..7d1115c0 100644 --- a/torch_uncertainty/metrics/classification/calibration_error.py +++ b/torch_uncertainty/metrics/classification/calibration_error.py @@ -311,7 +311,7 @@ def __new__( # type: ignore[misc] for details. Our version of the metric is a wrapper around the original metric providing a plotting functionality. 
""" if kwargs.get("n_bins") is not None: - raise ValueError("`n_bins` does not exist, use `num_bins`.") + raise ValueError("`n_bins` does not exist in TorchUncertainty, use `num_bins`.") if adaptive: return AdaptiveCalibrationError( task=task, diff --git a/torch_uncertainty/models/mlp.py b/torch_uncertainty/models/mlp.py index 398c00ab..12ae5ea9 100644 --- a/torch_uncertainty/models/mlp.py +++ b/torch_uncertainty/models/mlp.py @@ -8,7 +8,7 @@ from torch_uncertainty.layers.distributions import get_dist_linear_layer from torch_uncertainty.models import StochasticModel -__all__ = ["bayesian_mlp", "mlp", "packed_mlp"] +__all__ = ["batched_mlp", "bayesian_mlp", "mimo_mlp", "mlp", "packed_mlp"] class _MLP(nn.Module): diff --git a/torch_uncertainty/routines/regression.py b/torch_uncertainty/routines/regression.py index e952c27f..65ec86f6 100644 --- a/torch_uncertainty/routines/regression.py +++ b/torch_uncertainty/routines/regression.py @@ -200,8 +200,8 @@ def forward(self, inputs: Tensor) -> Tensor | dict[str, Tensor]: pred = {k: v.squeeze(-1) for k, v in pred.items()} else: raise TypeError( - "If the model is probabilistic, the output must be a dictionary ", - "of PyTorch distributions.", + "The model is probabilistic: the output must be a dictionary ", + "of PyTorch distribution parameters.", ) else: if self.one_dim_regression: From d4a4f6eebdce56fc708650b496c1a25d18c468b2 Mon Sep 17 00:00:00 2001 From: Olivier Laurent Date: Fri, 15 Aug 2025 21:03:12 +0200 Subject: [PATCH 11/25] :lipstick: remove output plot type --- .../metrics/classification/calibration_error.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torch_uncertainty/metrics/classification/calibration_error.py b/torch_uncertainty/metrics/classification/calibration_error.py index 7d1115c0..3b26adae 100644 --- a/torch_uncertainty/metrics/classification/calibration_error.py +++ b/torch_uncertainty/metrics/classification/calibration_error.py @@ -15,7 +15,6 @@ from torchmetrics.metric import Metric from torchmetrics.utilities.data import dim_zero_cat from torchmetrics.utilities.enums import ClassificationTaskNoMultilabel -from torchmetrics.utilities.plot import _PLOT_OUT_TYPE from .adaptive_calibration_error import AdaptiveCalibrationError @@ -143,7 +142,7 @@ def reliability_chart( title: str = "Reliability Diagram", figsize: tuple[int, int] = (6, 6), dpi: int = 150, -) -> _PLOT_OUT_TYPE: +) -> tuple[object, object]: """Builds Reliability Diagram `Source `_. 
""" @@ -177,7 +176,7 @@ def reliability_chart( return fig, ax -def custom_plot(self) -> _PLOT_OUT_TYPE: +def custom_plot(self) -> tuple[object, object]: confidences = dim_zero_cat(self.confidences) accuracies = dim_zero_cat(self.accuracies) From a95b64a1bf32a242d9a1cebcdbee33e13647a382 Mon Sep 17 00:00:00 2001 From: alafage Date: Fri, 22 Aug 2025 23:15:42 +0200 Subject: [PATCH 12/25] :white_check_mark: Fix CalibrationError failure test --- tests/metrics/classification/test_calibration.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/metrics/classification/test_calibration.py b/tests/metrics/classification/test_calibration.py index 615a3129..99af5428 100644 --- a/tests/metrics/classification/test_calibration.py +++ b/tests/metrics/classification/test_calibration.py @@ -49,7 +49,9 @@ def test_plot_multiclass( def test_errors(self) -> None: with pytest.raises(TypeError, match="is expected to be `int`"): CalibrationError(task="multiclass", num_classes=None) - with pytest.raises(ValueError, match="`n_bins` does not exist, use `num_bins`."): + with pytest.raises( + ValueError, match="`n_bins` does not exist in TorchUncertainty, use `num_bins`." + ): CalibrationError(task="multiclass", num_classes=2, n_bins=1) From f5564e8c685b1410f47502f93c539a7c308d5cc7 Mon Sep 17 00:00:00 2001 From: "roko.torbarina" Date: Fri, 8 Aug 2025 00:45:35 +0200 Subject: [PATCH 13/25] =?UTF-8?q?=F0=9F=93=9A=20Fix=20cropped=20plot=20axi?= =?UTF-8?q?s=20text=20(#55)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Bayesian_Methods/tutorial_bayesian.py | 3 +-- .../Classification/tutorial_ood_detection.py | 1 + .../Ensemble_Methods/tutorial_from_de_to_pe.py | 14 ++++++++++++-- .../Post_Hoc_Methods/tutorial_scaler.py | 2 ++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py b/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py index f85288c4..f1ab1b71 100644 --- a/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py +++ b/auto_tutorial_source/Bayesian_Methods/tutorial_bayesian.py @@ -16,8 +16,7 @@ For more information on Bayesian Neural Networks, we refer to the following resources: - Weight Uncertainty in Neural Networks `ICML2015 `_ -- Hands-on Bayesian Neural Networks - a Tutorial for Deep Learning Users `IEEE Computational Intelligence Magazine - `_ +- Hands-on Bayesian Neural Networks - a Tutorial for Deep Learning Users `IEEE Computational Intelligence Magazine `_ Training a Bayesian LeNet using TorchUncertainty models and Lightning --------------------------------------------------------------------- diff --git a/auto_tutorial_source/Classification/tutorial_ood_detection.py b/auto_tutorial_source/Classification/tutorial_ood_detection.py index 4bbbefee..119baa0d 100644 --- a/auto_tutorial_source/Classification/tutorial_ood_detection.py +++ b/auto_tutorial_source/Classification/tutorial_ood_detection.py @@ -146,4 +146,5 @@ # ---------- # # [1] Hendrycks, D., & Gimpel, K. (2016). A baseline for detecting misclassified and out-of-distribution examples in neural networks. In ICLR 2017. +# # [2] Hendrycks, D., Basart, S., Mazeika, M., Zou, A., Kwon, J., Mostajabi, M., ... & Song, D. (2019). Scaling out-of-distribution detection for real-world settings. In ICML 2022. 
diff --git a/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py b/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py index 8fff9d4a..a5ae1510 100644 --- a/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py +++ b/auto_tutorial_source/Ensemble_Methods/tutorial_from_de_to_pe.py @@ -3,7 +3,11 @@ Improved Ensemble parameter-efficiency with Packed-Ensembles ============================================================ -*This tutorial is adapted from a notebook part of a lecture given at the `Helmholtz AI Conference `_ by Sebastian Starke, Peter Steinbach, Gianni Franchi, and Olivier Laurent.* +*This tutorial is adapted from a notebook part of a lecture given at the* |conference|_ *by Sebastian Starke, Peter Steinbach, Gianni Franchi, and Olivier Laurent.* + +.. _conference: https://haicon24.de/ + +.. |conference| replace:: *Helmholtz AI Conference* In this notebook will work on the MNIST dataset that was introduced by Corinna Cortes, Christopher J.C. Burges, and later modified by Yann LeCun in the foundational paper: @@ -12,6 +16,7 @@ The MNIST dataset consists of 70 000 images of handwritten digits from 0 to 9. The images are grayscale and 28x28-pixel sized. The task is to classify the images into their respective digits. The dataset can be automatically downloaded using the `torchvision` library. In this notebook, we will train a model and an ensemble on this task and evaluate their performance. The performance will consist in the following metrics: + - Accuracy: the proportion of correctly classified images, - Brier score: a measure of the quality of the predicted probabilities, - Calibration error: a measure of the calibration of the predicted probabilities, @@ -174,13 +179,16 @@ def optim_recipe(model, lr_mult: float = 1.0): # This table provides a lot of information: # # **OOD Detection: Binary Classification MNIST vs. FashionMNIST** +# # - AUPR/AUROC/FPR95: Measures the quality of the OOD detection. The higher the better for AUPR and AUROC, the lower the better for FPR95. # # **Calibration: Reliability of the Predictions** +# # - ECE: Expected Calibration Error. The lower the better. # - aECE: Adaptive Expected Calibration Error. The lower the better. (~More precise version of the ECE) # # **Classification Performance** +# # - Accuracy: The ratio of correctly classified images. The higher the better. # - Brier: The quality of the predicted probabilities (Mean Squared Error of the predictions vs. ground-truth). The lower the better. # - Negative Log-Likelihood: The value of the loss on the test set. The lower the better. @@ -236,7 +244,7 @@ def optim_recipe(model, lr_mult: float = 1.0): # We need to multiply the learning rate by 2 to account for the fact that we have 2 models # in the ensemble and that we average the loss over all the predictions. # -# #### Downloading the pre-trained models +# **Downloading the pre-trained models** # # We have put the pre-trained models on Hugging Face that you can download with the utility function # "hf_hub_download" imported just below. These models are trained for 75 epochs and are therefore not @@ -393,9 +401,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # In constrast to calibration, the values of the confidence scores are not important, only the order of the scores. 
*Ideally, the best model will order all the correct predictions first, and all the incorrect predictions last.* In this case, there will be a threshold so that all the predictions above the threshold are correct, and all the predictions below the threshold are incorrect. # # In TorchUncertainty, we look at 3 different metrics for selective classification: +# # - **AURC**: The area under the Risk (% of errors) vs. Coverage (% of classified samples) curve. This curve expresses how the risk of the model evolves as we increase the coverage (the proportion of predictions that are above the selection threshold). This metric will be minimized by a model able to perfectly separate the correct and incorrect predictions. # # The following metrics are computed at a fixed risk and coverage level and that have practical interests. The idea of these metrics is that you can set the selection threshold to achieve a certain level of risk and coverage, as required by the technical constraints of your application: +# # - **Coverage at 5% Risk**: The proportion of predictions that are above the selection threshold when it is set for the risk to egal 5%. Set the risk threshold to your application constraints. The higher the better. # - **Risk at 80% Coverage**: The proportion of errors when the coverage is set to 80%. Set the coverage threshold to your application constraints. The lower the better. # diff --git a/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py b/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py index 45706873..e47a4c72 100644 --- a/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py +++ b/auto_tutorial_source/Post_Hoc_Methods/tutorial_scaler.py @@ -101,6 +101,7 @@ # We also compute and plot the top-label calibration figure. We see that the # model is not well calibrated. fig, ax = ece.plot() +fig.tight_layout() fig.show() # %% @@ -143,6 +144,7 @@ # that the model is now better calibrated. If the temperature is greater than 1, # the final model is less confident than before. 
fig, ax = ece.plot() +fig.tight_layout() fig.show() # %% From 1937ca48c1d4cf4b1452d8601c2017cd10913bb7 Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 25 Aug 2025 09:25:18 +0200 Subject: [PATCH 14/25] :wrench: Attempt fixing `Server certificate verification failed` --- .github/workflows/run-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 57ca396c..3c34718c 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -36,6 +36,9 @@ jobs: echo "PYTHON_VERSION=$(python -c "import platform; print(platform.python_version())")" echo "PYTHON_VERSION=$(python -c "import platform; print(platform.python_version())")" >> $GITHUB_ENV + - name: Update CA certificates + run: sudo update-ca-certificates + - name: Get changed files id: changed-files-specific uses: tj-actions/changed-files@v46 From 8587f53093e187882b67241be5e1b7ec4efab2a4 Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 25 Aug 2025 09:44:12 +0200 Subject: [PATCH 15/25] :bug: Temporary fix for UCI dataset url domain not working anymore --- .github/workflows/run-tests.yml | 3 -- tests/datamodules/classification/test_uci.py | 40 +++++++++++--------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 3c34718c..57ca396c 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -36,9 +36,6 @@ jobs: echo "PYTHON_VERSION=$(python -c "import platform; print(platform.python_version())")" echo "PYTHON_VERSION=$(python -c "import platform; print(platform.python_version())")" >> $GITHUB_ENV - - name: Update CA certificates - run: sudo update-ca-certificates - - name: Get changed files id: changed-files-specific uses: tj-actions/changed-files@v46 diff --git a/tests/datamodules/classification/test_uci.py b/tests/datamodules/classification/test_uci.py index 54345405..72a02a6d 100644 --- a/tests/datamodules/classification/test_uci.py +++ b/tests/datamodules/classification/test_uci.py @@ -1,3 +1,6 @@ +import warnings +from urllib.error import URLError + import pytest from torch_uncertainty.datamodules.classification import ( @@ -13,27 +16,30 @@ class TestHTRU2DataModule: """Testing the HTRU2DataModule datamodule class.""" def test_htru2(self) -> None: - dm = HTRU2DataModule(root="./data/", batch_size=128) + try: + dm = HTRU2DataModule(root="./data/", batch_size=128) - dm.prepare_data() - dm.setup() + dm.prepare_data() + dm.setup() - dm.train_dataloader() - dm.val_dataloader() - dm.test_dataloader() + dm.train_dataloader() + dm.val_dataloader() + dm.test_dataloader() - dm.setup("test") - dm.test_dataloader() + dm.setup("test") + dm.test_dataloader() - dm = HTRU2DataModule(root="./data/", batch_size=128, val_split=0.1) + dm = HTRU2DataModule(root="./data/", batch_size=128, val_split=0.1) - dm.prepare_data() - dm.setup() + dm.prepare_data() + dm.setup() - with pytest.raises(ValueError): - dm.setup("other") + with pytest.raises(ValueError): + dm.setup("other") - dm = BankMarketingDataModule(root="./data/", batch_size=128) - dm = DOTA2GamesDataModule(root="./data/", batch_size=128) - dm = OnlineShoppersDataModule(root="./data/", batch_size=128) - dm = SpamBaseDataModule(root="./data/", batch_size=128) + dm = BankMarketingDataModule(root="./data/", batch_size=128) + dm = DOTA2GamesDataModule(root="./data/", batch_size=128) + dm = OnlineShoppersDataModule(root="./data/", batch_size=128) + dm = SpamBaseDataModule(root="./data/", 
batch_size=128) + except URLError as e: + warnings.warn(f"Data download failed due to network error: {e}", stacklevel=2) From 2255238c63ecb0f899d8a94d80d7a85560fee28f Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 25 Aug 2025 10:26:00 +0200 Subject: [PATCH 16/25] :wrench: update build-doc workflow file --- .github/workflows/build-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index 031878a6..b7b08a41 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -14,7 +14,7 @@ env: jobs: documentation: - runs-on: self-hosted + runs-on: [self-hosted] steps: - uses: actions/checkout@v4 From aa27f6c48326f59fa103671f94fc39213c40753b Mon Sep 17 00:00:00 2001 From: alafage Date: Tue, 26 Aug 2025 15:59:21 +0200 Subject: [PATCH 17/25] :art: Rename attribute `model` to `core_model` in all wrappers --- tests/models/wrappers/test_batch_ensemble.py | 4 +- tests/models/wrappers/test_mc_dropout.py | 2 +- .../models/classification/lenet.py | 2 +- torch_uncertainty/models/mlp.py | 2 +- .../models/wrappers/batch_ensemble.py | 38 +++++++++--------- .../models/wrappers/checkpoint_collector.py | 6 +-- .../models/wrappers/deep_ensembles.py | 40 ++++++++++--------- torch_uncertainty/models/wrappers/ema.py | 8 ++-- .../models/wrappers/mc_dropout.py | 23 ++++++----- .../models/wrappers/stochastic.py | 4 +- torch_uncertainty/models/wrappers/swa.py | 6 +-- torch_uncertainty/models/wrappers/swag.py | 6 +-- torch_uncertainty/models/wrappers/zero.py | 6 +-- 13 files changed, 76 insertions(+), 71 deletions(-) diff --git a/tests/models/wrappers/test_batch_ensemble.py b/tests/models/wrappers/test_batch_ensemble.py index 5ea2ff19..3945f0d0 100644 --- a/tests/models/wrappers/test_batch_ensemble.py +++ b/tests/models/wrappers/test_batch_ensemble.py @@ -45,8 +45,8 @@ def test_convert_layers(self) -> None: model = _DummyModel(in_features, out_features) wrapped_model = batch_ensemble(model, num_estimators, convert_layers=True) assert wrapped_model.num_estimators == num_estimators - assert isinstance(wrapped_model.model.conv, BatchConv2d) - assert isinstance(wrapped_model.model.fc, BatchLinear) + assert isinstance(wrapped_model.core_model.conv, BatchConv2d) + assert isinstance(wrapped_model.core_model.fc, BatchLinear) def test_forward_pass(self, img_input) -> None: batch_size = img_input.size(0) diff --git a/tests/models/wrappers/test_mc_dropout.py b/tests/models/wrappers/test_mc_dropout.py index e5bc56da..041db9da 100644 --- a/tests/models/wrappers/test_mc_dropout.py +++ b/tests/models/wrappers/test_mc_dropout.py @@ -135,7 +135,7 @@ def test_mc_dropout_errors(self) -> None: mc_dropout(model, num_estimators=5, task="regression") with pytest.raises(ValueError, match="`num_estimators` must be strictly positive"): - mc_dropout(model=model, num_estimators=-1, last_layer=True, on_batch=True) + mc_dropout(core_model=model, num_estimators=-1, last_layer=True, on_batch=True) dropout_model = mc_dropout(model, 5) with pytest.raises(TypeError, match="Training mode is expected to be boolean"): diff --git a/torch_uncertainty/models/classification/lenet.py b/torch_uncertainty/models/classification/lenet.py index 3bdb0c70..b1d1a5db 100644 --- a/torch_uncertainty/models/classification/lenet.py +++ b/torch_uncertainty/models/classification/lenet.py @@ -139,7 +139,7 @@ def batchensemble_lenet( dropout_rate=dropout_rate, ) return BatchEnsemble( - model=model, + core_model=model, num_estimators=num_estimators, 
repeat_training_inputs=repeat_training_inputs, convert_layers=True, diff --git a/torch_uncertainty/models/mlp.py b/torch_uncertainty/models/mlp.py index 398c00ab..89a99b2b 100644 --- a/torch_uncertainty/models/mlp.py +++ b/torch_uncertainty/models/mlp.py @@ -173,7 +173,7 @@ def _mlp( ) if stochastic: return StochasticModel( - model=model, num_samples=num_samples, probabilistic=dist_family is not None + core_model=model, num_samples=num_samples, probabilistic=dist_family is not None ) return model diff --git a/torch_uncertainty/models/wrappers/batch_ensemble.py b/torch_uncertainty/models/wrappers/batch_ensemble.py index 4c1f42b9..7e8d9289 100644 --- a/torch_uncertainty/models/wrappers/batch_ensemble.py +++ b/torch_uncertainty/models/wrappers/batch_ensemble.py @@ -8,7 +8,7 @@ class BatchEnsemble(nn.Module): def __init__( self, - model: nn.Module, + core_model: nn.Module, num_estimators: int, repeat_training_inputs: bool = False, convert_layers: bool = False, @@ -23,7 +23,7 @@ def __init__( ensuring that each estimator receives the correct data format. Args: - model (nn.Module): The BatchEnsemble model. + core_model (nn.Module): The BatchEnsemble model. num_estimators (int): Number of ensemble members. repeat_training_inputs (optional, bool): Whether to repeat the input batch during training. If ``True``, the input batch is repeated during both training and evaluation. If ``False``, @@ -33,17 +33,17 @@ def __init__( BatchEnsemble counterparts. Default is ``False``. Raises: - ValueError: If neither ``BatchLinear`` nor ``BatchConv2d`` layers are found in the model at the + ValueError: If neither ``BatchLinear`` nor ``BatchConv2d`` layers are found in the core_model at the end of initialization. ValueError: If ``num_estimators`` is less than or equal to ``0``. ValueError: If ``convert_layers=True`` and neither ``nn.Linear`` nor ``nn.Conv2d`` layers are - found in the model. + found in the core_model. Warning: If ``convert_layers==True``, the wrapper will attempt to convert all ``nn.Linear`` and ``nn.Conv2d`` - layers in the model to their BatchEnsemble counterparts. If the model contains other types of + layers in the core_model to their BatchEnsemble counterparts. If the core_model contains other types of layers, the conversion won't happen for these layers. If don't have any ``nn.Linear`` or ``nn.Conv2d`` - layers in the model, the wrapper will raise an error during conversion. + layers in the core_model, the wrapper will raise an error during conversion. Warning: If ``repeat_training_inputs==True`` and you want to use one of the ``torch_uncertainty.routines`` @@ -51,11 +51,11 @@ def __init__( initializing the routine. 
Example: - >>> model = nn.Sequential(nn.Linear(10, 5), nn.ReLU(), nn.Linear(5, 2)) - >>> model = BatchEnsemble(model, num_estimators=4, convert_layers=True) + >>> core_model = nn.Sequential(nn.Linear(10, 5), nn.ReLU(), nn.Linear(5, 2)) + >>> model = BatchEnsemble(core_model, num_estimators=4, convert_layers=True) >>> model BatchEnsemble( - (model): Sequential( + (core_model): Sequential( (0): BatchLinear(in_features=10, out_features=5, num_estimators=4) (1): ReLU() (2): BatchLinear(in_features=5, out_features=2, num_estimators=4) @@ -63,7 +63,7 @@ def __init__( ) """ super().__init__() - self.model = model + self.core_model = core_model self.num_estimators = num_estimators self.repeat_training_inputs = repeat_training_inputs @@ -72,7 +72,7 @@ def __init__( filtered_modules = [ module - for module in self.model.modules() + for module in self.core_model.modules() if isinstance(module, BatchLinear | BatchConv2d) ] _batch_ensemble_checks(filtered_modules, num_estimators) @@ -81,22 +81,22 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: """Repeat the input if ``self.training==False`` or ``repeat_training_inputs==True`` and pass it through the model.""" if not self.training or self.repeat_training_inputs: x = repeat(x, "b ... -> (m b) ...", m=self.num_estimators) - return self.model(x) + return self.core_model(x) def _convert_layers(self) -> None: """Convert the model's layers to BatchEnsemble layers.""" no_valid_layers = True - for name, layer in self.model.named_modules(): + for name, layer in self.core_model.named_modules(): if isinstance(layer, nn.Linear): setattr( - self.model, + self.core_model, name, BatchLinear.from_linear(layer, num_estimators=self.num_estimators), ) no_valid_layers = False elif isinstance(layer, nn.Conv2d): setattr( - self.model, + self.core_model, name, BatchConv2d.from_conv2d(layer, num_estimators=self.num_estimators), ) @@ -121,7 +121,7 @@ def _batch_ensemble_checks(filtered_modules: list[nn.Module], num_estimators: in def batch_ensemble( - model: nn.Module, + core_model: nn.Module, num_estimators: int, repeat_training_inputs: bool = False, convert_layers: bool = False, @@ -129,7 +129,7 @@ def batch_ensemble( """BatchEnsemble wrapper for a model. Args: - model (nn.Module): model to wrap + core_model (nn.Module): model to wrap num_estimators (int): number of ensemble members repeat_training_inputs (bool, optional): whether to repeat the input batch during training. If ``True``, the input batch is repeated during both training and evaluation. If ``False``, @@ -139,10 +139,10 @@ def batch_ensemble( BatchEnsemble counterparts. Default is ``False``. Returns: - BatchEnsemble: BatchEnsemble wrapper for the model + BatchEnsemble: BatchEnsemble wrapper for the :attr:`core_model` """ return BatchEnsemble( - model=model, + core_model=core_model, num_estimators=num_estimators, repeat_training_inputs=repeat_training_inputs, convert_layers=convert_layers, diff --git a/torch_uncertainty/models/wrappers/checkpoint_collector.py b/torch_uncertainty/models/wrappers/checkpoint_collector.py index 2ba13194..5ce37f51 100644 --- a/torch_uncertainty/models/wrappers/checkpoint_collector.py +++ b/torch_uncertainty/models/wrappers/checkpoint_collector.py @@ -7,7 +7,7 @@ class CheckpointCollector(nn.Module): def __init__( self, - model: nn.Module, + core_model: nn.Module, cycle_start: int | None = None, cycle_length: int | None = None, save_schedule: list[int] | None = None, @@ -21,7 +21,7 @@ def __init__( as implemented in TorchUncertainty. 
Args: - model (nn.Module): The model to train and ensemble. + core_model (nn.Module): The model to train and ensemble. cycle_start (int): Epoch to start ensembling. Defaults to ``None``. cycle_length (int): Number of epochs between model collections. Defaults to ``None``. save_schedule (list[int] | None): The epochs at which to save the model. Defaults to ``None``. @@ -52,7 +52,7 @@ def __init__( f"The combination of arguments: cycle_start: {cycle_start}, cycle_length: {cycle_length}, save_schedule: {save_schedule} is not known." ) - self.core_model = model + self.core_model = core_model self.cycle_start = cycle_start self.cycle_length = cycle_length self.save_schedule = save_schedule diff --git a/torch_uncertainty/models/wrappers/deep_ensembles.py b/torch_uncertainty/models/wrappers/deep_ensembles.py index 9a280b4e..749ec34a 100644 --- a/torch_uncertainty/models/wrappers/deep_ensembles.py +++ b/torch_uncertainty/models/wrappers/deep_ensembles.py @@ -9,13 +9,13 @@ class _DeepEnsembles(nn.Module): def __init__( self, - models: list[nn.Module], + core_models: list[nn.Module], store_on_cpu: bool = False, ) -> None: """Create a classification deep ensembles from a list of models.""" super().__init__() - self.core_models = nn.ModuleList(models) - self.num_estimators = len(models) + self.core_models = nn.ModuleList(core_models) + self.num_estimators = len(core_models) self.store_on_cpu = store_on_cpu def forward(self, x: Tensor) -> Tensor: @@ -52,11 +52,11 @@ class _RegDeepEnsembles(_DeepEnsembles): def __init__( self, probabilistic: bool, - models: list[nn.Module], + core_models: list[nn.Module], store_on_cpu: bool = False, ) -> None: """Create a regression deep ensembles from a list of models.""" - super().__init__(models=models, store_on_cpu=store_on_cpu) + super().__init__(core_models=core_models, store_on_cpu=store_on_cpu) self.probabilistic = probabilistic def forward(self, x: Tensor) -> Tensor | dict[str, Tensor]: @@ -87,7 +87,7 @@ def forward(self, x: Tensor) -> Tensor | dict[str, Tensor]: def deep_ensembles( - models: list[nn.Module] | nn.Module, + core_models: list[nn.Module] | nn.Module, num_estimators: int | None = None, task: Literal[ "classification", "regression", "segmentation", "pixel_regression" @@ -101,12 +101,12 @@ def deep_ensembles( """Build a Deep Ensembles out of the original models. Args: - models (list[nn.Module] | nn.Module): The model to be ensembled. + core_models (list[nn.Module] | nn.Module): The model to be ensembled. num_estimators (int | None): The number of estimators in the ensemble. task (Literal[``"classification"``, ``"regression"``, ``"segmentation"``, ``"pixel_regression"``]): The model task. Defaults to ``"classification"``. probabilistic (bool): Whether the regression model is probabilistic. reset_model_parameters (bool): Whether to reset the model parameters - when :attr:models is a module or a list of length 1. Defaults to ``True``. + when :attr:core_models is a module or a list of length 1. Defaults to ``True``. store_on_cpu (bool): Whether to store the models on CPU. Defaults to ``False``. This is useful for large models that do not fit in GPU memory. Only one model will be stored on GPU at a time during forward. The rest will be stored on CPU. @@ -140,26 +140,28 @@ def deep_ensembles( `_. 
""" - if isinstance(models, list) and len(models) == 0: + if isinstance(core_models, list) and len(core_models) == 0: raise ValueError("Models must not be an empty list.") - if (isinstance(models, list) and len(models) == 1) or isinstance(models, nn.Module): + if (isinstance(core_models, list) and len(core_models) == 1) or isinstance( + core_models, nn.Module + ): if num_estimators is None: raise ValueError("if models is a module, num_estimators must be specified.") if num_estimators < 2: raise ValueError(f"num_estimators must be at least 2. Got {num_estimators}.") - if isinstance(models, list): - models = models[0] + if isinstance(core_models, list): + core_models = core_models[0] - models = [copy.deepcopy(models) for _ in range(num_estimators)] + core_models = [copy.deepcopy(core_models) for _ in range(num_estimators)] if reset_model_parameters: - for model in models: + for model in core_models: for layer in model.modules(): if hasattr(layer, "reset_parameters"): layer.reset_parameters() - elif isinstance(models, list) and len(models) > 1 and num_estimators is not None: + elif isinstance(core_models, list) and len(core_models) > 1 and num_estimators is not None: raise ValueError("num_estimators must be None if you provided a non-singleton list.") if ckpt_paths is not None: # coverage: ignore @@ -175,11 +177,11 @@ def deep_ensembles( if len(ckpt_paths) == 0: raise ValueError("No checkpoint files found in the directory.") - if len(models) != len(ckpt_paths): + if len(core_models) != len(ckpt_paths): raise ValueError( "The number of models and the number of checkpoint paths must be the same." ) - for model, ckpt_path in zip(models, ckpt_paths, strict=True): + for model, ckpt_path in zip(core_models, ckpt_paths, strict=True): if isinstance(ckpt_path, str | Path): loaded_data = torch.load(ckpt_path, map_location="cpu") if "state_dict" in loaded_data: @@ -198,12 +200,12 @@ def deep_ensembles( match task: case "classification" | "segmentation": - return _DeepEnsembles(models=models, store_on_cpu=store_on_cpu) + return _DeepEnsembles(core_models=core_models, store_on_cpu=store_on_cpu) case "regression" | "pixel_regression": if probabilistic is None: raise ValueError("probabilistic must be specified for regression models.") return _RegDeepEnsembles( - probabilistic=probabilistic, models=models, store_on_cpu=store_on_cpu + probabilistic=probabilistic, core_models=core_models, store_on_cpu=store_on_cpu ) case _: raise ValueError(f"Unknown task: {task}.") diff --git a/torch_uncertainty/models/wrappers/ema.py b/torch_uncertainty/models/wrappers/ema.py index b3228f27..a369e3b5 100644 --- a/torch_uncertainty/models/wrappers/ema.py +++ b/torch_uncertainty/models/wrappers/ema.py @@ -6,7 +6,7 @@ class EMA(nn.Module): def __init__( self, - model: nn.Module, + core_model: nn.Module, momentum: float, ) -> None: """Exponential Moving Average (EMA). @@ -18,13 +18,13 @@ def __init__( The EMA model is regularly updated with the inner-model and used at evaluation time. Args: - model (nn.Module): The model to train and ensemble. + core_model (nn.Module): The model to train and ensemble. momentum (float): The momentum of the moving average. 
""" super().__init__() _ema_checks(momentum) - self.core_model = model - self.ema_model = copy.deepcopy(model) + self.core_model = core_model + self.ema_model = copy.deepcopy(core_model) self.momentum = momentum self.remainder = 1 - momentum diff --git a/torch_uncertainty/models/wrappers/mc_dropout.py b/torch_uncertainty/models/wrappers/mc_dropout.py index 5e060a75..a1582e2f 100644 --- a/torch_uncertainty/models/wrappers/mc_dropout.py +++ b/torch_uncertainty/models/wrappers/mc_dropout.py @@ -9,7 +9,7 @@ class _MCDropout(nn.Module): def __init__( self, - model: nn.Module, + core_model: nn.Module, num_estimators: int, last_layer: bool, on_batch: bool, @@ -17,7 +17,7 @@ def __init__( """MC Dropout wrapper for a model containing nn.Dropout modules. Args: - model (nn.Module): model to wrap + core_model (nn.Module): model to wrap num_estimators (int): number of estimators to use during the evaluation last_layer (bool): whether to apply dropout to the last layer only. on_batch (bool): Perform the MC-Dropout on the batch-size. Otherwise in a for loop. Useful when constrained in memory. @@ -35,7 +35,7 @@ def __init__( filtered_modules = list( filter( lambda m: isinstance(m, _DropoutNd), - model.modules(), + core_model.modules(), ) ) if last_layer: @@ -44,7 +44,7 @@ def __init__( _dropout_checks(filtered_modules, num_estimators) self.last_layer = last_layer self.on_batch = on_batch - self.core_model = model + self.core_model = core_model self.num_estimators = num_estimators self.filtered_modules = filtered_modules @@ -95,14 +95,17 @@ def forward( class _RegMCDropout(_MCDropout): def __init__( self, - model: nn.Module, + core_model: nn.Module, num_estimators: int, last_layer: bool, on_batch: bool, probabilistic: bool, ): super().__init__( - model=model, num_estimators=num_estimators, last_layer=last_layer, on_batch=on_batch + core_model=core_model, + num_estimators=num_estimators, + last_layer=last_layer, + on_batch=on_batch, ) self.probabilistic = probabilistic @@ -142,7 +145,7 @@ def forward( def mc_dropout( - model: nn.Module, + core_model: nn.Module, num_estimators: int, last_layer: bool = False, on_batch: bool = True, @@ -154,7 +157,7 @@ def mc_dropout( """MC Dropout wrapper for a model. Args: - model (nn.Module): model to wrap + core_model (nn.Module): model to wrap num_estimators (int): number of estimators to use last_layer (bool, optional): whether to apply dropout to the last layer only. Defaults to ``False``. on_batch (bool): Increase the batch_size to perform MC-Dropout. Otherwise in a for loop to reduce memory footprint. Defaults to ``True``. last_layer (bool, optional): whether to apply dropout to the last layer only. Defaults to ``False``. 
@@ -167,7 +170,7 @@ def mc_dropout( match task: case "classification" | "segmentation": return _MCDropout( - model=model, + core_model=core_model, num_estimators=num_estimators, last_layer=last_layer, on_batch=on_batch, @@ -176,7 +179,7 @@ def mc_dropout( if probabilistic is None: raise ValueError("`probabilistic` must be set for regression tasks.") return _RegMCDropout( - model=model, + core_model=core_model, num_estimators=num_estimators, last_layer=last_layer, on_batch=on_batch, diff --git a/torch_uncertainty/models/wrappers/stochastic.py b/torch_uncertainty/models/wrappers/stochastic.py index c369ff6b..ebd48689 100644 --- a/torch_uncertainty/models/wrappers/stochastic.py +++ b/torch_uncertainty/models/wrappers/stochastic.py @@ -7,12 +7,12 @@ class StochasticModel(nn.Module): def __init__( self, - model: nn.Module, + core_model: nn.Module, num_samples: int, probabilistic: bool = False, ) -> None: super().__init__() - self.core_model = model + self.core_model = core_model self.num_samples = num_samples self.probabilistic = probabilistic diff --git a/torch_uncertainty/models/wrappers/swa.py b/torch_uncertainty/models/wrappers/swa.py index 4bb9e21a..000ef0f8 100644 --- a/torch_uncertainty/models/wrappers/swa.py +++ b/torch_uncertainty/models/wrappers/swa.py @@ -10,7 +10,7 @@ class SWA(nn.Module): def __init__( self, - model: nn.Module, + core_model: nn.Module, cycle_start: int, cycle_length: int, ) -> None: @@ -21,7 +21,7 @@ def __init__( uses the base model for training. Args: - model (nn.Module): PyTorch model to be trained. + core_model (nn.Module): PyTorch model to be trained. cycle_start (int): Epoch to start SWA. cycle_length (int): Number of epochs between SWA updates. @@ -31,7 +31,7 @@ def __init__( """ super().__init__() _swa_checks(cycle_start, cycle_length) - self.core_model = model + self.core_model = core_model self.cycle_start = cycle_start self.cycle_length = cycle_length diff --git a/torch_uncertainty/models/wrappers/swag.py b/torch_uncertainty/models/wrappers/swag.py index 594f2afb..d8b60065 100644 --- a/torch_uncertainty/models/wrappers/swag.py +++ b/torch_uncertainty/models/wrappers/swag.py @@ -14,7 +14,7 @@ class SWAG(SWA): def __init__( self, - model: nn.Module, + core_model: nn.Module, cycle_start: int, cycle_length: int, scale: float = 1.0, @@ -36,7 +36,7 @@ def __init__( the batchnorm statistics of the current SWAG samples. Args: - model (nn.Module): PyTorch model to be trained. + core_model (nn.Module): PyTorch model to be trained. cycle_start (int): Begininning of the first SWAG averaging cycle. cycle_length (int): Number of epochs between SWAG updates. The first update occurs at :attr:`cycle_start` + :attr:`cycle_length`. scale (float, optional): Scale of the Gaussian. Defaults to ``1.0``. @@ -52,7 +52,7 @@ def __init__( Note: Modified from https://github.com/wjmaddox/swa_gaussian. 
""" - super().__init__(model, cycle_start, cycle_length) + super().__init__(core_model, cycle_start, cycle_length) _swag_checks(scale, max_num_models, var_clamp) self.num_estimators = num_estimators diff --git a/torch_uncertainty/models/wrappers/zero.py b/torch_uncertainty/models/wrappers/zero.py index 88b572bd..103e41af 100644 --- a/torch_uncertainty/models/wrappers/zero.py +++ b/torch_uncertainty/models/wrappers/zero.py @@ -6,7 +6,7 @@ class Zero(nn.Module): def __init__( - self, model: nn.Module, num_tta: int, filter_views: float = 0.1, eps: float = 1e-8 + self, core_model: nn.Module, num_tta: int, filter_views: float = 0.1, eps: float = 1e-8 ) -> None: """Zero for test-time adaptation. @@ -16,7 +16,7 @@ def __init__( passed as argument (:attr:`model`). Args: - model (nn.Module): The inner model to train. + core_model (nn.Module): The inner model to train. num_tta (int): The number of views at evaluation time. filter_views (float): Filter out 1-:attr:`filter_views` of the predictions of the augmented views. Defaults to ``0.1``. @@ -24,7 +24,7 @@ def __init__( """ super().__init__() _zero_checks(num_tta, filter_views, eps) - self.core_model = model + self.core_model = core_model self.filter = filter_views self.kept_views = int(filter_views * num_tta) self.num_tta = num_tta From bd0d1c61045d3dda984ea5e0482a049e7f41b303 Mon Sep 17 00:00:00 2001 From: alafage Date: Tue, 26 Aug 2025 16:20:21 +0200 Subject: [PATCH 18/25] :wrench: Update CIFAR10 experiments (script + config files) --- .../cifar10/configs/resnet.yaml | 30 --------- .../cifar10/configs/resnet18/batched.yaml | 37 +++++++---- .../configs/resnet18/deep_ensembles.yaml | 62 ++++++++++++++++++ .../cifar10/configs/resnet18/masked.yaml | 37 +++++++---- .../cifar10/configs/resnet18/mimo.yaml | 39 +++++++---- .../cifar10/configs/resnet18/packed.yaml | 40 ++++++++---- .../cifar10/configs/resnet18/standard.yaml | 29 +++++---- .../cifar10/configs/resnet50/batched.yaml | 40 ++++++++---- .../configs/resnet50/deep_ensembles.yaml | 64 +++++++++++++++++++ .../cifar10/configs/resnet50/masked.yaml | 35 ++++++---- .../cifar10/configs/resnet50/mimo.yaml | 37 +++++++---- .../cifar10/configs/resnet50/packed.yaml | 38 +++++++---- .../cifar10/configs/resnet50/standard.yaml | 27 +++++--- .../cifar10/configs/wideresnet28x10.yaml | 31 --------- .../configs/wideresnet28x10/batched.yaml | 32 +++++++--- .../wideresnet28x10/deep_ensembles.yaml | 64 +++++++++++++++++++ .../configs/wideresnet28x10/masked.yaml | 33 ++++++---- .../cifar10/configs/wideresnet28x10/mimo.yaml | 35 ++++++---- .../configs/wideresnet28x10/packed.yaml | 36 +++++++---- .../configs/wideresnet28x10/standard.yaml | 27 +++++--- .../classification/cifar10/deep_ensembles.py | 30 --------- experiments/classification/cifar10/main.py | 20 ++++++ experiments/classification/cifar10/resnet.py | 28 -------- experiments/classification/cifar10/vgg.py | 28 -------- .../classification/cifar10/wideresnet.py | 28 -------- 25 files changed, 553 insertions(+), 354 deletions(-) delete mode 100644 experiments/classification/cifar10/configs/resnet.yaml create mode 100644 experiments/classification/cifar10/configs/resnet18/deep_ensembles.yaml create mode 100644 experiments/classification/cifar10/configs/resnet50/deep_ensembles.yaml delete mode 100644 experiments/classification/cifar10/configs/wideresnet28x10.yaml create mode 100644 experiments/classification/cifar10/configs/wideresnet28x10/deep_ensembles.yaml delete mode 100644 experiments/classification/cifar10/deep_ensembles.py create mode 100644 
experiments/classification/cifar10/main.py delete mode 100644 experiments/classification/cifar10/resnet.py delete mode 100644 experiments/classification/cifar10/vgg.py delete mode 100644 experiments/classification/cifar10/wideresnet.py diff --git a/experiments/classification/cifar10/configs/resnet.yaml b/experiments/classification/cifar10/configs/resnet.yaml deleted file mode 100644 index 93649669..00000000 --- a/experiments/classification/cifar10/configs/resnet.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# lightning.pytorch==2.1.3 -seed_everything: false -eval_after_fit: true -trainer: - accelerator: gpu - devices: 1 - precision: 16-mixed - logger: - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs/resnet - default_hp_metric: false - callbacks: - - class_path: torch_uncertainty.callbacks.TUClsCheckpoint - - class_path: lightning.pytorch.callbacks.LearningRateMonitor - init_args: - logging_interval: step - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/cls/Acc - patience: 1000 - check_finite: true -model: - num_classes: 10 - in_channels: 3 - loss: CrossEntropyLoss - style: cifar -data: - root: ./data - batch_size: 128 diff --git a/experiments/classification/cifar10/configs/resnet18/batched.yaml b/experiments/classification/cifar10/configs/resnet18/batched.yaml index 58d25ff5..34666473 100644 --- a/experiments/classification/cifar10/configs/resnet18/batched.yaml +++ b/experiments/classification/cifar10/configs/resnet18/batched.yaml @@ -1,4 +1,3 @@ -# lightning.pytorch==2.1.3 seed_everything: false eval_after_fit: true trainer: @@ -23,22 +22,34 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.batched_resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 18 + num_estimators: 4 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: batched - arch: 18 - style: cifar - num_estimators: 4 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.05 - momentum: 0.9 - weight_decay: 5e-4 + class_path: torch.optim.SGD + init_args: + lr: 0.05 + momentum: 0.9 + weight_decay: 5e-4 lr_scheduler: - milestones: - - 25 - - 50 - gamma: 0.1 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 25 + - 50 + gamma: 0.1 diff --git a/experiments/classification/cifar10/configs/resnet18/deep_ensembles.yaml b/experiments/classification/cifar10/configs/resnet18/deep_ensembles.yaml new file mode 100644 index 00000000..845d9d28 --- /dev/null +++ b/experiments/classification/cifar10/configs/resnet18/deep_ensembles.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 75 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/resnet18 + name: deep_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TUClsCheckpoint + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/cls/Acc + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.deep_ensembles + init_args: + core_models: + class_path: torch_uncertainty.models.classification.resnet + 
init_args: + in_channels: 3 + num_classes: 10 + arch: 18 + style: cifar + num_estimators: 4 + task: classification + # eventually you can pass the checkpoints of standard resnet18 models here + # ckpt_paths: [path/to/ckpt1, path/to/ckpt2, path/to/ckpt3, path/to/ckpt4] + num_classes: 10 + loss: CrossEntropyLoss + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 +data: + root: ./data + batch_size: 128 +optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.2 # initial learning rate times 4 (num_estimators) + momentum: 0.9 + weight_decay: 5e-4 +lr_scheduler: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 25 + - 50 + gamma: 0.1 diff --git a/experiments/classification/cifar10/configs/resnet18/masked.yaml b/experiments/classification/cifar10/configs/resnet18/masked.yaml index bc147b97..9d371c48 100644 --- a/experiments/classification/cifar10/configs/resnet18/masked.yaml +++ b/experiments/classification/cifar10/configs/resnet18/masked.yaml @@ -23,23 +23,34 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.masked_resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 18 + num_estimators: 4 + scale: 2 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: masked - arch: 18 - style: cifar - num_estimators: 4 - scale: 2 + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.05 - momentum: 0.9 - weight_decay: 5e-4 + class_path: torch.optim.SGD + init_args: + lr: 0.05 + momentum: 0.9 + weight_decay: 5e-4 lr_scheduler: - milestones: - - 25 - - 50 - gamma: 0.1 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 25 + - 50 + gamma: 0.1 diff --git a/experiments/classification/cifar10/configs/resnet18/mimo.yaml b/experiments/classification/cifar10/configs/resnet18/mimo.yaml index cc043fc3..ec31f3f2 100644 --- a/experiments/classification/cifar10/configs/resnet18/mimo.yaml +++ b/experiments/classification/cifar10/configs/resnet18/mimo.yaml @@ -23,23 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.mimo_resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 18 + num_estimators: 4 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: mimo - arch: 18 - style: cifar - num_estimators: 4 - rho: 1.0 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.MIMOBatchFormat + init_args: + num_estimators: 4 + rho: 1.0 + batch_repeat: 1 data: root: ./data batch_size: 128 optimizer: - lr: 0.05 - momentum: 0.9 - weight_decay: 5e-4 + class_path: torch.optim.SGD + init_args: + lr: 0.05 + momentum: 0.9 + weight_decay: 5e-4 lr_scheduler: - milestones: - - 25 - - 50 - gamma: 0.1 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 25 + - 50 + gamma: 0.1 diff --git a/experiments/classification/cifar10/configs/resnet18/packed.yaml b/experiments/classification/cifar10/configs/resnet18/packed.yaml index c9cf1098..e4083e46 100644 --- a/experiments/classification/cifar10/configs/resnet18/packed.yaml +++ b/experiments/classification/cifar10/configs/resnet18/packed.yaml @@ -23,24 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.packed_resnet + init_args: + 
in_channels: 3 + num_classes: 10 + arch: 18 + style: cifar + num_estimators: 4 + alpha: 2 + gamma: 2 num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: packed - arch: 18 - style: cifar - num_estimators: 4 - alpha: 2 - gamma: 2 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.05 - momentum: 0.9 - weight_decay: 5e-4 + class_path: torch.optim.SGD + init_args: + lr: 0.05 + momentum: 0.9 + weight_decay: 5e-4 lr_scheduler: - milestones: - - 25 - - 50 - gamma: 0.1 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 25 + - 50 + gamma: 0.1 diff --git a/experiments/classification/cifar10/configs/resnet18/standard.yaml b/experiments/classification/cifar10/configs/resnet18/standard.yaml index 046aaa15..72f918b7 100644 --- a/experiments/classification/cifar10/configs/resnet18/standard.yaml +++ b/experiments/classification/cifar10/configs/resnet18/standard.yaml @@ -23,21 +23,28 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 18 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: std - arch: 18 - style: cifar data: root: ./data batch_size: 128 optimizer: - lr: 0.05 - momentum: 0.9 - weight_decay: 5e-4 + class_path: torch.optim.SGD + init_args: + lr: 0.05 + momentum: 0.9 + weight_decay: 5e-4 lr_scheduler: - milestones: - - 25 - - 50 - gamma: 0.1 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 25 + - 50 + gamma: 0.1 diff --git a/experiments/classification/cifar10/configs/resnet50/batched.yaml b/experiments/classification/cifar10/configs/resnet50/batched.yaml index dfdc6da8..6d3a4df1 100644 --- a/experiments/classification/cifar10/configs/resnet50/batched.yaml +++ b/experiments/classification/cifar10/configs/resnet50/batched.yaml @@ -23,24 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.batched_resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 50 + num_estimators: 4 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: batched - arch: 50 - style: cifar - num_estimators: 4 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.08 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.08 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: - - 60 - - 120 - - 160 - gamma: 0.2 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 60 + - 120 + - 160 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/resnet50/deep_ensembles.yaml b/experiments/classification/cifar10/configs/resnet50/deep_ensembles.yaml new file mode 100644 index 00000000..8a977faf --- /dev/null +++ b/experiments/classification/cifar10/configs/resnet50/deep_ensembles.yaml @@ -0,0 +1,64 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 200 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/resnet50 + name: deep_ensembles + default_hp_metric: false + callbacks: + - class_path: 
torch_uncertainty.callbacks.TUClsCheckpoint + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/cls/Acc + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.deep_ensembles + init_args: + core_models: + class_path: torch_uncertainty.models.classification.resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 50 + style: cifar + num_estimators: 4 + task: classification + # eventually you can pass the checkpoints of standard resnet50 models here + # ckpt_paths: [path/to/ckpt1, path/to/ckpt2, path/to/ckpt3, path/to/ckpt4] + num_classes: 10 + loss: CrossEntropyLoss + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 +data: + root: ./data + batch_size: 128 +optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.4 # initial learning rate times 4 (num_estimators) + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true +lr_scheduler: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 60 + - 120 + - 160 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/resnet50/masked.yaml b/experiments/classification/cifar10/configs/resnet50/masked.yaml index 7d5111e3..0963ae4f 100644 --- a/experiments/classification/cifar10/configs/resnet50/masked.yaml +++ b/experiments/classification/cifar10/configs/resnet50/masked.yaml @@ -23,25 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.masked_resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 50 + num_estimators: 4 + scale: 2 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: masked - arch: 50 - style: cifar - num_estimators: 4 - scale: 2 + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/resnet50/mimo.yaml b/experiments/classification/cifar10/configs/resnet50/mimo.yaml index eed913da..1295f1dc 100644 --- a/experiments/classification/cifar10/configs/resnet50/mimo.yaml +++ b/experiments/classification/cifar10/configs/resnet50/mimo.yaml @@ -23,25 +23,38 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.mimo_resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 50 + num_estimators: 4 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: mimo - arch: 50 - style: cifar - num_estimators: 4 - rho: 1.0 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.MIMOBatchFormat + init_args: + num_estimators: 4 + rho: 1.0 + batch_repeat: 1 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 
160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/resnet50/packed.yaml b/experiments/classification/cifar10/configs/resnet50/packed.yaml index 766dda19..d25204b2 100644 --- a/experiments/classification/cifar10/configs/resnet50/packed.yaml +++ b/experiments/classification/cifar10/configs/resnet50/packed.yaml @@ -23,26 +23,38 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.packed_resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 50 + style: cifar + num_estimators: 4 + alpha: 2 + gamma: 2 num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: packed - arch: 50 - style: cifar - num_estimators: 4 - alpha: 2 - gamma: 2 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/resnet50/standard.yaml b/experiments/classification/cifar10/configs/resnet50/standard.yaml index 9b6b2b25..2b75b800 100644 --- a/experiments/classification/cifar10/configs/resnet50/standard.yaml +++ b/experiments/classification/cifar10/configs/resnet50/standard.yaml @@ -23,23 +23,30 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.resnet + init_args: + in_channels: 3 + num_classes: 10 + arch: 50 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: std - arch: 50 - style: cifar data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/wideresnet28x10.yaml b/experiments/classification/cifar10/configs/wideresnet28x10.yaml deleted file mode 100644 index 6e65ae32..00000000 --- a/experiments/classification/cifar10/configs/wideresnet28x10.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# lightning.pytorch==2.1.3 -seed_everything: false -eval_after_fit: true -trainer: - accelerator: gpu - devices: 1 - precision: 16-mixed - max_epochs: 200 - logger: - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs/wideresnet28x10 - default_hp_metric: false - callbacks: - - class_path: torch_uncertainty.callbacks.TUClsCheckpoint - - class_path: lightning.pytorch.callbacks.LearningRateMonitor - init_args: - logging_interval: step - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/cls/Acc - patience: 1000 - check_finite: true -model: - num_classes: 10 - in_channels: 3 - loss: CrossEntropyLoss - style: cifar -data: - root: ./data - batch_size: 128 diff --git a/experiments/classification/cifar10/configs/wideresnet28x10/batched.yaml b/experiments/classification/cifar10/configs/wideresnet28x10/batched.yaml index 6aa5d8fb..9c4bca52 100644 --- a/experiments/classification/cifar10/configs/wideresnet28x10/batched.yaml +++ 
b/experiments/classification/cifar10/configs/wideresnet28x10/batched.yaml @@ -23,23 +23,35 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.batched_wideresnet28x10 + init_args: + in_channels: 3 + num_classes: 10 + num_estimators: 4 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: batched - style: cifar - num_estimators: 4 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.1 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/wideresnet28x10/deep_ensembles.yaml b/experiments/classification/cifar10/configs/wideresnet28x10/deep_ensembles.yaml new file mode 100644 index 00000000..47512a9a --- /dev/null +++ b/experiments/classification/cifar10/configs/wideresnet28x10/deep_ensembles.yaml @@ -0,0 +1,64 @@ +# lightning.pytorch==2.1.3 +seed_everything: false +eval_after_fit: true +trainer: + accelerator: gpu + devices: 1 + precision: 16-mixed + max_epochs: 200 + logger: + class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: logs/wideresnet28x10 + name: deep_ensembles + default_hp_metric: false + callbacks: + - class_path: torch_uncertainty.callbacks.TUClsCheckpoint + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: step + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/cls/Acc + patience: 1000 + check_finite: true +model: + model: + class_path: torch_uncertainty.models.deep_ensembles + init_args: + core_models: + class_path: torch_uncertainty.models.classification.wideresnet28x10 + init_args: + in_channels: 3 + num_classes: 10 + style: cifar + dropout_rate: 0.3 + num_estimators: 4 + task: classification + # eventually you can pass the checkpoints of standard wideresnet28x10 models here + # ckpt_paths: [path/to/ckpt1, path/to/ckpt2, path/to/ckpt3, path/to/ckpt4] + num_classes: 10 + loss: CrossEntropyLoss + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 +data: + root: ./data + batch_size: 128 +optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.4 # initial learning rate times 4 (num_estimators) + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true +lr_scheduler: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 60 + - 120 + - 160 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/wideresnet28x10/masked.yaml b/experiments/classification/cifar10/configs/wideresnet28x10/masked.yaml index 8c5a255e..06203eb1 100644 --- a/experiments/classification/cifar10/configs/wideresnet28x10/masked.yaml +++ b/experiments/classification/cifar10/configs/wideresnet28x10/masked.yaml @@ -23,24 +23,35 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.masked_wideresnet28x10 + init_args: + in_channels: 3 + num_classes: 10 + num_estimators: 4 + scale: 2 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: masked - style: cifar - num_estimators: 4 - scale: 2 + 
format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.1 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/wideresnet28x10/mimo.yaml b/experiments/classification/cifar10/configs/wideresnet28x10/mimo.yaml index b4026855..497c1bd3 100644 --- a/experiments/classification/cifar10/configs/wideresnet28x10/mimo.yaml +++ b/experiments/classification/cifar10/configs/wideresnet28x10/mimo.yaml @@ -23,24 +23,37 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.mimo_wideresnet28x10 + init_args: + in_channels: 3 + num_classes: 10 + num_estimators: 4 + style: cifar num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: mimo - style: cifar - num_estimators: 4 - rho: 1.0 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.MIMOBatchFormat + init_args: + num_estimators: 4 + rho: 1.0 + batch_repeat: 1 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.1 + gamma: 0.1 diff --git a/experiments/classification/cifar10/configs/wideresnet28x10/packed.yaml b/experiments/classification/cifar10/configs/wideresnet28x10/packed.yaml index 538cc9bb..468a2edc 100644 --- a/experiments/classification/cifar10/configs/wideresnet28x10/packed.yaml +++ b/experiments/classification/cifar10/configs/wideresnet28x10/packed.yaml @@ -23,25 +23,37 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.packed_wideresnet28x10 + init_args: + in_channels: 3 + num_classes: 10 + style: cifar + num_estimators: 4 + alpha: 2 + gamma: 2 num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - version: packed - style: cifar - num_estimators: 4 - alpha: 2 - gamma: 2 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar10/configs/wideresnet28x10/standard.yaml b/experiments/classification/cifar10/configs/wideresnet28x10/standard.yaml index b53521ad..2cff2924 100644 --- a/experiments/classification/cifar10/configs/wideresnet28x10/standard.yaml +++ b/experiments/classification/cifar10/configs/wideresnet28x10/standard.yaml @@ -23,23 +23,30 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.wideresnet28x10 + init_args: + in_channels: 3 + num_classes: 10 + style: cifar + dropout_rate: 0.3 num_classes: 10 - in_channels: 3 loss: CrossEntropyLoss - 
version: std - style: cifar - dropout_rate: 0.3 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar10/deep_ensembles.py b/experiments/classification/cifar10/deep_ensembles.py deleted file mode 100644 index d7316811..00000000 --- a/experiments/classification/cifar10/deep_ensembles.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path - -from torch_uncertainty import cli_main, init_args -from torch_uncertainty.baselines import DeepEnsemblesBaseline -from torch_uncertainty.datamodules import CIFAR10DataModule - -if __name__ == "__main__": - args = init_args(DeepEnsemblesBaseline, CIFAR10DataModule) - if args.root == "./data/": - root = Path(__file__).parent.absolute().parents[2] - else: - root = Path(args.root) - - net_name = f"de-{args.backbone}-cifar10" - - # datamodule - args.root = str(root / "data") - dm = CIFAR10DataModule(**vars(args)) - - # model - args.task = "classification" - model = DeepEnsemblesBaseline( - **vars(args), - num_classes=dm.num_classes, - in_channels=dm.num_channels, - ) - - args.test = -1 - - cli_main(model, dm, root, net_name, args) diff --git a/experiments/classification/cifar10/main.py b/experiments/classification/cifar10/main.py new file mode 100644 index 00000000..b54d3a87 --- /dev/null +++ b/experiments/classification/cifar10/main.py @@ -0,0 +1,20 @@ +import torch + +from torch_uncertainty import TULightningCLI +from torch_uncertainty.datamodules import CIFAR10DataModule +from torch_uncertainty.routines import ClassificationRoutine + + +def cli_main() -> TULightningCLI: + return TULightningCLI(ClassificationRoutine, CIFAR10DataModule) + + +if __name__ == "__main__": + torch.set_float32_matmul_precision("medium") + cli = cli_main() + if ( + (not cli.trainer.fast_dev_run) + and cli.subcommand == "fit" + and cli._get(cli.config, "eval_after_fit") + ): + cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/cifar10/resnet.py b/experiments/classification/cifar10/resnet.py deleted file mode 100644 index de03521d..00000000 --- a/experiments/classification/cifar10/resnet.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.classification import ResNetBaseline -from torch_uncertainty.datamodules import CIFAR10DataModule - - -class ResNetCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - super().add_arguments_to_parser(parser) - parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.MultiStepLR) - - -def cli_main() -> ResNetCLI: - return ResNetCLI(ResNetBaseline, CIFAR10DataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/cifar10/vgg.py b/experiments/classification/cifar10/vgg.py deleted file mode 100644 index 0f40d498..00000000 --- 
a/experiments/classification/cifar10/vgg.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.classification import VGGBaseline -from torch_uncertainty.datamodules import CIFAR10DataModule - - -class ResNetCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - super().add_arguments_to_parser(parser) - parser.add_optimizer_args(torch.optim.Adam) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.MultiStepLR) - - -def cli_main() -> ResNetCLI: - return ResNetCLI(VGGBaseline, CIFAR10DataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/cifar10/wideresnet.py b/experiments/classification/cifar10/wideresnet.py deleted file mode 100644 index bf11e2d2..00000000 --- a/experiments/classification/cifar10/wideresnet.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.classification import WideResNetBaseline -from torch_uncertainty.datamodules import CIFAR10DataModule - - -class ResNetCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - super().add_arguments_to_parser(parser) - parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.MultiStepLR) - - -def cli_main() -> ResNetCLI: - return ResNetCLI(WideResNetBaseline, CIFAR10DataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") From b3e883fe639185f80418d0be8d60a529a76fddb4 Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 1 Sep 2025 17:31:13 +0200 Subject: [PATCH 19/25] :art: Fix outdated config files for all experiments --- experiments/classification/cifar10/readme.md | 7 ---- .../cifar100/configs/resnet.yaml | 30 --------------- .../cifar100/configs/resnet18/batched.yaml | 34 +++++++++++------ .../cifar100/configs/resnet18/masked.yaml | 35 +++++++++++------ .../cifar100/configs/resnet18/mimo.yaml | 37 ++++++++++++------ .../cifar100/configs/resnet18/packed.yaml | 38 ++++++++++++------- .../cifar100/configs/resnet18/standard.yaml | 27 ++++++++----- .../cifar100/configs/resnet50/batched.yaml | 34 +++++++++++------ .../cifar100/configs/resnet50/masked.yaml | 35 +++++++++++------ .../cifar100/configs/resnet50/mimo.yaml | 37 ++++++++++++------ .../cifar100/configs/resnet50/packed.yaml | 38 ++++++++++++------- .../cifar100/configs/resnet50/standard.yaml | 33 +++++++++------- .../configs/wideresnet28x10/standard.yaml | 27 ++++++++----- .../classification/cifar100/deep_ensembles.py | 30 --------------- experiments/classification/cifar100/main.py | 20 ++++++++++ experiments/classification/cifar100/resnet.py | 28 -------------- experiments/classification/cifar100/vgg.py | 28 -------------- .../classification/cifar100/wideresnet.py | 28 -------------- .../configs/resnet18/standard.yaml | 25 +++++++----- .../classification/tiny-imagenet/main.py | 
20 ++++++++++ .../classification/tiny-imagenet/resnet.py | 28 -------------- experiments/depth/kitti/bts.py | 28 -------------- experiments/depth/kitti/configs/bts.yaml | 23 ++++++----- experiments/depth/kitti/main.py | 20 ++++++++++ experiments/depth/nyu/bts.py | 28 -------------- experiments/depth/nyu/configs/bts.yaml | 27 +++++++------ experiments/depth/nyu/main.py | 20 ++++++++++ .../energy-efficiency/mlp/laplace.yaml | 20 ++++++---- .../configs/energy-efficiency/mlp/normal.yaml | 20 ++++++---- .../energy-efficiency/mlp/point_wise.yaml | 18 ++++++--- .../configs/kin8nm/mlp/laplace.yaml | 20 ++++++---- .../configs/kin8nm/mlp/normal.yaml | 20 ++++++---- .../configs/kin8nm/mlp/point_wise.yaml | 18 ++++++--- .../naval-propulsion-plant/mlp/laplace.yaml | 20 ++++++---- .../naval-propulsion-plant/mlp/normal.yaml | 20 ++++++---- .../mlp/point_wise.yaml | 18 ++++++--- .../configs/power-plant/mlp/laplace.yaml | 20 ++++++---- .../configs/power-plant/mlp/normal.yaml | 20 ++++++---- .../configs/power-plant/mlp/point_wise.yaml | 18 ++++++--- .../configs/protein/mlp/laplace.yaml | 24 +++++++----- .../configs/protein/mlp/normal.yaml | 24 +++++++----- .../configs/protein/mlp/point_wise.yaml | 22 +++++++---- .../configs/wine-quality-red/mlp/laplace.yaml | 24 +++++++----- .../configs/wine-quality-red/mlp/normal.yaml | 24 +++++++----- .../wine-quality-red/mlp/point_wise.yaml | 22 +++++++---- .../configs/yacht/mlp/laplace.yaml | 18 +++++---- .../configs/yacht/mlp/normal.yaml | 24 +++++++----- .../configs/yacht/mlp/point_wise.yaml | 22 +++++++---- .../segmentation/camvid/configs/deeplab.yaml | 29 ++++++++------ .../camvid/configs/segformer.yaml | 20 ++++++---- experiments/segmentation/camvid/main.py | 0 .../cityscapes/configs/deeplab.yaml | 8 ++-- experiments/segmentation/cityscapes/main.py | 0 .../segmentation/muad/configs/segformer.yaml | 25 ------------ experiments/segmentation/muad/segformer.py | 27 ------------- .../models/segmentation/__init__.py | 1 + 56 files changed, 689 insertions(+), 622 deletions(-) delete mode 100644 experiments/classification/cifar100/configs/resnet.yaml delete mode 100644 experiments/classification/cifar100/deep_ensembles.py create mode 100644 experiments/classification/cifar100/main.py delete mode 100644 experiments/classification/cifar100/resnet.py delete mode 100644 experiments/classification/cifar100/vgg.py delete mode 100644 experiments/classification/cifar100/wideresnet.py create mode 100644 experiments/classification/tiny-imagenet/main.py delete mode 100644 experiments/classification/tiny-imagenet/resnet.py delete mode 100644 experiments/depth/kitti/bts.py create mode 100644 experiments/depth/kitti/main.py delete mode 100644 experiments/depth/nyu/bts.py create mode 100644 experiments/depth/nyu/main.py create mode 100644 experiments/segmentation/camvid/main.py create mode 100644 experiments/segmentation/cityscapes/main.py delete mode 100644 experiments/segmentation/muad/configs/segformer.yaml delete mode 100644 experiments/segmentation/muad/segformer.py diff --git a/experiments/classification/cifar10/readme.md b/experiments/classification/cifar10/readme.md index 67905f7c..81462220 100644 --- a/experiments/classification/cifar10/readme.md +++ b/experiments/classification/cifar10/readme.md @@ -20,13 +20,6 @@ python resnet.py fit --config configs/resnet18/standard.yaml python resnet.py fit --config configs/resnet50/packed.yaml ``` - -**Note:** In addition we provide a default resnet config file (`configs/resnet.yaml`) to enable the training of any ResNet model. 
Here a basic example to train a MIMO ResNet101 model with $4$ estimators and $\rho=1.0$: - -```bash -python resnet.py fit --config configs/resnet.yaml --model.arch 101 --model.version mimo --model.num_estimators 4 --model.rho 1.0 -``` - ## Available configurations: ### ResNet diff --git a/experiments/classification/cifar100/configs/resnet.yaml b/experiments/classification/cifar100/configs/resnet.yaml deleted file mode 100644 index 7ba158cc..00000000 --- a/experiments/classification/cifar100/configs/resnet.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# lightning.pytorch==2.1.3 -seed_everything: false -eval_after_fit: true -trainer: - accelerator: gpu - devices: 1 - precision: 16-mixed - logger: - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs/ - default_hp_metric: false - callbacks: - - class_path: torch_uncertainty.callbacks.TUClsCheckpoint - - class_path: lightning.pytorch.callbacks.LearningRateMonitor - init_args: - logging_interval: step - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/cls/Acc - patience: 1000 - check_finite: true -model: - num_classes: 10 - in_channels: 3 - loss: CrossEntropyLoss - style: cifar -data: - root: ./data - batch_size: 128 diff --git a/experiments/classification/cifar100/configs/resnet18/batched.yaml b/experiments/classification/cifar100/configs/resnet18/batched.yaml index 133db7f6..59822288 100644 --- a/experiments/classification/cifar100/configs/resnet18/batched.yaml +++ b/experiments/classification/cifar100/configs/resnet18/batched.yaml @@ -23,24 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.batched_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 18 + num_estimators: 4 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: batched - arch: 18 - style: cifar - num_estimators: 4 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 1e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet18/masked.yaml b/experiments/classification/cifar100/configs/resnet18/masked.yaml index e60b53c6..6fe334bb 100644 --- a/experiments/classification/cifar100/configs/resnet18/masked.yaml +++ b/experiments/classification/cifar100/configs/resnet18/masked.yaml @@ -23,25 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.masked_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 18 + num_estimators: 4 + scale: 2 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: masked - arch: 18 - style: cifar - num_estimators: 4 - scale: 2 + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 1e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 
60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet18/mimo.yaml b/experiments/classification/cifar100/configs/resnet18/mimo.yaml index c11fe806..aa553abe 100644 --- a/experiments/classification/cifar100/configs/resnet18/mimo.yaml +++ b/experiments/classification/cifar100/configs/resnet18/mimo.yaml @@ -23,25 +23,38 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.mimo_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 18 + num_estimators: 4 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: mimo - arch: 18 - style: cifar - num_estimators: 4 - rho: 1.0 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.MIMOBatchFormat + init_args: + num_estimators: 4 + rho: 1.0 + batch_repeat: 1 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 1e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet18/packed.yaml b/experiments/classification/cifar100/configs/resnet18/packed.yaml index cd1704ae..5a5cf0d0 100644 --- a/experiments/classification/cifar100/configs/resnet18/packed.yaml +++ b/experiments/classification/cifar100/configs/resnet18/packed.yaml @@ -23,26 +23,38 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.packed_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 18 + style: cifar + num_estimators: 4 + alpha: 2 + gamma: 2 num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: packed - arch: 18 - style: cifar - num_estimators: 4 - alpha: 2 - gamma: 2 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 1e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet18/standard.yaml b/experiments/classification/cifar100/configs/resnet18/standard.yaml index 86cbe552..acb41dc9 100644 --- a/experiments/classification/cifar100/configs/resnet18/standard.yaml +++ b/experiments/classification/cifar100/configs/resnet18/standard.yaml @@ -23,23 +23,30 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 18 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: std - arch: 18 - style: cifar data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 1e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet50/batched.yaml
b/experiments/classification/cifar100/configs/resnet50/batched.yaml index 607f53c4..04c878f2 100644 --- a/experiments/classification/cifar100/configs/resnet50/batched.yaml +++ b/experiments/classification/cifar100/configs/resnet50/batched.yaml @@ -23,24 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.batched_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 50 + num_estimators: 4 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: batched - arch: 50 - style: cifar - num_estimators: 4 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.08 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.08 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet50/masked.yaml b/experiments/classification/cifar100/configs/resnet50/masked.yaml index 79ab9f20..01b0b636 100644 --- a/experiments/classification/cifar100/configs/resnet50/masked.yaml +++ b/experiments/classification/cifar100/configs/resnet50/masked.yaml @@ -23,25 +23,36 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.masked_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 50 + num_estimators: 4 + scale: 2 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: masked - arch: 50 - style: cifar - num_estimators: 4 - scale: 2 + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet50/mimo.yaml b/experiments/classification/cifar100/configs/resnet50/mimo.yaml index 457a8b13..f671020f 100644 --- a/experiments/classification/cifar100/configs/resnet50/mimo.yaml +++ b/experiments/classification/cifar100/configs/resnet50/mimo.yaml @@ -23,25 +23,38 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.mimo_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 50 + num_estimators: 4 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: mimo - arch: 50 - style: cifar - num_estimators: 4 - rho: 1.0 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.MIMOBatchFormat + init_args: + num_estimators: 4 + rho: 1.0 + batch_repeat: 1 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git 
a/experiments/classification/cifar100/configs/resnet50/packed.yaml b/experiments/classification/cifar100/configs/resnet50/packed.yaml index f664fe23..e253dbed 100644 --- a/experiments/classification/cifar100/configs/resnet50/packed.yaml +++ b/experiments/classification/cifar100/configs/resnet50/packed.yaml @@ -23,26 +23,38 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.packed_resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 50 + style: cifar + num_estimators: 4 + alpha: 2 + gamma: 2 num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: packed - arch: 50 - style: cifar - num_estimators: 4 - alpha: 2 - gamma: 2 + is_ensemble: true + format_batch_fn: + class_path: torch_uncertainty.transforms.RepeatTarget + init_args: + num_repeats: 4 data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/resnet50/standard.yaml b/experiments/classification/cifar100/configs/resnet50/standard.yaml index 6f31e824..6f6fae82 100644 --- a/experiments/classification/cifar100/configs/resnet50/standard.yaml +++ b/experiments/classification/cifar100/configs/resnet50/standard.yaml @@ -23,23 +23,30 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 50 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: std - arch: 50 - style: cifar data: root: ./data batch_size: 128 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: - - 60 - - 120 - - 160 - gamma: 0.2 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 60 + - 120 + - 160 + gamma: 0.2 diff --git a/experiments/classification/cifar100/configs/wideresnet28x10/standard.yaml b/experiments/classification/cifar100/configs/wideresnet28x10/standard.yaml index 5537624d..c1629269 100644 --- a/experiments/classification/cifar100/configs/wideresnet28x10/standard.yaml +++ b/experiments/classification/cifar100/configs/wideresnet28x10/standard.yaml @@ -23,24 +23,31 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.wideresnet28x10 + init_args: + in_channels: 3 + num_classes: 100 + style: cifar + dropout_rate: 0.3 num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: std - style: cifar - dropout_rate: 0.3 data: root: ./data batch_size: 128 auto_augment: rand-m9-n2-mstd1 optimizer: - lr: 0.1 - momentum: 0.9 - weight_decay: 5e-4 - nesterov: true + class_path: torch.optim.SGD + init_args: + lr: 0.1 + momentum: 0.9 + weight_decay: 5e-4 + nesterov: true lr_scheduler: - milestones: + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: - 60 - 120 - 160 - gamma: 0.2 + gamma: 0.2 diff --git a/experiments/classification/cifar100/deep_ensembles.py b/experiments/classification/cifar100/deep_ensembles.py deleted file mode 100644 index 3a1ed65f..00000000 ---
a/experiments/classification/cifar100/deep_ensembles.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path - -from torch_uncertainty import cli_main, init_args -from torch_uncertainty.baselines import DeepEnsemblesBaseline -from torch_uncertainty.datamodules import CIFAR100DataModule - -if __name__ == "__main__": - args = init_args(DeepEnsemblesBaseline, CIFAR100DataModule) - if args.root == "./data/": - root = Path(__file__).parent.absolute().parents[2] - else: - root = Path(args.root) - - net_name = f"de-{args.backbone}-cifar100" - - # datamodule - args.root = str(root / "data") - dm = CIFAR100DataModule(**vars(args)) - - # model - args.task = "classification" - model = DeepEnsemblesBaseline( - **vars(args), - num_classes=dm.num_classes, - in_channels=dm.num_channels, - ) - - args.test = -1 - - cli_main(model, dm, root, net_name, args) diff --git a/experiments/classification/cifar100/main.py b/experiments/classification/cifar100/main.py new file mode 100644 index 00000000..336b6ca1 --- /dev/null +++ b/experiments/classification/cifar100/main.py @@ -0,0 +1,20 @@ +import torch + +from torch_uncertainty import TULightningCLI +from torch_uncertainty.datamodules import CIFAR100DataModule +from torch_uncertainty.routines import ClassificationRoutine + + +def cli_main() -> TULightningCLI: + return TULightningCLI(ClassificationRoutine, CIFAR100DataModule) + + +if __name__ == "__main__": + torch.set_float32_matmul_precision("medium") + cli = cli_main() + if ( + (not cli.trainer.fast_dev_run) + and cli.subcommand == "fit" + and cli._get(cli.config, "eval_after_fit") + ): + cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/cifar100/resnet.py b/experiments/classification/cifar100/resnet.py deleted file mode 100644 index 8c8b8a00..00000000 --- a/experiments/classification/cifar100/resnet.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.classification import ResNetBaseline -from torch_uncertainty.datamodules import CIFAR100DataModule - - -class ResNetCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - super().add_arguments_to_parser(parser) - parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.MultiStepLR) - - -def cli_main() -> ResNetCLI: - return ResNetCLI(ResNetBaseline, CIFAR100DataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/cifar100/vgg.py b/experiments/classification/cifar100/vgg.py deleted file mode 100644 index af07c997..00000000 --- a/experiments/classification/cifar100/vgg.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.classification import VGGBaseline -from torch_uncertainty.datamodules import CIFAR100DataModule - - -class ResNetCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - super().add_arguments_to_parser(parser) - parser.add_optimizer_args(torch.optim.Adam) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.MultiStepLR) - 
- -def cli_main() -> ResNetCLI: - return ResNetCLI(VGGBaseline, CIFAR100DataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/cifar100/wideresnet.py b/experiments/classification/cifar100/wideresnet.py deleted file mode 100644 index f29ad2ff..00000000 --- a/experiments/classification/cifar100/wideresnet.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.classification import WideResNetBaseline -from torch_uncertainty.datamodules import CIFAR100DataModule - - -class ResNetCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - super().add_arguments_to_parser(parser) - parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.MultiStepLR) - - -def cli_main() -> ResNetCLI: - return ResNetCLI(WideResNetBaseline, CIFAR100DataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/tiny-imagenet/configs/resnet18/standard.yaml b/experiments/classification/tiny-imagenet/configs/resnet18/standard.yaml index 86017395..3823f24c 100644 --- a/experiments/classification/tiny-imagenet/configs/resnet18/standard.yaml +++ b/experiments/classification/tiny-imagenet/configs/resnet18/standard.yaml @@ -23,19 +23,26 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.classification.resnet + init_args: + in_channels: 3 + num_classes: 100 + arch: 18 + style: cifar num_classes: 100 - in_channels: 3 loss: CrossEntropyLoss - version: std - arch: 18 - style: cifar data: root: ./data batch_size: 256 optimizer: - lr: 0.2 - momentum: 0.9 - weight_decay: 1e-4 + class_path: torch.optim.SGD + init_args: + lr: 0.2 + momentum: 0.9 + weight_decay: 1e-4 lr_scheduler: - eta_min: 0.0 - T_max: 200 + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + eta_min: 0.0 + T_max: 200 diff --git a/experiments/classification/tiny-imagenet/main.py b/experiments/classification/tiny-imagenet/main.py new file mode 100644 index 00000000..d1509fee --- /dev/null +++ b/experiments/classification/tiny-imagenet/main.py @@ -0,0 +1,20 @@ +import torch + +from torch_uncertainty import TULightningCLI +from torch_uncertainty.datamodules import TinyImageNetDataModule +from torch_uncertainty.routines import ClassificationRoutine + + +def cli_main() -> TULightningCLI: + return TULightningCLI(ClassificationRoutine, TinyImageNetDataModule) + + +if __name__ == "__main__": + torch.set_float32_matmul_precision("medium") + cli = cli_main() + if ( + (not cli.trainer.fast_dev_run) + and cli.subcommand == "fit" + and cli._get(cli.config, "eval_after_fit") + ): + cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/classification/tiny-imagenet/resnet.py b/experiments/classification/tiny-imagenet/resnet.py deleted file mode 100644 index 1959cf6c..00000000 --- 
a/experiments/classification/tiny-imagenet/resnet.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.classification import ResNetBaseline -from torch_uncertainty.datamodules import TinyImageNetDataModule - - -class ResNetCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - super().add_arguments_to_parser(parser) - parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.CosineAnnealingLR) - - -def cli_main() -> ResNetCLI: - return ResNetCLI(ResNetBaseline, TinyImageNetDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/depth/kitti/bts.py b/experiments/depth/kitti/bts.py deleted file mode 100644 index d69b870e..00000000 --- a/experiments/depth/kitti/bts.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser -from torch.optim.lr_scheduler import PolynomialLR - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.depth import BTSBaseline -from torch_uncertainty.datamodules.depth import KITTIDataModule - - -class BTSCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - parser.add_optimizer_args(torch.optim.AdamW) - parser.add_lr_scheduler_args(PolynomialLR) - - -def cli_main() -> BTSCLI: - return BTSCLI(BTSBaseline, KITTIDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/depth/kitti/configs/bts.yaml b/experiments/depth/kitti/configs/bts.yaml index 55bdc780..bd4a9cc1 100644 --- a/experiments/depth/kitti/configs/bts.yaml +++ b/experiments/depth/kitti/configs/bts.yaml @@ -22,15 +22,16 @@ trainer: init_args: logging_interval: step model: + model: + class_path: torch_uncertainty.models.depth.bts_resnet + init_args: + arch: 50 + max_depth: 80.0 + pretrained_backbone: true loss: class_path: torch_uncertainty.metrics.SILog init_args: - sqrt: true - version: std - arch: 50 - max_depth: 80.0 - num_estimators: 1 - pretrained_backbone: true + sqrt: true data: root: ./data batch_size: 4 @@ -42,7 +43,11 @@ data: - 1216 num_workers: 4 optimizer: - lr: 1e-4 + class_path: torch.optim.AdamW + init_args: + lr: 1e-4 lr_scheduler: - power: 0.9 - total_iters: 50 + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + power: 0.9 + total_iters: 50 diff --git a/experiments/depth/kitti/main.py b/experiments/depth/kitti/main.py new file mode 100644 index 00000000..f0e7e2d1 --- /dev/null +++ b/experiments/depth/kitti/main.py @@ -0,0 +1,20 @@ +import torch + +from torch_uncertainty import TULightningCLI +from torch_uncertainty.datamodules.depth import KITTIDataModule +from torch_uncertainty.routines import PixelRegressionRoutine + + +def cli_main() -> TULightningCLI: + return TULightningCLI(PixelRegressionRoutine, KITTIDataModule) + + +if __name__ == "__main__": + torch.set_float32_matmul_precision("medium") + cli = cli_main() + if ( + 
(not cli.trainer.fast_dev_run) + and cli.subcommand == "fit" + and cli._get(cli.config, "eval_after_fit") + ): + cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/depth/nyu/bts.py b/experiments/depth/nyu/bts.py deleted file mode 100644 index 0e419abc..00000000 --- a/experiments/depth/nyu/bts.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser -from torch.optim.lr_scheduler import PolynomialLR - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.depth import BTSBaseline -from torch_uncertainty.datamodules.depth import NYUv2DataModule - - -class BTSCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - parser.add_optimizer_args(torch.optim.AdamW) - parser.add_lr_scheduler_args(PolynomialLR) - - -def cli_main() -> BTSCLI: - return BTSCLI(BTSBaseline, NYUv2DataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/depth/nyu/configs/bts.yaml b/experiments/depth/nyu/configs/bts.yaml index db399bfe..3cd33dc8 100644 --- a/experiments/depth/nyu/configs/bts.yaml +++ b/experiments/depth/nyu/configs/bts.yaml @@ -22,15 +22,16 @@ trainer: init_args: logging_interval: step model: + model: + class_path: torch_uncertainty.models.depth.bts_resnet + init_args: + arch: 50 + max_depth: 10.0 + pretrained_backbone: true loss: class_path: torch_uncertainty.metrics.SILog init_args: - sqrt: true - version: std - arch: 50 - max_depth: 10.0 - num_estimators: 1 - pretrained_backbone: true + sqrt: true data: root: ./data batch_size: 8 @@ -44,9 +45,13 @@ data: max_depth: 10.0 min_depth: 1e-3 optimizer: - lr: 1e-4 - weight_decay: 1e-2 - eps: 1e-3 + class_path: torch.optim.AdamW + init_args: + lr: 1e-4 + weight_decay: 1e-2 + eps: 1e-3 lr_scheduler: - power: 0.9 - total_iters: 100 + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + power: 0.9 + total_iters: 100 diff --git a/experiments/depth/nyu/main.py b/experiments/depth/nyu/main.py new file mode 100644 index 00000000..e55d0ce9 --- /dev/null +++ b/experiments/depth/nyu/main.py @@ -0,0 +1,20 @@ +import torch + +from torch_uncertainty import TULightningCLI +from torch_uncertainty.datamodules.depth import NYUv2DataModule +from torch_uncertainty.routines import PixelRegressionRoutine + + +def cli_main() -> TULightningCLI: + return TULightningCLI(PixelRegressionRoutine, NYUv2DataModule) + + +if __name__ == "__main__": + torch.set_float32_matmul_precision("medium") + cli = cli_main() + if ( + (not cli.trainer.fast_dev_run) + and cli.subcommand == "fit" + and cli._get(cli.config, "eval_after_fit") + ): + cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/laplace.yaml index 86cc7e29..0b871a1e 100644 --- a/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/laplace.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: 
laplace output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: energy-efficiency optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/normal.yaml index 41bc571d..7c9f9ed4 100644 --- a/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/normal.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: energy-efficiency optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/point_wise.yaml index 05cfd417..0fe33248 100644 --- a/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/energy-efficiency/mlp/point_wise.yaml @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: concrete optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/kin8nm/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/kin8nm/mlp/laplace.yaml index 12b7b5e6..c73ec2e6 100644 --- a/experiments/regression/uci_datasets/configs/kin8nm/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/kin8nm/mlp/laplace.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: laplace output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - dist_family: laplace + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: kin8nm optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/kin8nm/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/kin8nm/mlp/normal.yaml index 2b2eb698..6eb01d38 100644 --- a/experiments/regression/uci_datasets/configs/kin8nm/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/kin8nm/mlp/normal.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: 
normal output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - dist_family: normal + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: kin8nm optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/kin8nm/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/kin8nm/mlp/point_wise.yaml index 3cd74e4a..60b1f550 100644 --- a/experiments/regression/uci_datasets/configs/kin8nm/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/kin8nm/mlp/point_wise.yaml @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 8 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 8 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: kin8nm optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/laplace.yaml index 76d5394e..a2eee4a7 100644 --- a/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/laplace.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 16 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: laplace output_dim: 1 - in_features: 16 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: naval-propulsion-plant optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/normal.yaml index 43cb1c2b..84d9c86c 100644 --- a/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/normal.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 16 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 16 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: naval-propulsion-plant optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/point_wise.yaml index f68c63a8..59032cfa 100644 --- a/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/naval-propulsion-plant/mlp/point_wise.yaml @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + 
class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 16 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 16 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: naval-propulsion-plant optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/power-plant/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/power-plant/mlp/laplace.yaml index 80ffa4ae..b737a5dc 100644 --- a/experiments/regression/uci_datasets/configs/power-plant/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/power-plant/mlp/laplace.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 4 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: laplace output_dim: 1 - in_features: 4 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: power-plant optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/power-plant/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/power-plant/mlp/normal.yaml index 5adc86d9..1ca97fdd 100644 --- a/experiments/regression/uci_datasets/configs/power-plant/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/power-plant/mlp/normal.yaml @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 4 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 4 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: power-plant optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/power-plant/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/power-plant/mlp/point_wise.yaml index e83ea686..1a7acc4d 100644 --- a/experiments/regression/uci_datasets/configs/power-plant/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/power-plant/mlp/point_wise.yaml @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 4 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 4 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 dataset_name: power-plant optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/protein/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/protein/mlp/laplace.yaml index 809c6d88..d8e21476 100644 --- a/experiments/regression/uci_datasets/configs/protein/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/protein/mlp/laplace.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/laplace + save_dir: logs/protein/mlp/laplace 
name: standard default_hp_metric: false callbacks: @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 9 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: laplace output_dim: 1 - in_features: 9 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: protein optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/protein/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/protein/mlp/normal.yaml index b6aa744f..b3e62c74 100644 --- a/experiments/regression/uci_datasets/configs/protein/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/protein/mlp/normal.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/normal + save_dir: logs/protein/mlp/normal name: standard default_hp_metric: false callbacks: @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 9 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 9 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: protein optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/protein/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/protein/mlp/point_wise.yaml index 3fe2bf3b..24734392 100644 --- a/experiments/regression/uci_datasets/configs/protein/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/protein/mlp/point_wise.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/point_wise + save_dir: logs/protein/mlp/point_wise name: standard default_hp_metric: false callbacks: @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 9 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 9 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: protein optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/laplace.yaml index f5e422e3..3f216e0d 100644 --- a/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/laplace.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/laplace + save_dir: logs/wine-quality-red/mlp/laplace name: standard default_hp_metric: false callbacks: @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: 
torch_uncertainty.models.mlp.mlp + init_args: + in_features: 11 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: laplace output_dim: 1 - in_features: 11 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: wine-quality-red optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/normal.yaml index e1c552b8..3de91301 100644 --- a/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/normal.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/normal + save_dir: logs/wine-quality-red/mlp/normal name: standard default_hp_metric: false callbacks: @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 11 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 11 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: wine-quality-red optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/point_wise.yaml index ee8e813d..30496c6e 100644 --- a/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/wine-quality-red/mlp/point_wise.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/point_wise + save_dir: logs/wine-quality-red/mlp/point_wise name: standard default_hp_metric: false callbacks: @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 11 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 11 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: wine-quality-red optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/yacht/mlp/laplace.yaml b/experiments/regression/uci_datasets/configs/yacht/mlp/laplace.yaml index 15658cda..d17c66fa 100644 --- a/experiments/regression/uci_datasets/configs/yacht/mlp/laplace.yaml +++ b/experiments/regression/uci_datasets/configs/yacht/mlp/laplace.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/laplace + save_dir: logs/yacht/mlp/laplace name: standard default_hp_metric: false callbacks: @@ -25,17 +25,21 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 6 + num_outputs: 1 + hidden_dims: + - 
50 + dist_family: laplace output_dim: 1 - in_features: 6 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: laplace + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: yacht optimizer: lr: 5e-3 weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/yacht/mlp/normal.yaml b/experiments/regression/uci_datasets/configs/yacht/mlp/normal.yaml index 9f836719..6669211d 100644 --- a/experiments/regression/uci_datasets/configs/yacht/mlp/normal.yaml +++ b/experiments/regression/uci_datasets/configs/yacht/mlp/normal.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/normal + save_dir: logs/yacht/mlp/normal name: standard default_hp_metric: false callbacks: @@ -25,17 +25,23 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 6 + num_outputs: 1 + hidden_dims: + - 50 + dist_family: normal output_dim: 1 - in_features: 6 - hidden_dims: - - 50 loss: torch_uncertainty.losses.DistributionNLLLoss - version: std - distribution: normal + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: yacht optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/regression/uci_datasets/configs/yacht/mlp/point_wise.yaml b/experiments/regression/uci_datasets/configs/yacht/mlp/point_wise.yaml index 51640c4b..94accc21 100644 --- a/experiments/regression/uci_datasets/configs/yacht/mlp/point_wise.yaml +++ b/experiments/regression/uci_datasets/configs/yacht/mlp/point_wise.yaml @@ -9,7 +9,7 @@ trainer: logger: class_path: lightning.pytorch.loggers.TensorBoardLogger init_args: - save_dir: logs/concrete/mlp/point_wise + save_dir: logs/yacht/mlp/point_wise name: standard default_hp_metric: false callbacks: @@ -23,16 +23,22 @@ trainer: patience: 1000 check_finite: true model: + model: + class_path: torch_uncertainty.models.mlp.mlp + init_args: + in_features: 6 + num_outputs: 1 + hidden_dims: + - 50 output_dim: 1 - in_features: 6 - hidden_dims: - - 50 loss: MSELoss - version: std + save_in_csv: true data: root: ./data batch_size: 128 - dataset_name: concrete + dataset_name: yacht optimizer: - lr: 5e-3 - weight_decay: 0 + class_path: torch.optim.Adam + init_args: + lr: 5e-3 + weight_decay: 0 diff --git a/experiments/segmentation/camvid/configs/deeplab.yaml b/experiments/segmentation/camvid/configs/deeplab.yaml index 613648a3..7c77e679 100644 --- a/experiments/segmentation/camvid/configs/deeplab.yaml +++ b/experiments/segmentation/camvid/configs/deeplab.yaml @@ -18,21 +18,28 @@ trainer: init_args: logging_interval: step model: - num_classes: 11 + model: + class_path: torch_uncertainty.models.segmentation.deep_lab_v3_resnet + init_args: + num_classes: 11 + version: std + arch: 50 + style: v3+ + output_stride: 16 + separable: false loss: CrossEntropyLoss - version: std - arch: 50 - style: v3+ - output_stride: 16 - separable: false data: root: ./data batch_size: 8 num_workers: 8 optimizer: - lr: 0.002 - weight_decay: 1e-4 - momentum: 0.9 + class_path: torch.optim.SGD + init_args: + lr: 0.002 + weight_decay: 1e-4 + momentum: 0.9 lr_scheduler: - power: 1.0 - total_iters: 120 + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + power: 1.0 + total_iters: 120 diff --git a/experiments/segmentation/camvid/configs/segformer.yaml 
b/experiments/segmentation/camvid/configs/segformer.yaml index bc46c03e..7ac95f92 100644 --- a/experiments/segmentation/camvid/configs/segformer.yaml +++ b/experiments/segmentation/camvid/configs/segformer.yaml @@ -16,17 +16,23 @@ trainer: init_args: logging_interval: step model: - num_classes: 11 + model: + class_path: torch_uncertainty.models.segmentation.seg_former + init_args: + num_classes: 11 + arch: 0 loss: CrossEntropyLoss - version: std - arch: 0 data: root: ./data batch_size: 16 num_workers: 20 optimizer: - lr: 0.01 + class_path: torch.optim.SGD + init_args: + lr: 0.01 lr_scheduler: - milestones: - - 30 - gamma: 0.1 + class_path: torch.optim.lr_scheduler.MultiStepLR + init_args: + milestones: + - 30 + gamma: 0.1 diff --git a/experiments/segmentation/camvid/main.py b/experiments/segmentation/camvid/main.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/segmentation/cityscapes/configs/deeplab.yaml b/experiments/segmentation/cityscapes/configs/deeplab.yaml index 7d0c2415..d5ae9571 100644 --- a/experiments/segmentation/cityscapes/configs/deeplab.yaml +++ b/experiments/segmentation/cityscapes/configs/deeplab.yaml @@ -34,8 +34,10 @@ data: - 2048 num_workers: 8 optimizer: - lr: 1e-2 - weight_decay: 1e-4 - momentum: 0.9 + class_path: torch.optim.SGD + init_args: + lr: 1e-2 + weight_decay: 1e-4 + momentum: 0.9 lr_scheduler: total_iters: 200 diff --git a/experiments/segmentation/cityscapes/main.py b/experiments/segmentation/cityscapes/main.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/segmentation/muad/configs/segformer.yaml b/experiments/segmentation/muad/configs/segformer.yaml deleted file mode 100644 index 6c29986a..00000000 --- a/experiments/segmentation/muad/configs/segformer.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# lightning.pytorch==2.2.0 -eval_after_fit: true -seed_everything: false -trainer: - accelerator: gpu - devices: 1 - max_steps: 160000 -model: - num_classes: 19 - loss: CrossEntropyLoss - version: std - arch: 0 -data: - root: ./data - batch_size: 8 - crop_size: 1024 - eval_size: - - 1024 - - 2048 - num_workers: 30 -optimizer: - lr: 6e-5 -lr_scheduler: - step_size: 10000 - gamma: 0.1 diff --git a/experiments/segmentation/muad/segformer.py b/experiments/segmentation/muad/segformer.py deleted file mode 100644 index 3feb6271..00000000 --- a/experiments/segmentation/muad/segformer.py +++ /dev/null @@ -1,27 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.segmentation import SegFormerBaseline -from torch_uncertainty.datamodules.segmentation import MUADDataModule - - -class SegFormerCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - parser.add_optimizer_args(torch.optim.AdamW) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.StepLR) - - -def cli_main() -> SegFormerCLI: - return SegFormerCLI(SegFormerBaseline, MUADDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/torch_uncertainty/models/segmentation/__init__.py b/torch_uncertainty/models/segmentation/__init__.py index 3180e986..72166537 100644 --- a/torch_uncertainty/models/segmentation/__init__.py +++ b/torch_uncertainty/models/segmentation/__init__.py @@ -1,5 
+1,6 @@ # ruff: noqa: F401 from .deeplab import deep_lab_v3_resnet +from .segformer import seg_former from .unet import ( batched_small_unet, batched_unet, From 253bcfa14ba70c3f095dc494a9bd2cb81c08d123 Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 22 Sep 2025 10:30:10 +0200 Subject: [PATCH 20/25] :hammer: Make title and labels customizable in `CalibrationError.plot()` --- .../classification/calibration_error.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/torch_uncertainty/metrics/classification/calibration_error.py b/torch_uncertainty/metrics/classification/calibration_error.py index 3b26adae..c28b06c4 100644 --- a/torch_uncertainty/metrics/classification/calibration_error.py +++ b/torch_uncertainty/metrics/classification/calibration_error.py @@ -140,6 +140,10 @@ def reliability_chart( bin_sizes: np.ndarray, bins: np.ndarray, title: str = "Reliability Diagram", + rd_xlabel: str = "Top-class Confidence (%)", + rd_ylabel: str = "Success Rate (%)", + ch_xlabel: str = "Top-class Confidence (%)", + ch_ylabel: str = "Density (%)", figsize: tuple[int, int] = (6, 6), dpi: int = 150, ) -> tuple[object, object]: @@ -168,15 +172,26 @@ def reliability_chart( bin_sizes, bins, title=title, + xlabel=rd_xlabel, + ylabel=rd_ylabel, ) # confidence histogram subplot - _confidence_histogram_subplot(ax[1], accuracies, confidences, title="") + _confidence_histogram_subplot( + ax[1], accuracies, confidences, title="", xlabel=ch_xlabel, ylabel=ch_ylabel + ) ax[1].yaxis.set_major_formatter(mticker.PercentFormatter(1.0)) return fig, ax -def custom_plot(self) -> tuple[object, object]: +def custom_plot( + self, + title: str = "Reliability Diagram", + rd_xlabel: str = "Top-class Confidence (%)", + rd_ylabel: str = "Success Rate (%)", + ch_xlabel: str = "Top-class Confidence (%)", + ch_ylabel: str = "Density (%)", +) -> tuple[object, object]: confidences = dim_zero_cat(self.confidences) accuracies = dim_zero_cat(self.accuracies) @@ -203,6 +218,11 @@ def custom_plot(self) -> tuple[object, object]: bin_confidences=np_conf_bin, bin_sizes=np_prop_bin, bins=np_bin_boundaries, + title=title, + rd_xlabel=rd_xlabel, + rd_ylabel=rd_ylabel, + ch_xlabel=ch_xlabel, + ch_ylabel=ch_ylabel, ) From ba1af874415a7872ba7bec5c9d8cd90c2edf22c0 Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 22 Sep 2025 11:40:27 +0200 Subject: [PATCH 21/25] :bug: Fix image download in `tutorial_corruption.py` --- .../Data_Augmentation/tutorial_corruption.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/auto_tutorial_source/Data_Augmentation/tutorial_corruption.py b/auto_tutorial_source/Data_Augmentation/tutorial_corruption.py index 405e467d..5a2d916a 100644 --- a/auto_tutorial_source/Data_Augmentation/tutorial_corruption.py +++ b/auto_tutorial_source/Data_Augmentation/tutorial_corruption.py @@ -14,6 +14,7 @@ """ # %% +from pathlib import Path from urllib import request import matplotlib.pyplot as plt @@ -30,8 +31,20 @@ def download_img(url, i): - request.urlretrieve(url, f"tmp_{i}.png") # noqa: S310 - return Image.open(f"tmp_{i}.png").convert("RGB") + # Create a request with proper headers to avoid 403 Forbidden error + if not url.startswith(("http:", "https:")): + raise ValueError("URL must start with 'http:' or 'https:'") + + req = request.Request( # noqa: S310 + url, + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + }, + ) + filename = Path(f"tmp_{i}.png") + with 
request.urlopen(req) as response, filename.open("wb") as f: # noqa: S310 + f.write(response.read()) + return Image.open(filename).convert("RGB") images_ds = [download_img(url, i) for i, url in enumerate(urls)] From 0477de9818cea23924ab879abf3b374361b3272a Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 22 Sep 2025 12:11:20 +0200 Subject: [PATCH 22/25] :bug: Fix `GammaConvNd` and add tests for Gamma distribution layers --- tests/layers/test_distributions.py | 25 +++++++++++++++++++++++ torch_uncertainty/layers/distributions.py | 14 ++++++------- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/tests/layers/test_distributions.py b/tests/layers/test_distributions.py index 91e1754c..e02e9ed2 100644 --- a/tests/layers/test_distributions.py +++ b/tests/layers/test_distributions.py @@ -55,6 +55,18 @@ def test_cauchy_linear(self, feat_input: torch.Tensor) -> None: assert out["loc"].shape == torch.Size([3, 2]) assert out["scale"].shape == torch.Size([3, 2]) + def test_gamma_linear(self, feat_input: torch.Tensor) -> None: + dist_layer = get_dist_linear_layer("gamma") + layer = dist_layer( + base_layer=torch.nn.Linear, + event_dim=2, + in_features=8, + ) + out = layer(feat_input) + assert out.keys() == {"concentration", "rate"} + assert out["concentration"].shape == torch.Size([3, 2]) + assert out["rate"].shape == torch.Size([3, 2]) + def test_student_linear(self, feat_input: torch.Tensor) -> None: dist_layer = get_dist_linear_layer("student") layer = dist_layer( @@ -150,6 +162,19 @@ def test_cauchy_conv(self) -> None: assert out["loc"].shape == torch.Size([3, 2, 30, 30]) assert out["scale"].shape == torch.Size([3, 2, 30, 30]) + def test_gamma_conv(self) -> None: + dist_layer = get_dist_conv_layer("gamma") + layer = dist_layer( + base_layer=torch.nn.Conv2d, + event_dim=2, + in_channels=2, + kernel_size=3, + ) + out = layer(img_input()) + assert out.keys() == {"concentration", "rate"} + assert out["concentration"].shape == torch.Size([3, 2, 30, 30]) + assert out["rate"].shape == torch.Size([3, 2, 30, 30]) + def test_student_conv(self) -> None: dist_layer = get_dist_conv_layer("student") layer = dist_layer( diff --git a/torch_uncertainty/layers/distributions.py b/torch_uncertainty/layers/distributions.py index c710c436..fc1e366d 100644 --- a/torch_uncertainty/layers/distributions.py +++ b/torch_uncertainty/layers/distributions.py @@ -364,7 +364,8 @@ def __init__( self, base_layer: type[nn.Module], event_dim: int, - min_scale: float = 1e-6, + min_concentration: float = 1e-6, + min_rate: float = 1e-6, **layer_args, ) -> None: super().__init__( @@ -373,15 +374,14 @@ def __init__( num_params=2, **layer_args, ) - self.min_scale = min_scale + self.min_concentration = min_concentration + self.min_rate = min_rate def forward(self, x: Tensor) -> dict[str, Tensor]: x = super().forward(x) - loc = x[:, : self.event_dim] - scale = torch.clamp( - F.softplus(x[:, self.event_dim : 2 * self.event_dim]), min=self.min_scale - ) - return {"loc": loc, "scale": scale} + concentration = torch.clamp(F.softplus(x[:, : self.event_dim]), min=self.min_concentration) + rate = torch.clamp(F.softplus(x[:, self.event_dim : 2 * self.event_dim]), min=self.min_rate) + return {"concentration": concentration, "rate": rate} class StudentTLinear(_ExpandOutputLinear): From c058f8d9e2c18f7283e6a6ddf0589797a1e52fc8 Mon Sep 17 00:00:00 2001 From: alafage Date: Mon, 22 Sep 2025 13:25:20 +0200 Subject: [PATCH 23/25] :white_check_mark: Improve coverage --- tests/routines/test_pixel_regression.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/routines/test_pixel_regression.py b/tests/routines/test_pixel_regression.py index b6064f27..a193790c 100644 --- a/tests/routines/test_pixel_regression.py +++ b/tests/routines/test_pixel_regression.py @@ -90,7 +90,7 @@ def test_two_estimators_one_class(self) -> None: trainer = TUTrainer(accelerator="cpu", fast_dev_run=True, logger=None) model = DummyPixelRegressionBaseline( - dist_family="normal", + dist_family="gamma", in_channels=dm.num_channels, output_dim=dm.output_dim, image_size=dm.image_size, From ddda6db2a5c46fe196efa707aad8ffd086a53665 Mon Sep 17 00:00:00 2001 From: alafage Date: Thu, 2 Oct 2025 23:10:50 +0200 Subject: [PATCH 24/25] :hammer: Update segmentation experiment config files --- docs/source/cli_guide.rst | 4 +++ .../regression/uci_datasets/deep_ensemble.py | 28 ------------------- experiments/regression/uci_datasets/mlp.py | 26 ----------------- .../segmentation/camvid/configs/deeplab.yaml | 1 + .../camvid/configs/segformer.yaml | 1 + experiments/segmentation/camvid/deeplab.py | 28 ------------------- experiments/segmentation/camvid/main.py | 20 +++++++++++++ experiments/segmentation/camvid/segformer.py | 27 ------------------ .../cityscapes/configs/deeplab.yaml | 18 ++++++++---- .../cityscapes/configs/segformer.yaml | 17 +++++++---- .../segmentation/cityscapes/deeplab.py | 28 ------------------- experiments/segmentation/cityscapes/main.py | 20 +++++++++++++ .../segmentation/cityscapes/segformer.py | 27 ------------------ 13 files changed, 70 insertions(+), 175 deletions(-) delete mode 100644 experiments/regression/uci_datasets/deep_ensemble.py delete mode 100644 experiments/regression/uci_datasets/mlp.py delete mode 100644 experiments/segmentation/camvid/deeplab.py delete mode 100644 experiments/segmentation/camvid/segformer.py delete mode 100644 experiments/segmentation/cityscapes/deeplab.py delete mode 100644 experiments/segmentation/cityscapes/segformer.py diff --git a/docs/source/cli_guide.rst b/docs/source/cli_guide.rst index 07904b98..835a901a 100644 --- a/docs/source/cli_guide.rst +++ b/docs/source/cli_guide.rst @@ -4,6 +4,10 @@ CLI Guide Introduction to the Lightning CLI --------------------------------- +.. warning:: + + Deprecated: This guide needs to be updated to reflect the latest changes (removal of the torch_uncertainty.baselines module, etc.) + The Lightning CLI tool eases the implementation of a CLI to instanciate models to train and evaluate them on some data. The CLI tool is a wrapper around the ``Trainer`` class and provides a set of subcommands to train and test a ``LightningModule`` on a ``LightningDataModule``. 
To better match our needs, we created an inherited diff --git a/experiments/regression/uci_datasets/deep_ensemble.py b/experiments/regression/uci_datasets/deep_ensemble.py deleted file mode 100644 index 2a8bdc8f..00000000 --- a/experiments/regression/uci_datasets/deep_ensemble.py +++ /dev/null @@ -1,28 +0,0 @@ -from pathlib import Path - -from torch_uncertainty import cli_main, init_args -from torch_uncertainty.baselines import DeepEnsemblesBaseline -from torch_uncertainty.datamodules import UCIRegressionDataModule - -if __name__ == "__main__": - args = init_args(DeepEnsemblesBaseline, UCIRegressionDataModule) - if args.root == "./data/": - root = Path(__file__).parent.absolute().parents[2] - else: - root = Path(args.root) - - net_name = f"de-{args.backbone}-kin8nm" - - # datamodule - args.root = str(root / "data") - dm = UCIRegressionDataModule(dataset_name="kin8nm", **vars(args)) - - # model - args.task = "regression" - model = DeepEnsemblesBaseline( - **vars(args), - ) - - args.test = -1 - - cli_main(model, dm, root, net_name, args) diff --git a/experiments/regression/uci_datasets/mlp.py b/experiments/regression/uci_datasets/mlp.py deleted file mode 100644 index 54a9fafc..00000000 --- a/experiments/regression/uci_datasets/mlp.py +++ /dev/null @@ -1,26 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.regression import MLPBaseline -from torch_uncertainty.datamodules import UCIRegressionDataModule - - -class MLPCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - parser.add_optimizer_args(torch.optim.Adam) - - -def cli_main() -> MLPCLI: - return MLPCLI(MLPBaseline, UCIRegressionDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/segmentation/camvid/configs/deeplab.yaml b/experiments/segmentation/camvid/configs/deeplab.yaml index 7c77e679..342e5e17 100644 --- a/experiments/segmentation/camvid/configs/deeplab.yaml +++ b/experiments/segmentation/camvid/configs/deeplab.yaml @@ -27,6 +27,7 @@ model: style: v3+ output_stride: 16 separable: false + num_classes: 11 loss: CrossEntropyLoss data: root: ./data diff --git a/experiments/segmentation/camvid/configs/segformer.yaml b/experiments/segmentation/camvid/configs/segformer.yaml index 7ac95f92..d36d7468 100644 --- a/experiments/segmentation/camvid/configs/segformer.yaml +++ b/experiments/segmentation/camvid/configs/segformer.yaml @@ -21,6 +21,7 @@ model: init_args: num_classes: 11 arch: 0 + num_classes: 11 loss: CrossEntropyLoss data: root: ./data diff --git a/experiments/segmentation/camvid/deeplab.py b/experiments/segmentation/camvid/deeplab.py deleted file mode 100644 index 44f47e46..00000000 --- a/experiments/segmentation/camvid/deeplab.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser -from torch.optim.lr_scheduler import PolynomialLR - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.segmentation import DeepLabBaseline -from torch_uncertainty.datamodules.segmentation import CamVidDataModule - - -class DeepLabV3CLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - 
parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(PolynomialLR) - - -def cli_main() -> DeepLabV3CLI: - return DeepLabV3CLI(DeepLabBaseline, CamVidDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/segmentation/camvid/main.py b/experiments/segmentation/camvid/main.py index e69de29b..4b9100f9 100644 --- a/experiments/segmentation/camvid/main.py +++ b/experiments/segmentation/camvid/main.py @@ -0,0 +1,20 @@ +import torch + +from torch_uncertainty import TULightningCLI +from torch_uncertainty.datamodules.segmentation import CamVidDataModule +from torch_uncertainty.routines import SegmentationRoutine + + +def cli_main() -> TULightningCLI: + return TULightningCLI(SegmentationRoutine, CamVidDataModule) + + +if __name__ == "__main__": + torch.set_float32_matmul_precision("medium") + cli = cli_main() + if ( + (not cli.trainer.fast_dev_run) + and cli.subcommand == "fit" + and cli._get(cli.config, "eval_after_fit") + ): + cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/segmentation/camvid/segformer.py b/experiments/segmentation/camvid/segformer.py deleted file mode 100644 index 537ccf64..00000000 --- a/experiments/segmentation/camvid/segformer.py +++ /dev/null @@ -1,27 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.segmentation import SegFormerBaseline -from torch_uncertainty.datamodules.segmentation import CamVidDataModule - - -class SegFormerCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.MultiStepLR) - - -def cli_main() -> SegFormerCLI: - return SegFormerCLI(SegFormerBaseline, CamVidDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/segmentation/cityscapes/configs/deeplab.yaml b/experiments/segmentation/cityscapes/configs/deeplab.yaml index d5ae9571..97f4b39d 100644 --- a/experiments/segmentation/cityscapes/configs/deeplab.yaml +++ b/experiments/segmentation/cityscapes/configs/deeplab.yaml @@ -18,13 +18,17 @@ trainer: init_args: logging_interval: step model: + model: + class_path: torch_uncertainty.models.segmentation.deep_lab_v3_resnet + init_args: + num_classes: 19 + version: std + arch: 50 + style: v3+ + output_stride: 16 + separable: false num_classes: 19 loss: CrossEntropyLoss - version: std - arch: 50 - style: v3+ - output_stride: 16 - separable: false data: root: ./data/Cityscapes batch_size: 8 @@ -40,4 +44,6 @@ optimizer: weight_decay: 1e-4 momentum: 0.9 lr_scheduler: - total_iters: 200 + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 200 diff --git a/experiments/segmentation/cityscapes/configs/segformer.yaml b/experiments/segmentation/cityscapes/configs/segformer.yaml index 2ccb3745..e193e7a2 100644 --- a/experiments/segmentation/cityscapes/configs/segformer.yaml +++ 
b/experiments/segmentation/cityscapes/configs/segformer.yaml @@ -17,10 +17,13 @@ trainer: init_args: logging_interval: step model: + model: + class_path: torch_uncertainty.models.segmentation.seg_former + init_args: + num_classes: 19 + arch: 0 num_classes: 19 loss: CrossEntropyLoss - version: std - arch: 0 data: root: ./data/Cityscapes batch_size: 8 @@ -30,7 +33,11 @@ data: - 2048 num_workers: 8 optimizer: - lr: 6e-5 + class_path: torch.optim.AdamW + init_args: + lr: 6e-5 lr_scheduler: - step_size: 10000 - gamma: 0.1 + class_path: torch.optim.lr_scheduler.StepLR + init_args: + step_size: 10000 + gamma: 0.1 diff --git a/experiments/segmentation/cityscapes/deeplab.py b/experiments/segmentation/cityscapes/deeplab.py deleted file mode 100644 index cc865b9d..00000000 --- a/experiments/segmentation/cityscapes/deeplab.py +++ /dev/null @@ -1,28 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser -from torch.optim.lr_scheduler import PolynomialLR - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.segmentation import DeepLabBaseline -from torch_uncertainty.datamodules.segmentation import CityscapesDataModule - - -class DeepLabV3CLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - parser.add_optimizer_args(torch.optim.SGD) - parser.add_lr_scheduler_args(PolynomialLR) - - -def cli_main() -> DeepLabV3CLI: - return DeepLabV3CLI(DeepLabBaseline, CityscapesDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/segmentation/cityscapes/main.py b/experiments/segmentation/cityscapes/main.py index e69de29b..f04764df 100644 --- a/experiments/segmentation/cityscapes/main.py +++ b/experiments/segmentation/cityscapes/main.py @@ -0,0 +1,20 @@ +import torch + +from torch_uncertainty import TULightningCLI +from torch_uncertainty.datamodules.segmentation import CityscapesDataModule +from torch_uncertainty.routines import SegmentationRoutine + + +def cli_main() -> TULightningCLI: + return TULightningCLI(SegmentationRoutine, CityscapesDataModule) + + +if __name__ == "__main__": + torch.set_float32_matmul_precision("medium") + cli = cli_main() + if ( + (not cli.trainer.fast_dev_run) + and cli.subcommand == "fit" + and cli._get(cli.config, "eval_after_fit") + ): + cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") diff --git a/experiments/segmentation/cityscapes/segformer.py b/experiments/segmentation/cityscapes/segformer.py deleted file mode 100644 index 6fb976bb..00000000 --- a/experiments/segmentation/cityscapes/segformer.py +++ /dev/null @@ -1,27 +0,0 @@ -import torch -from lightning.pytorch.cli import LightningArgumentParser - -from torch_uncertainty import TULightningCLI -from torch_uncertainty.baselines.segmentation import SegFormerBaseline -from torch_uncertainty.datamodules.segmentation import CityscapesDataModule - - -class SegFormerCLI(TULightningCLI): - def add_arguments_to_parser(self, parser: LightningArgumentParser) -> None: - parser.add_optimizer_args(torch.optim.AdamW) - parser.add_lr_scheduler_args(torch.optim.lr_scheduler.StepLR) - - -def cli_main() -> SegFormerCLI: - return SegFormerCLI(SegFormerBaseline, CityscapesDataModule) - - -if __name__ == "__main__": - torch.set_float32_matmul_precision("medium") - cli = cli_main() - 
if ( - (not cli.trainer.fast_dev_run) - and cli.subcommand == "fit" - and cli._get(cli.config, "eval_after_fit") - ): - cli.trainer.test(datamodule=cli.datamodule, ckpt_path="best") From ee3fbae0c5e9c5c74cf5461b528f56c9081e1534 Mon Sep 17 00:00:00 2001 From: Adrien Lafage Date: Mon, 6 Oct 2025 10:41:57 +0200 Subject: [PATCH 25/25] :white_check_mark: Improve `deep_ensembles` test coverage Add error handling tests for deep_ensembles function. --- tests/models/wrappers/test_deep_ensembles.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/models/wrappers/test_deep_ensembles.py b/tests/models/wrappers/test_deep_ensembles.py index 70cf24bf..0438665f 100644 --- a/tests/models/wrappers/test_deep_ensembles.py +++ b/tests/models/wrappers/test_deep_ensembles.py @@ -86,6 +86,9 @@ def test_store_on_cpu_prob_regression(self) -> None: assert de.core_models[1].linear.weight.device == torch.device("cpu") def test_errors(self) -> None: + with pytest.raises(ValueError): + deep_ensembles([]) + model_1 = dummy_model(1, 10) with pytest.raises(ValueError): deep_ensembles(model_1, num_estimators=None)