LouisRouss · LouisRouss · Sep 10, 2025 · Sep 11, 2025 · Sep 13, 2025 · Sep 13, 2025
diff --git a/.gitignore b/.gitignore
@@ -27,8 +27,6 @@ share/python-wheels/
 MANIFEST
 
 # PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 
@@ -83,36 +81,12 @@ notebooks/
 profile_default/
 ipython_config.py
 
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-#   This is especially recommended for binary packages to ensure reproducibility, and is more
-#   commonly ignored for libraries.
-#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
 # pdm
-#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-#   in version control.
-#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+# PEP 582
 __pypackages__/
 
 # Celery stuff
@@ -155,13 +129,6 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
 
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-
 # running logs
 examples/wandb
 outputs/

diff --git a/README.md b/README.md
@@ -63,10 +63,10 @@ Here is a To-Do list, feel welcome to help to any point along this list. The alr
 - [ ] add some more context embedders
 - [ ] add reflow algorithm
 - [ ] add EDM
-- [ ] think about how to add a sampler abstraction and use it in the different Diffusion classes (generalist class with euler, heuns etc)
+- [x] think about how to add a sampler abstraction and use it in the different Diffusion classes (generalist class with Euler, Heun, etc.)
 - [ ] Train our models on toy datasets for different tasks (conditional generation, Image to Image ...)
 - [ ] Add possibility to train LORA/DORA
-- [ ] add different sampler
+- [x] add different samplers
 - [ ] Try out Differential Transformers
 - [ ] Check to add https://arxiv.org/pdf/2406.02507
 - [ ] inject lessons learned from nvidia https://developer.nvidia.com/blog/rethinking-how-to-train-diffusion-models/
diff --git a/examples/train_diffusion.py b/examples/train_diffusion.py
@@ -5,7 +5,7 @@
 from torch.utils.data import DataLoader
 
 from diffulab.diffuse import Diffuser
-from diffulab.training import Trainer
+from diffulab.training import BaseTrainer
 
 
 @hydra.main(version_base=None, config_path="../configs", config_name="train_mnist_flow_matching")
@@ -52,8 +52,7 @@ def count_parameters(model: torch.nn.Module) -> int:
         params=denoiser.parameters(),
     )
 
-    # TODO: add a run name for wandb
-    trainer = Trainer(
+    trainer = BaseTrainer(
         n_epoch=cfg.trainer.n_epoch,
         gradient_accumulation_step=cfg.trainer.gradient_accumulation_step,
         precision_type=cfg.trainer.precision_type,
@@ -62,6 +61,10 @@ def count_parameters(model: torch.nn.Module) -> int:
         ema_update_after_step=cfg.trainer.get("ema_update_after_step", 0),
         ema_update_every=cfg.trainer.get("ema_update_every", 10),
         run_config=OmegaConf.to_container(cfg, resolve=True),  # type: ignore[reportArgumentType]
+        compile=cfg.trainer.get("compile", False),
+        init_kwargs={
+            "wandb": cfg.trainer.get("wandb", {}),
+        },
     )
 
     trainer.train(

diff --git a/examples/train_repa.py b/examples/train_repa.py
@@ -7,7 +7,7 @@
 from torch.utils.data import DataLoader
 
 from diffulab.diffuse import Diffuser
-from diffulab.training import Trainer
+from diffulab.training import BaseTrainer
 from diffulab.training.losses.repa import RepaLoss
 
 
@@ -77,7 +77,7 @@ def count_parameters(model: torch.nn.Module) -> int:
         + list(repa_loss.resampler.parameters() if repa_loss.resampler else []),
     )
 
-    trainer = Trainer(
+    trainer = BaseTrainer(
         n_epoch=cfg.trainer.n_epoch,
         gradient_accumulation_step=cfg.trainer.gradient_accumulation_step,
         precision_type=cfg.trainer.precision_type,

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,20 +1,23 @@
 [project]
 name = "diffulab"
 version = "0.1.0"
-description = "Add your description here"
+description = "DiffuLab is designed to provide a simple and flexible way to train diffusion models while allowing full customization of its core components"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "accelerate>=1.4.0",
+    "blobfile>=3.1.0",
     "diffusers>=0.33.1",
     "einops>=0.8.1",
     "ema-pytorch>=0.7.7",
     "hydra-core>=1.3.2",
     "jaxtyping>=0.3.0",
+    "loguru>=0.7.3",
     "mosaicml-streaming>=0.12.0",
     "omegaconf>=2.3.0",
-    "open-clip-torch>=2.30.0",
     "pyopenssl==23.2.0",
+    "sentencepiece>=0.2.1",
+    "tiktoken>=0.11.0",
     "torch>=2.6.0",
     "transformers>=4.49.0",
     "wandb>=0.19.6",
@@ -31,6 +34,9 @@ dev = [
 repa = [
     "timm>=1.0.15",
 ]
+prefgrpo = [
+    "qwen-vl-utils>=0.0.11",
+]
 
 [tool.uv.sources]
 diffulab = {workspace = true}

diff --git a/src/diffulab/__init__.py b/src/diffulab/__init__.py
@@ -1,7 +1,7 @@
 from .datasets import BaseDataset, CIFAR10Dataset, ImageNetLatentREPA, MNISTDataset
 from .diffuse import Diffuser, Flow, GaussianDiffusion
 from .networks import DCAE, REPA, Denoiser, DinoV2, MMDiT, PerceiverResampler, SD3TextEmbedder, UNetModel, VisionTower
-from .training import LossFunction, RepaLoss, Trainer
+from .training import BaseTrainer, GRPOTrainer, LossFunction, RepaLoss, Trainer
 
 __all__ = [
     "BaseDataset",
@@ -22,5 +22,7 @@
     "VisionTower",
     "LossFunction",
     "RepaLoss",
+    "BaseTrainer",
+    "GRPOTrainer",
     "Trainer",
 ]
diff --git a/src/diffulab/datasets/base.py b/src/diffulab/datasets/base.py
@@ -7,12 +7,17 @@
 from torch import Tensor
 from torch.utils.data import Dataset
 
-from diffulab.networks.denoisers.common import ModelInput
+from diffulab.networks.denoisers.common import ExtraInputGRPO, ModelInput, ModelInputGRPO
 
 
 class BatchData(TypedDict, total=False):
     model_inputs: Required[ModelInput]
-    extra: NotRequired[dict[str, Tensor | None]]
+    extra: NotRequired[dict[str, Tensor | list[str] | None]]
+
+
+class BatchDataGRPO(TypedDict, total=False):
+    model_inputs: Required[ModelInputGRPO]
+    extra: Required[ExtraInputGRPO]
 
 
 class BaseDataset(Dataset[BatchData], ABC):

diff --git a/src/diffulab/datasets/mnist.py b/src/diffulab/datasets/mnist.py
@@ -24,7 +24,12 @@ def __init__(self, data_path: str, train: bool = True):
         self.images, self.labels = self.load_data()
 
     def load_data(self) -> tuple[NDArray[np.float32], NDArray[np.int64]]:
-        """Load MNIST data from files."""
+        """
+        Load MNIST data from files.
+
+        Returns:
+            A tuple (images, labels) of numpy arrays.
+        """
         if self.train:
             images_file = self.data_path / "train-images-idx3-ubyte"
             labels_file = self.data_path / "train-labels-idx1-ubyte"
@@ -38,7 +43,13 @@ def load_data(self) -> tuple[NDArray[np.float32], NDArray[np.int64]]:
         return images, labels
 
     def _load_images(self, file: Path) -> NDArray[np.float32]:
-        """Load and preprocess MNIST images."""
+        """
+        Load and preprocess MNIST images.
+        Args:
+            file: Path to the MNIST images file.
+        Returns:
+            A numpy array of shape (num_images, 1, 32, 32) containing the resized images.
+        """
         with open(file, "rb") as f:
             _, num_images, rows, cols = struct.unpack(">IIII", f.read(16))
             images = np.frombuffer(f.read(), dtype=np.uint8).reshape(num_images, 1, rows, cols)
@@ -52,12 +63,24 @@ def _load_images(self, file: Path) -> NDArray[np.float32]:
         return resized_images
 
     def _load_labels(self, file: Path) -> NDArray[np.int64]:
-        """Load MNIST labels."""
+        """
+        Load MNIST labels.
+        Args:
+            file: Path to the MNIST labels file.
+        Returns:
+            A numpy array of shape (num_labels,) containing the labels.
+        """
         with open(file, "rb") as f:
             _, _ = struct.unpack(">II", f.read(8))
             labels = np.frombuffer(f.read(), dtype=np.uint8)
         return labels.astype(np.int64)
 
     def preprocess_image(self, image: NDArray[Any]) -> NDArray[np.float32]:
-        """Normalize the image to [-1, 1] range."""
+        """
+        Normalize the image to [-1, 1] range.
+        Args:
+            image: A numpy array of pixel values, expected in the [0, 255] range.
+        Returns:
+            A float32 numpy array with values rescaled to the [-1, 1] range.
+        """
         return ((image.astype(np.float32) / 255.0) - 0.5) / 0.5