Skip to content

Commit 4575d1a

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents 38b2f4d + 7fe6ada commit 4575d1a

File tree

14 files changed

+46
-26
lines changed

14 files changed

+46
-26
lines changed

docs/source/hilserl.mdx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ This guide provides step-by-step instructions for training a robot policy using
2828
- A gamepad (recommended) or keyboard to control the robot
2929
- A Nvidia GPU
3030
- A real robot with a follower and leader arm (optional if you use the keyboard or the gamepad)
31-
- A URDF file for the robot for the kinematics package (check `lerobot/common/model/kinematics.py`)
31+
- A URDF file for the robot for the kinematics package (check `lerobot/model/kinematics.py`)
3232

3333
## What kind of tasks can I train?
3434

@@ -477,7 +477,7 @@ Create a training configuration file (example available [here](https://huggingfa
477477
1. Configure the policy settings (`type="sac"`, `device`, etc.)
478478
2. Set `dataset` to your cropped dataset
479479
3. Configure environment settings with crop parameters
480-
4. Check the other parameters related to SAC in [configuration_sac.py](https://github.com/huggingface/lerobot/blob/19bb621a7d0a31c20cd3cc08b1dbab68d3031454/lerobot/common/policies/sac/configuration_sac.py#L79).
480+
4. Check the other parameters related to SAC in [configuration_sac.py](https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/sac/configuration_sac.py#L79).
481481
5. Verify that the `policy` config is correct with the right `input_features` and `output_features` for your task.
482482

483483
**Starting the Learner**

docs/source/il_robots.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ The `record` function provides a suite of tools for capturing and managing data
323323
##### 2. Checkpointing and Resuming
324324

325325
- Checkpoints are automatically created during recording.
326-
- If an issue occurs, you can resume by re-running the same command with `--resume=true`.
326+
- If an issue occurs, you can resume by re-running the same command with `--resume=true`. When resuming a recording, `--dataset.num_episodes` must be set to the **number of additional episodes to be recorded**, and not to the targeted total number of episodes in the dataset!
327327
- To start recording from scratch, **manually delete** the dataset directory.
328328

329329
##### 3. Recording Parameters

docs/source/lekiwi.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ You should see on your laptop something like this: `[INFO] Connected to remote r
258258
| F | Decrease speed |
259259

260260
> [!TIP]
261-
> If you use a different keyboard, you can change the keys for each command in the [`LeKiwiConfig`](../src/lerobot/robot_devices/robots/configs.py).
261+
> If you use a different keyboard, you can change the keys for each command in the [`LeKiwiClientConfig`](https://github.com/huggingface/lerobot/blob/main/src/lerobot/robots/lekiwi/config_lekiwi.py).
262262
263263
### Wired version
264264

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ dependencies = [
6161
# Hugging Face dependencies
6262
"datasets>=2.19.0,<=3.6.0", # TODO: Bump dependency
6363
"diffusers>=0.27.2",
64-
"huggingface-hub[hf-transfer,cli]>=0.27.1",
64+
"huggingface-hub[hf-transfer,cli]>=0.27.1,<0.34.0",
6565

6666
# Core dependencies
6767
"cmake>=3.29.0.1",
@@ -75,7 +75,7 @@ dependencies = [
7575
"packaging>=24.2",
7676
"pynput>=1.7.7",
7777
"pyserial>=3.5",
78-
"wandb>=0.16.3",
78+
"wandb>=0.20.0",
7979

8080
"draccus==0.10.0", # TODO: Remove ==
8181
"gymnasium>=0.29.1,<1.0.0", # TODO: Bump dependency

src/lerobot/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@
170170
# lists all available policies from `lerobot/policies`
171171
available_policies = ["act", "diffusion", "tdmpc", "vqbet"]
172172

173-
# lists all available robots from `lerobot/robot_devices/robots`
173+
# lists all available robots from `lerobot/robots`
174174
available_robots = [
175175
"koch",
176176
"koch_bimanual",
@@ -179,13 +179,13 @@
179179
"so101",
180180
]
181181

182-
# lists all available cameras from `lerobot/robot_devices/cameras`
182+
# lists all available cameras from `lerobot/cameras`
183183
available_cameras = [
184184
"opencv",
185185
"intelrealsense",
186186
]
187187

188-
# lists all available motors from `lerobot/robot_devices/motors`
188+
# lists all available motors from `lerobot/motors`
189189
available_motors = [
190190
"dynamixel",
191191
"feetech",

src/lerobot/envs/configs.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def gym_kwargs(self) -> dict:
4444
@EnvConfig.register_subclass("aloha")
4545
@dataclass
4646
class AlohaEnv(EnvConfig):
47-
task: str = "AlohaInsertion-v0"
47+
task: str | None = "AlohaInsertion-v0"
4848
fps: int = 50
4949
episode_length: int = 400
5050
obs_type: str = "pixels_agent_pos"
@@ -82,7 +82,7 @@ def gym_kwargs(self) -> dict:
8282
@EnvConfig.register_subclass("pusht")
8383
@dataclass
8484
class PushtEnv(EnvConfig):
85-
task: str = "PushT-v0"
85+
task: str | None = "PushT-v0"
8686
fps: int = 10
8787
episode_length: int = 300
8888
obs_type: str = "pixels_agent_pos"
@@ -124,7 +124,7 @@ def gym_kwargs(self) -> dict:
124124
@EnvConfig.register_subclass("xarm")
125125
@dataclass
126126
class XarmEnv(EnvConfig):
127-
task: str = "XarmLift-v0"
127+
task: str | None = "XarmLift-v0"
128128
fps: int = 15
129129
episode_length: int = 200
130130
obs_type: str = "pixels_agent_pos"
@@ -200,10 +200,10 @@ class HILSerlRobotEnvConfig(EnvConfig):
200200
wrapper: EnvTransformConfig | None = None
201201
fps: int = 10
202202
name: str = "real_robot"
203-
mode: str = None # Either "record", "replay", None
203+
mode: str | None = None # Either "record", "replay", None
204204
repo_id: str | None = None
205205
dataset_root: str | None = None
206-
task: str = ""
206+
task: str | None = ""
207207
num_episodes: int = 10 # only for record mode
208208
episode: int = 0
209209
device: str = "cuda"
@@ -213,6 +213,7 @@ class HILSerlRobotEnvConfig(EnvConfig):
213213
# For the reward classifier, to record more positive examples after a success
214214
number_of_steps_after_success: int = 0
215215

216+
@property
216217
def gym_kwargs(self) -> dict:
217218
return {}
218219

@@ -222,9 +223,8 @@ def gym_kwargs(self) -> dict:
222223
class HILEnvConfig(EnvConfig):
223224
"""Configuration for the HIL environment."""
224225

225-
type: str = "hil"
226226
name: str = "PandaPickCube"
227-
task: str = "PandaPickCubeKeyboard-v0"
227+
task: str | None = "PandaPickCubeKeyboard-v0"
228228
use_viewer: bool = True
229229
gripper_penalty: float = 0.0
230230
use_gamepad: bool = True
@@ -252,7 +252,7 @@ class HILEnvConfig(EnvConfig):
252252
robot_config: RobotConfig | None = None
253253
teleop_config: TeleoperatorConfig | None = None
254254
wrapper: EnvTransformConfig | None = None
255-
mode: str = None # Either "record", "replay", None
255+
mode: str | None = None # Either "record", "replay", None
256256
repo_id: str | None = None
257257
dataset_root: str | None = None
258258
num_episodes: int = 10 # only for record mode

src/lerobot/policies/diffusion/modeling_diffusion.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,15 @@ def select_action(self, batch: dict[str, Tensor]) -> Tensor:
133133
"horizon" may not be the best name to describe what the variable actually means, because this period is
134134
actually measured from the first observation which (if `n_obs_steps` > 1) happened in the past.
135135
"""
136+
# NOTE: for offline evaluation, we have action in the batch, so we need to pop it out
137+
if ACTION in batch:
138+
batch.pop(ACTION)
139+
136140
batch = self.normalize_inputs(batch)
137141
if self.config.image_features:
138142
batch = dict(batch) # shallow copy so that adding a key doesn't modify the original
139143
batch[OBS_IMAGES] = torch.stack([batch[key] for key in self.config.image_features], dim=-4)
140-
# Note: It's important that this happens after stacking the images into a single key.
144+
# NOTE: It's important that this happens after stacking the images into a single key.
141145
self._queues = populate_queues(self._queues, batch)
142146

143147
if len(self._queues[ACTION]) == 0:

src/lerobot/policies/pi0/modeling_pi0.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,9 +515,10 @@ def sample_noise(self, shape, device):
515515
return noise
516516

517517
def sample_time(self, bsize, device):
518-
time_beta = sample_beta(1.5, 1.0, bsize, device)
518+
beta_dist = torch.distributions.Beta(concentration1=1.5, concentration0=1.0)
519+
time_beta = beta_dist.sample((bsize,)).to(device=device, dtype=torch.float32)
519520
time = time_beta * 0.999 + 0.001
520-
return time.to(dtype=torch.float32, device=device)
521+
return time
521522

522523
def embed_prefix(
523524
self, images, img_masks, lang_tokens, lang_masks

src/lerobot/policies/pi0fast/modeling_pi0fast.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,8 @@ def __init__(self, config: PI0FASTConfig):
488488
param.data = param.data.to(dtype=torch_precision)
489489
self.set_requires_grad()
490490
self.image_keys = self.config.image_features.keys()
491+
# TODO: Remove this once we bump transformers to >4.52.0 because the attribute will be removed
492+
# AttributeError: 'PaliGemmaConfig' object has no attribute 'ignore_index'
491493
self.ignore_index = self.pi0_paligemma.config.ignore_index
492494
self.padding_side = self.config.padding_side
493495

src/lerobot/policies/smolvla/modeling_smolvla.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,8 +384,13 @@ def get_optim_params(self) -> dict:
384384
return self.parameters()
385385

386386
def _get_action_chunk(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor:
387+
# TODO: Check if this for loop is needed.
388+
# Context: In fact, self._queues contains only the ACTION field, and at inference time, we don't have action in the batch
389+
# In the case of offline inference, we have the action in the batch
390+
# that's why, without the k != ACTION check, it will raise an error because we are trying to stack
391+
# on an empty container.
387392
for k in batch:
388-
if k in self._queues:
393+
if k in self._queues and k != ACTION:
389394
batch[k] = torch.stack(list(self._queues[k]), dim=1)
390395

391396
images, img_masks = self.prepare_images(batch)
@@ -631,7 +636,7 @@ class VLAFlowMatching(nn.Module):
631636
└──────────────────────────────┘
632637
"""
633638

634-
def __init__(self, config):
639+
def __init__(self, config: SmolVLAConfig):
635640
super().__init__()
636641
self.config = config
637642

0 commit comments

Comments
 (0)