|
16 | 16 | from torchrl.envs.llm.transforms.kl import RetrieveLogProb |
17 | 17 | from torchrl.modules.llm import TransformersWrapper, vLLMWrapper |
18 | 18 | from torchrl.modules.llm.policies.common import ChatHistory, Masks, Text, Tokens |
19 | | -from torchrl.objectives.llm.grpo import GRPOLoss, MCAdvantage |
| 19 | +from torchrl.objectives.llm.grpo import ( |
| 20 | + CISPO, |
| 21 | + CISPOLossOutput, |
| 22 | + GRPOLoss, |
| 23 | + GRPOLossOutput, |
| 24 | + MCAdvantage, |
| 25 | +) |
20 | 26 | from torchrl.objectives.llm.sft import SFTLoss |
21 | 27 |
|
22 | 28 | _has_transformers = importlib.util.find_spec("transformers") is not None |
@@ -203,7 +209,6 @@ def test_grpo(self, mock_transformer_model, dapo): |
203 | 209 | loss_vals = loss_fn(data) |
204 | 210 |
|
205 | 211 | # Assertions: Check output type and structure |
206 | | - from torchrl.objectives.llm.grpo import GRPOLossOutput |
207 | 212 |
|
208 | 213 | assert isinstance( |
209 | 214 | loss_vals, GRPOLossOutput |
@@ -240,6 +245,68 @@ def test_grpo(self, mock_transformer_model, dapo): |
240 | 245 | 0 <= loss_vals.clip_fraction <= 1 |
241 | 246 | ), f"clip_fraction out of range: {loss_vals.clip_fraction}" |
242 | 247 |
|
def test_cispo(self, mock_transformer_model):
    """Smoke-test the CISPO loss on a mock transformer policy.

    Builds a CPU mock model wrapped in ``TransformersWrapper``, evaluates
    ``CISPO`` on a batch produced by ``_mock_data_grpo``, and validates the
    type, fields, shapes and value ranges of the returned loss output.
    """
    vocab_size = 1024
    device = torch.device("cpu")
    clip_eps = 0.20

    # Mock policy: a wrapped model in non-generating, history-input mode.
    policy = TransformersWrapper(
        mock_transformer_model(vocab_size=vocab_size, device=device),
        generate=False,
        pad_output=True,
        input_mode="history",
    )
    cispo_loss = CISPO(policy, clip_epsilon=clip_eps)

    # The GRPO mock batch is reused as input for the CISPO loss.
    batch = _mock_data_grpo(vocab_size=vocab_size, device=device)
    output = cispo_loss(batch)

    # The loss must come back wrapped in its dedicated output container.
    assert isinstance(
        output, CISPOLossOutput
    ), f"Expected CISPOLossOutput, got {type(output)}"

    # Every field that the GRPO output exposes must be present here as well.
    expected_fields = (
        "loss_objective",
        "clip_fraction",
        "kl_approx",
        "ESS",
        "entropy",
        "loss_entropy",
    )
    for field in expected_fields:
        assert hasattr(output, field), f"Missing {field}"

    # After reduction these quantities are expected to be 0-dim tensors.
    scalar_fields = ("loss_objective", "clip_fraction", "kl_approx", "ESS")
    for field in scalar_fields:
        assert (
            getattr(output, field).shape == ()
        ), f"{field} should be scalar, got {getattr(output, field).shape}"

    # NaN / inf guard on the objective and the effective sample size.
    for field in ("loss_objective", "ESS"):
        assert torch.isfinite(getattr(output, field)), f"{field} is not finite"

    # A fraction of clipped samples can only live in [0, 1].
    clip_frac = output.clip_fraction
    assert 0 <= clip_frac <= 1, f"clip_fraction out of range: {clip_frac}"
243 | 310 |
|
244 | 311 | class TestSFT: |
245 | 312 | @pytest.fixture(scope="class") |
|
0 commit comments