@@ -755,12 +755,27 @@ def binary_crossentropy(target, output, from_logits=False):
     target = convert_to_tensor(target)
     output = convert_to_tensor(output)
 
+    # Fix for MPS broadcast error:
+    # The backward pass for BCELoss on MPS fails if inputs have a
+    # trailing dim of 1 (e.g., (B, T, 1) or (B, H, W, 1)).
+    # Squeezing to (B, T) or (B, H, W) resolves the conflict.
+    # .contiguous() is added to force a new tensor copy.
+    if (
+        target.ndim > 1
+        and output.ndim == target.ndim
+        and target.shape[-1] == 1
+        and output.shape[-1] == 1
+    ):
+        target = torch.squeeze(target, -1).contiguous()
+        output = torch.squeeze(output, -1).contiguous()
+
     if target.shape != output.shape:
         raise ValueError(
             "Arguments `target` and `output` must have the same shape. "
             "Received: "
             f"target.shape={target.shape}, output.shape={output.shape}"
         )
+
     # By default, PyTorch does a `mean` reduction over all elements;
     # change reduction to `none` to keep dims
     if from_logits:
@@ -771,7 +786,6 @@ def binary_crossentropy(target, output, from_logits=False):
         output = torch.clip(output, backend.epsilon(), 1.0 - backend.epsilon())
         return tnn.binary_cross_entropy(output, target, reduction="none")
 
-
 def moments(x, axes, keepdims=False, synchronized=False):
     if synchronized:
         raise NotImplementedError(
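
For reference, here is a minimal standalone sketch of the same workaround; the shapes, the random tensors, and the `eps` constant standing in for `backend.epsilon()` are illustrative assumptions, not taken from the Keras test suite. It shows that squeezing the shared trailing unit dim before `binary_cross_entropy` lets the backward pass complete while leaving the per-element loss values unchanged:

```python
import torch
import torch.nn.functional as F

# Illustrative (B, T, 1) tensors; a shared trailing dim of 1 is the
# shape that trips the MPS backward pass this patch works around.
target = torch.randint(0, 2, (4, 8, 1)).float()
output = torch.rand(4, 8, 1, requires_grad=True)

# Same guard as the patch: squeeze only when both tensors end in a
# dim of 1, so every other shape passes through untouched.
if (
    target.ndim > 1
    and output.ndim == target.ndim
    and target.shape[-1] == 1
    and output.shape[-1] == 1
):
    target = torch.squeeze(target, -1).contiguous()
    probs = torch.squeeze(output, -1).contiguous()
else:
    probs = output

eps = 1e-7  # illustrative stand-in for backend.epsilon()
probs = torch.clip(probs, eps, 1.0 - eps)
loss = F.binary_cross_entropy(probs, target, reduction="none")
loss.sum().backward()

print(loss.shape)         # torch.Size([4, 8])
print(output.grad.shape)  # torch.Size([4, 8, 1]): grads reach the original tensor
```

The guard deliberately mirrors the patch: tensors whose ranks differ or whose last dim is not 1 fall through untouched, so the squeeze only fires in the failing case, and gradients still flow back to the original unsqueezed tensor through `squeeze` and `contiguous`.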