Fix(backend/torch): Resolve MPS broadcast crash in binary_crossentropy

Mithil27360 · Mithil27360 · commit d2f436446756 · 2025-11-02T16:12:10.000+05:30
diff --git a/keras/src/backend/torch/nn.py b/keras/src/backend/torch/nn.py
@@ -755,12 +755,27 @@ def binary_crossentropy(target, output, from_logits=False):
     target = convert_to_tensor(target)
     output = convert_to_tensor(output)
 
+    # Fix for MPS broadcast error:
+    # The backward pass for BCELoss on MPS fails if inputs are (B, T, 1).
+    # We squeeze both to (B, T).
+    # .contiguous() is added to force a new tensor copy, as the backward
+    # pass seems to be using the original tensor's shape (a view bug).
+    if (
+        target.ndim == 3
+        and target.shape[-1] == 1
+        and output.ndim == 3
+        and output.shape[-1] == 1
+    ):
+        target = torch.squeeze(target, -1).contiguous()
+        output = torch.squeeze(output, -1).contiguous()
+
     if target.shape != output.shape:
         raise ValueError(
             "Arguments `target` and `output` must have the same shape. "
             "Received: "
             f"target.shape={target.shape}, output.shape={output.shape}"
         )
+    
     # By default, PyTorch, does reduction of `sum` over all rows,
     # change reduction to `none` to keep dim
     if from_logits:
@@ -771,7 +786,6 @@ def binary_crossentropy(target, output, from_logits=False):
         output = torch.clip(output, backend.epsilon(), 1.0 - backend.epsilon())
         return tnn.binary_cross_entropy(output, target, reduction="none")
 
-
 def moments(x, axes, keepdims=False, synchronized=False):
     if synchronized:
         raise NotImplementedError(