Implement aten.add for IntxUnpackedToInt8Tensor

jackzhxng · jackzhxng · commit 235cad9a17ce · 2025-11-06T06:44:59.000-08:00
diff --git a/test/quantization/quantize_/workflows/intx/test_intx_unpacked_to_int8_tensor.py b/test/quantization/quantize_/workflows/intx/test_intx_unpacked_to_int8_tensor.py
@@ -50,6 +50,25 @@ def test_embedding(self):
         error = compute_error(original, quantized)
         self.assertTrue(error > 20)
 
+    def test_add(self):
+        """Sums with one or both operands quantized must score above 20 vs. the plain sum."""
+        # NOTE(review): quantize_ is applied to raw tensors here — confirm it supports that.
+        dtype, device = torch.bfloat16, "cpu"
+        a = torch.randint(low=0, high=128, size=(10,), dtype=dtype, device=device)
+        a_orig = a.clone()
+        b = torch.randint(low=0, high=128, size=(10,), dtype=dtype, device=device)
+        expected_sum = a + b
+
+        quantize_(a, self.config)
+        a_quant_sum = a + b
+
+        quantize_(b, self.config)
+        b_quant_sum = a_orig + b
+        a_b_quant_sum = a + b
+
+        for quantized_sum in [a_quant_sum, b_quant_sum, a_b_quant_sum]:
+            self.assertTrue(compute_error(expected_sum, quantized_sum) > 20)
+
     def test_linear(self):
         dtype = torch.bfloat16
         device = "cpu"
diff --git a/torchao/quantization/quantize_/workflows/intx/intx_unpacked_to_int8_tensor.py b/torchao/quantization/quantize_/workflows/intx/intx_unpacked_to_int8_tensor.py
@@ -355,6 +355,19 @@ def _(func, types, args, kwargs):
     return torch.nn.functional.embedding(indices, weight_tensor, **kwargs)
 
 
+@implements(aten.add.Tensor)
+def _(func, types, args, kwargs):
+    """Add after dequantizing quantized operands, forwarding kwargs such as ``alpha``."""
+    assert len(args) == 2
+    operands = []
+    for operand in args:
+        if isinstance(operand, IntxUnpackedToInt8Tensor):
+            assert operand.activation_quantization is None
+            operand = operand.dequantize()
+        operands.append(operand)
+    return func(*operands, **(kwargs or {}))
+
+
 @implements(aten.slice.Tensor)
 def _(func, types, args, kwargs):
     self, dim, start, end, step = fill_defaults(args, 5, [0, None, None, 1])