Use linalg.index to lower aten.reflection_pad2d (#4105)

pravg-amd · web-flow · commit 692b2c070069 · 2025-03-27T20:26:46.000+05:30
"aten.reflection_pad2d" was lowered to linalg using affine maps of the
form {indexing_maps = [affine_map<(d0, d1) -> (d0, -d1 + 1)>,
affine_map<(d0, d1) -> (d0, d1)>]}.

This causes lowering issues in downstream passes, such as the assertion
failure:
"BinaryOpExpr(AffineBinaryOpExpr): Assertion
`cast(expr.getRHS()).getValue() > 0 && "nonpositive multiplying
coefficient"' failed."

Using linalg.index together with a tensor.extract op, instead of the
affine map above, lets the same input compile successfully.

Signed-off-by: Praveen G <Praveen.G2@amd.com>
diff --git a/lib/Conversion/TorchToLinalg/DataMovement.cpp b/lib/Conversion/TorchToLinalg/DataMovement.cpp
@@ -434,17 +434,6 @@ class ConvertAtenReflectionPad2dOp
     for (auto v : {TOP, BOTTOM})
       tileHeight[v] = getConstant(rewriter, loc, getVPadArgument(v), indexType);
 
-    // Helper to reflect/reverse the i-th dimension of an affine map
-    // without symbols. This only works if applied on a tensor
-    // for which the corresponding dimension has a statically
-    // known size which is good enough since we only apply
-    // it to reflect the padding slices.
-    auto reflectDim = [](AffineMap map, unsigned numDims, int64_t i,
-                         int64_t size) {
-      AffineExpr d = map.getResult(i);
-      return map.replace(d, size - d - 1, numDims, 0);
-    };
-
     // Create output shape and tensor
     SmallVector<Value> resultShape{inputShape};
     resultShape[vDim] =
@@ -538,26 +527,41 @@ class ConvertAtenReflectionPad2dOp
       Value tile = rewriter.create<tensor::ExtractSliceOp>(
           loc, input, extractOffsets, extractShape, allOneStrides);
 
-      // Reverse the tile along the horizontal, vertical, or both
-      // dimensions.
       auto inputMap = AffineMap::getMultiDimIdentityMap(numDims, context);
-      if (shouldHReflect(horizontalPos)) {
-        inputMap =
-            reflectDim(inputMap, numDims, hDim, getHPadArgument(horizontalPos));
-      }
-      if (shouldVReflect(verticalPos)) {
-        inputMap =
-            reflectDim(inputMap, numDims, vDim, getVPadArgument(verticalPos));
-      }
 
-      tile = rewriter
-                 .create<linalg::GenericOp>(
-                     loc, llvm::cast<RankedTensorType>(tile.getType()), tile,
-                     tile, ArrayRef({inputMap, idMap}), iteratorTypes,
-                     [](OpBuilder &b, Location nestedLoc, ValueRange args) {
-                       b.create<linalg::YieldOp>(nestedLoc, args[0]);
-                     })
-                 .getResult(0);
+      tile =
+          rewriter
+              .create<linalg::GenericOp>(
+                  loc, llvm::cast<RankedTensorType>(tile.getType()), tile, tile,
+                  ArrayRef({inputMap, idMap}), iteratorTypes,
+                  [&](OpBuilder &b, Location nestedLoc, ValueRange args) {
+                    // Use linalg.index to reflect the dims
+                    SmallVector<Value> extractIndices(numDims);
+                    for (unsigned i = 0; i < numDims; i++)
+                      extractIndices[i] =
+                          b.create<linalg::IndexOp>(nestedLoc, i);
+
+                    auto reflectDim = [&](int64_t padSize, Value dim) {
+                      Value reflectDimSize = getConstant(
+                          rewriter, loc, padSize - 1, rewriter.getIndexType());
+                      return b.create<arith::SubIOp>(loc, reflectDimSize, dim);
+                    };
+
+                    // Reverse the tile along the horizontal, vertical, or both
+                    // dimensions.
+                    if (shouldHReflect(horizontalPos))
+                      extractIndices[hDim] = reflectDim(
+                          getHPadArgument(horizontalPos), extractIndices[hDim]);
+
+                    if (shouldVReflect(verticalPos))
+                      extractIndices[vDim] = reflectDim(
+                          getVPadArgument(verticalPos), extractIndices[vDim]);
+
+                    Value extractValue = rewriter.create<tensor::ExtractOp>(
+                        nestedLoc, tile, extractIndices);
+                    b.create<linalg::YieldOp>(nestedLoc, extractValue);
+                  })
+              .getResult(0);
 
       // Insert the tile in the resultTensor.
       SmallVector<Value> insertOffsets(numDims, zero);
diff --git a/test/Conversion/TorchToLinalg/datamovement.mlir b/test/Conversion/TorchToLinalg/datamovement.mlir
@@ -32,3 +32,43 @@ func.func @torch.aten.permute$rank0(%arg0: !torch.vtensor<[],f32>) -> !torch.vte
   %1 = torch.aten.permute %arg0, %0 : !torch.vtensor<[],f32>, !torch.list<int> -> !torch.vtensor<[],f32>
   return %1 : !torch.vtensor<[],f32>
 }
+
+// -----
+
+// CHECK: #[[$INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
+// CHECK-LABEL:   func.func @torch.aten.reflection_pad2d(
+// CHECK-SAME:                                           %[[VAL_0:.*]]: !torch.vtensor<[1,1,4,4],f32>) -> !torch.vtensor<[1,1,8,9],f32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = torch_c.to_builtin_tensor %[[VAL_0]] : !torch.vtensor<[1,1,4,4],f32> -> tensor<1x1x4x4xf32>
+// CHECK:           %[[VAL_5:.*]] = tensor.empty() : tensor<1x1x8x9xf32>
+// CHECK:           %[[VAL_6:.*]] = linalg.fill ins(%[[VAL_1]] : f32) outs(%[[VAL_5]] : tensor<1x1x8x9xf32>) -> tensor<1x1x8x9xf32>
+// CHECK:           %[[VAL_7:.*]] = tensor.extract_slice %[[VAL_4]][0, 0, 1, 1] [1, 1, 2, 2] [1, 1, 1, 1] : tensor<1x1x4x4xf32> to tensor<1x1x2x2xf32>
+// CHECK:           %[[VAL_8:.*]] = tensor.extract_slice %[[VAL_4]][0, 0, 1, 1] [1, 1, 2, 2] [1, 1, 1, 1] : tensor<1x1x4x4xf32> to tensor<1x1x2x2xf32>
+// CHECK:           %[[VAL_9:.*]] = tensor.extract_slice %[[VAL_4]][0, 0, 1, 1] [1, 1, 2, 2] [1, 1, 1, 1] : tensor<1x1x4x4xf32> to tensor<1x1x2x2xf32>
+// CHECK:           %[[VAL_10:.*]] = linalg.generic {indexing_maps = [#[[$INPUT_MAP]], #[[$INPUT_MAP]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[VAL_9]] : tensor<1x1x2x2xf32>) outs(%[[VAL_8]] : tensor<1x1x2x2xf32>) {
+// CHECK:           ^bb0(%[[VAL_11:.*]]: f32, %[[VAL_12:.*]]: f32):
+// CHECK:             %[[VAL_13:.*]] = linalg.index 0 : index
+// CHECK:             %[[VAL_14:.*]] = linalg.index 1 : index
+// CHECK:             %[[VAL_15:.*]] = linalg.index 2 : index
+// CHECK:             %[[VAL_16:.*]] = linalg.index 3 : index
+// CHECK:             %[[VAL_17:.*]] = arith.subi %[[VAL_3]], %[[VAL_16]] : index
+// CHECK:             %[[VAL_18:.*]] = arith.subi %[[VAL_3]], %[[VAL_15]] : index
+// CHECK:             %[[VAL_19:.*]] = tensor.extract %[[VAL_7]]{{\[}}%[[VAL_13]], %[[VAL_14]], %[[VAL_18]], %[[VAL_17]]] : tensor<1x1x2x2xf32>
+// CHECK:             linalg.yield %[[VAL_19]] : f32
+// CHECK:           } -> tensor<1x1x2x2xf32>
+// CHECK:           %[[VAL_20:.*]] = tensor.insert_slice %[[VAL_10]] into %[[VAL_6]][0, 0, 0, 0] [1, 1, 2, 2] [1, 1, 1, 1] : tensor<1x1x2x2xf32> into tensor<1x1x8x9xf32>
+// CHECK-COUNT-8:   linalg.generic
+// CHECK:           %[[VAL_123:.*]] = tensor.insert_slice
+// CHECK:           %[[VAL_124:.*]] = torch_c.from_builtin_tensor %[[VAL_123]] : tensor<1x1x8x9xf32> -> !torch.vtensor<[1,1,8,9],f32>
+// CHECK:           return %[[VAL_124]] : !torch.vtensor<[1,1,8,9],f32>
+// CHECK:         }
+
+func.func @torch.aten.reflection_pad2d(%arg0: !torch.vtensor<[1,1,4,4],f32>) -> !torch.vtensor<[1,1,8,9],f32>  {
+  %int2 = torch.constant.int 2
+  %int3 = torch.constant.int 3
+  %0 = torch.prim.ListConstruct %int2, %int3, %int2, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
+  %1 = torch.aten.reflection_pad2d %arg0, %0 : !torch.vtensor<[1,1,4,4],f32>, !torch.list<int> -> !torch.vtensor<[1,1,8,9],f32>
+  return %1 : !torch.vtensor<[1,1,8,9],f32>
+}