@@ -50,16 +50,14 @@ module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32}
5050 // CHECK: %[[VAL_27:.*]] = llvm.add %[[VAL_26]], %[[CST_0]] : i32
5151 // CHECK: %[[CST_32:.*]] = llvm.mlir.constant(32 : i32) : i32
5252 // CHECK: %[[VAL_28:.*]] = llvm.urem %[[VAL_27]], %[[CST_32]] : i32
53- // CHECK: %[[VAL_29:.*]] = llvm.add %[[VAL_28]], %[[OFFSET_1]] : i32
53+ // CHECK: %[[ROW_MAJOR_OFFSET_X:.*]] = llvm.add %[[VAL_28]], %[[OFFSET_1]] : i32
5454 // CHECK: %[[CST_0:.*]] = llvm.mlir.constant(0 : i32) : i32
5555 // CHECK: %[[CST_2:.*]] = llvm.mlir.constant(2 : i32) : i32
5656 // CHECK: %[[VAL_30:.*]] = llvm.mul %[[VAL_22]], %[[CST_2]] : i32
5757 // CHECK: %[[VAL_31:.*]] = llvm.add %[[VAL_30]], %[[CST_0]] : i32
5858 // CHECK: %[[CST_16:.*]] = llvm.mlir.constant(16 : i32) : i32
5959 // CHECK: %[[VAL_32:.*]] = llvm.urem %[[VAL_31]], %[[CST_16]] : i32
60- // CHECK: %[[VAL_33:.*]] = llvm.add %[[VAL_32]], %[[OFFSET_0]] : i32
61- // CHECK: %[[ROW_MAJOR_OFFSET_Y:.*]] = llvm.trunc %[[VAL_33]] : i32 to i32
62- // CHECK: %[[ROW_MAJOR_OFFSET_X:.*]] = llvm.trunc %[[VAL_29]] : i32 to i32
60+ // CHECK: %[[ROW_MAJOR_OFFSET_Y:.*]] = llvm.add %[[VAL_32]], %[[OFFSET_0]] : i32
6361 // CHECK: triton_gen.2Dblockprefetch %[[BASE_]], %[[ROW_MAJOR_BASE_WIDTH]], %[[ROW_MAJOR_BASE_HEIGHT]], %[[ROW_MAJOR_PITCH]], %[[ROW_MAJOR_OFFSET_X]], %[[ROW_MAJOR_OFFSET_Y]] {elem_size_in_bits = 16, tile_width = 16, tile_height = 2, v_blocks = 2, cache_control = L1C_L3C}
6462 %rowMajorPtr = tt.make_tensor_ptr %arg0 , [%arg2 , %arg4 ], [%arg5 , %c1_i64 ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 1 , 0 >} : <tensor <16 x32 xf16 >>
6563 ttig.prefetch %rowMajorPtr {cache = 1 : i32 , evict = 1 : i32 , isVolatile = false , ttig.block_io = " row_major" } : !tt.ptr <tensor <16 x32 xf16 >>
@@ -101,16 +99,14 @@ module attributes {"ttg.num-warps" = 8 : i32, "ttg.threads-per-warp" = 16 : i32}
10199 // CHECK: %[[VAL_27:.*]] = llvm.add %[[VAL_26]], %[[CST_0]] : i32
102100 // CHECK: %[[CST_32:.*]] = llvm.mlir.constant(32 : i32) : i32
103101 // CHECK: %[[VAL_28:.*]] = llvm.urem %[[VAL_27]], %[[CST_32]] : i32
104- // CHECK: %[[VAL_29:.*]] = llvm.add %[[VAL_28]], %[[OFFSET_1]] : i32
102+ // CHECK: %[[COL_MAJOR_OFFSET_X:.*]] = llvm.add %[[VAL_28]], %[[OFFSET_1]] : i32
105103 // CHECK: %[[CST_0:.*]] = llvm.mlir.constant(0 : i32) : i32
106104 // CHECK: %[[CST_4:.*]] = llvm.mlir.constant(4 : i32) : i32
107105 // CHECK: %[[VAL_30:.*]] = llvm.mul %[[VAL_22]], %[[CST_4]] : i32
108106 // CHECK: %[[VAL_31:.*]] = llvm.add %[[VAL_30]], %[[CST_0]] : i32
109107 // CHECK: %[[CST_16:.*]] = llvm.mlir.constant(16 : i32) : i32
110108 // CHECK: %[[VAL_32:.*]] = llvm.urem %[[VAL_31]], %[[CST_16]] : i32
111- // CHECK: %[[VAL_33:.*]] = llvm.add %[[VAL_32]], %[[OFFSET_0]] : i32
112- // CHECK: %[[COL_MAJOR_OFFSET_Y:.*]] = llvm.trunc %[[VAL_33]] : i32 to i32
113- // CHECK: %[[COL_MAJOR_OFFSET_X:.*]] = llvm.trunc %[[VAL_29]] : i32 to i32
109+ // CHECK: %[[COL_MAJOR_OFFSET_Y:.*]] = llvm.add %[[VAL_32]], %[[OFFSET_0]] : i32
114110 // CHECK: triton_gen.2Dblockprefetch %[[BASE_]], %[[COL_MAJOR_BASE_WIDTH]], %[[COL_MAJOR_BASE_HEIGHT]], %[[COL_MAJOR_PITCH]], %[[COL_MAJOR_OFFSET_X]], %[[COL_MAJOR_OFFSET_Y]] {elem_size_in_bits = 16, tile_width = 16, tile_height = 4, v_blocks = 1, cache_control = L1C_L3C}
115111 %columnMajorPtr = tt.make_tensor_ptr %arg0 , [%arg4 , %arg2 ], [%c1_i64 , %arg5 ], [%c0_i32 , %c0_i32 ] {order = array<i32 : 0 , 1 >} : <tensor <32 x16 xf16 >>
116112 ttig.prefetch %columnMajorPtr {cache = 1 : i32 , evict = 1 : i32 , isVolatile = false , ttig.block_io = " column_major" } : !tt.ptr <tensor <32 x16 xf16 >>
0 commit comments