Skip to content

Commit af1d3af

Browse files
RKSimon authored and
mahesh-attarde committed
[CostModel][X86] Update SK_Reverse based on cost kinds (llvm#150650)
When these were converted to CostKindTblEntry, the throughput was mainly copied to all cost kinds. Regenerated with my check_cost_tables.py helper script.
1 parent fe21605 commit af1d3af

File tree

3 files changed

+140
-139
lines changed

3 files changed

+140
-139
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1842,10 +1842,11 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
18421842
{ TTI::SK_Broadcast, MVT::v32f16, { 1, 3, 1, 1 } }, // vpbroadcastw
18431843
{ TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
18441844

1845-
{ TTI::SK_Reverse, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
1846-
{ TTI::SK_Reverse, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
1845+
{ TTI::SK_Reverse, MVT::v32i16, { 2, 6, 2, 4 } }, // vpermw
1846+
{ TTI::SK_Reverse, MVT::v32f16, { 2, 6, 2, 4 } }, // vpermw
18471847
{ TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vpermw
1848-
{ TTI::SK_Reverse, MVT::v64i8, { 2, 2, 2, 2 } }, // pshufb + vshufi64x2
1848+
{ TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vpermw
1849+
{ TTI::SK_Reverse, MVT::v64i8, { 2, 9, 2, 3 } }, // pshufb + vshufi64x2
18491850

18501851
{ TTI::SK_PermuteSingleSrc, MVT::v32i16, { 2, 2, 2, 2 } }, // vpermw
18511852
{ TTI::SK_PermuteSingleSrc, MVT::v32f16, { 2, 2, 2, 2 } }, // vpermw
@@ -1889,10 +1890,10 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
18891890
{TTI::SK_Broadcast, MVT::v64i8, { 1, 3, 1, 1 } }, // vpbroadcastb
18901891
{TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 1 }}, // vpbroadcastb
18911892

1892-
{TTI::SK_Reverse, MVT::v8f64, { 1, 3, 1, 1 } }, // vpermpd
1893-
{TTI::SK_Reverse, MVT::v16f32, { 1, 3, 1, 1 } }, // vpermps
1894-
{TTI::SK_Reverse, MVT::v8i64, { 1, 3, 1, 1 } }, // vpermq
1895-
{TTI::SK_Reverse, MVT::v16i32, { 1, 3, 1, 1 } }, // vpermd
1893+
{TTI::SK_Reverse, MVT::v8f64, { 1, 5, 2, 3 } }, // vpermpd
1894+
{TTI::SK_Reverse, MVT::v16f32, { 1, 3, 2, 3 } }, // vpermps
1895+
{TTI::SK_Reverse, MVT::v8i64, { 1, 5, 2, 3 } }, // vpermq
1896+
{TTI::SK_Reverse, MVT::v16i32, { 1, 3, 2, 3 } }, // vpermd
18961897
{TTI::SK_Reverse, MVT::v32i16, { 7, 7, 7, 7 } }, // per mca
18971898
{TTI::SK_Reverse, MVT::v32f16, { 7, 7, 7, 7 } }, // per mca
18981899
{TTI::SK_Reverse, MVT::v64i8, { 7, 7, 7, 7 } }, // per mca
@@ -1991,13 +1992,13 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
19911992
{ TTI::SK_Broadcast, MVT::v32i8, { 1, 3, 1, 2 } }, // vpbroadcastb
19921993
{ TTI::SK_Broadcast, MVT::v16i8, { 1, 3, 1, 1 } }, // vpbroadcastb
19931994

1994-
{ TTI::SK_Reverse, MVT::v4f64, { 1, 1, 1, 1 } }, // vpermpd
1995-
{ TTI::SK_Reverse, MVT::v8f32, { 1, 1, 1, 1 } }, // vpermps
1996-
{ TTI::SK_Reverse, MVT::v4i64, { 1, 1, 1, 1 } }, // vpermq
1997-
{ TTI::SK_Reverse, MVT::v8i32, { 1, 1, 1, 1 } }, // vpermd
1998-
{ TTI::SK_Reverse, MVT::v16i16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
1999-
{ TTI::SK_Reverse, MVT::v16f16, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
2000-
{ TTI::SK_Reverse, MVT::v32i8, { 2, 2, 2, 2 } }, // vperm2i128 + pshufb
1995+
{ TTI::SK_Reverse, MVT::v4f64, { 1, 6, 1, 2 } }, // vpermpd
1996+
{ TTI::SK_Reverse, MVT::v8f32, { 2, 7, 2, 4 } }, // vpermps
1997+
{ TTI::SK_Reverse, MVT::v4i64, { 1, 6, 1, 2 } }, // vpermq
1998+
{ TTI::SK_Reverse, MVT::v8i32, { 2, 7, 2, 4 } }, // vpermd
1999+
{ TTI::SK_Reverse, MVT::v16i16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
2000+
{ TTI::SK_Reverse, MVT::v16f16, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
2001+
{ TTI::SK_Reverse, MVT::v32i8, { 2, 9, 2, 4 } }, // vperm2i128 + pshufb
20012002

20022003
{ TTI::SK_Select, MVT::v16i16, { 1, 1, 1, 1 } }, // vpblendvb
20032004
{ TTI::SK_Select, MVT::v16f16, { 1, 1, 1, 1 } }, // vpblendvb
@@ -2095,15 +2096,15 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
20952096
{TTI::SK_Broadcast, MVT::v16f16, {2,3,3,4}}, // vpshuflw + vpshufd + vinsertf128
20962097
{TTI::SK_Broadcast, MVT::v32i8, {3,4,3,6}}, // vpshufb + vinsertf128
20972098

2098-
{TTI::SK_Reverse, MVT::v4f64, {2,2,2,2}}, // vperm2f128 + vpermilpd
2099-
{TTI::SK_Reverse, MVT::v8f32, {2,2,2,2}}, // vperm2f128 + vpermilps
2100-
{TTI::SK_Reverse, MVT::v4i64, {2,2,2,2}}, // vperm2f128 + vpermilpd
2101-
{TTI::SK_Reverse, MVT::v8i32, {2,2,2,2}}, // vperm2f128 + vpermilps
2102-
{TTI::SK_Reverse, MVT::v16i16, {4,4,4,4}}, // vextractf128 + 2*pshufb
2099+
{TTI::SK_Reverse, MVT::v4f64, {2,6,2,2}}, // vperm2f128 + vpermilpd
2100+
{TTI::SK_Reverse, MVT::v8f32, {2,7,2,4}}, // vperm2f128 + vpermilps
2101+
{TTI::SK_Reverse, MVT::v4i64, {2,6,2,2}}, // vperm2f128 + vpermilpd
2102+
{TTI::SK_Reverse, MVT::v8i32, {2,7,2,4}}, // vperm2f128 + vpermilps
2103+
{TTI::SK_Reverse, MVT::v16i16, {2,9,5,5}}, // vextractf128 + 2*pshufb
21032104
// + vinsertf128
2104-
{TTI::SK_Reverse, MVT::v16f16, {4,4,4,4}}, // vextractf128 + 2*pshufb
2105+
{TTI::SK_Reverse, MVT::v16f16, {2,9,5,5}}, // vextractf128 + 2*pshufb
21052106
// + vinsertf128
2106-
{TTI::SK_Reverse, MVT::v32i8, {4,4,4,4}}, // vextractf128 + 2*pshufb
2107+
{TTI::SK_Reverse, MVT::v32i8, {2,9,5,5}}, // vextractf128 + 2*pshufb
21072108
// + vinsertf128
21082109

21092110
{TTI::SK_Select, MVT::v4i64, {1,1,1,1}}, // vblendpd
@@ -2170,9 +2171,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
21702171
{TTI::SK_Broadcast, MVT::v8f16, {1, 3, 2, 2}}, // pshufb
21712172
{TTI::SK_Broadcast, MVT::v16i8, {1, 3, 2, 2}}, // pshufb
21722173

2173-
{TTI::SK_Reverse, MVT::v8i16, {1, 1, 1, 1}}, // pshufb
2174-
{TTI::SK_Reverse, MVT::v8f16, {1, 1, 1, 1}}, // pshufb
2175-
{TTI::SK_Reverse, MVT::v16i8, {1, 1, 1, 1}}, // pshufb
2174+
{TTI::SK_Reverse, MVT::v8i16, {1, 2, 1, 2}}, // pshufb
2175+
{TTI::SK_Reverse, MVT::v8f16, {1, 2, 1, 2}}, // pshufb
2176+
{TTI::SK_Reverse, MVT::v16i8, {1, 2, 1, 2}}, // pshufb
21762177

21772178
{TTI::SK_Select, MVT::v8i16, {3, 3, 3, 3}}, // 2*pshufb + por
21782179
{TTI::SK_Select, MVT::v8f16, {3, 3, 3, 3}}, // 2*pshufb + por
@@ -2209,9 +2210,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
22092210
{TTI::SK_Reverse, MVT::v2f64, {1, 1, 1, 1}}, // shufpd
22102211
{TTI::SK_Reverse, MVT::v2i64, {1, 1, 1, 1}}, // pshufd
22112212
{TTI::SK_Reverse, MVT::v4i32, {1, 1, 1, 1}}, // pshufd
2212-
{TTI::SK_Reverse, MVT::v8i16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
2213-
{TTI::SK_Reverse, MVT::v8f16, {3, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
2214-
{TTI::SK_Reverse, MVT::v16i8, {9, 9, 9, 9}}, // 2*pshuflw + 2*pshufhw
2213+
{TTI::SK_Reverse, MVT::v8i16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
2214+
{TTI::SK_Reverse, MVT::v8f16, {2, 3, 3, 3}}, // pshuflw + pshufhw + pshufd
2215+
{TTI::SK_Reverse, MVT::v16i8, {5, 6,11,11}}, // 2*pshuflw + 2*pshufhw
22152216
// + 2*pshufd + 2*unpck + packus
22162217

22172218
{TTI::SK_Select, MVT::v2i64, {1, 1, 1, 1}}, // movsd

llvm/test/Analysis/CostModel/X86/shuffle-reverse-fp16.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33

44
define void @test_vXf16(<2 x half> %src32, <4 x half> %src64, <8 x half> %src128, <16 x half> %src256, <32 x half> %src512) {
55
; CHECK-LABEL: 'test_vXf16'
6-
; CHECK-NEXT: Cost Model: Found costs of 1 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0>
7-
; CHECK-NEXT: Cost Model: Found costs of 1 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
8-
; CHECK-NEXT: Cost Model: Found costs of 1 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
6+
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0>
7+
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V64 = shufflevector <4 x half> %src64, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
8+
; CHECK-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %V128 = shufflevector <8 x half> %src128, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
99
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V256 = shufflevector <16 x half> %src256, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
10-
; CHECK-NEXT: Cost Model: Found costs of 2 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
10+
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:6 SizeLat:4 for: %V512 = shufflevector <32 x half> %src512, <32 x half> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1111
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
1212
;
1313
%V32 = shufflevector <2 x half> %src32, <2 x half> undef, <2 x i32> <i32 1, i32 0>

0 commit comments

Comments
 (0)