Skip to content

Commit 4155e87

Browse files
committed
[AArch64] Add bf16 fpext and fpround costs.
This prevents them from generating Invalid costs, as generating the instructions seems to work fine with and without +bf16. The costs are mostly taken from the number of instructions (minus ptrue and constants) from https://llvm.godbolt.org/z/16zMd47he
1 parent 526b672 commit 4155e87

File tree

3 files changed

+50
-17
lines changed

3 files changed

+50
-17
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3093,6 +3093,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
30933093
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
30943094
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
30953095
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
3096+
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1}, // bfcvt
3097+
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1}, // bfcvt
3098+
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3}, // bfcvt+bfcvt+uzp1
3099+
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2}, // fcvtx+bfcvt
3100+
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5}, // fcvtx+bfcvt+bfcvt+uzp1
3101+
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11}, // 4*fcvt+4*bfcvt+3*uzp
30963102
};
30973103

30983104
if (ST->hasBF16())
@@ -3501,11 +3507,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
35013507
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1},
35023508
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3},
35033509

3510+
// Truncate from nxvmf32 to nxvmbf16.
3511+
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},
3512+
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},
3513+
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},
3514+
35043515
// Truncate from nxvmf64 to nxvmf16.
35053516
{ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1},
35063517
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3},
35073518
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7},
35083519

3520+
// Truncate from nxvmf64 to nxvmbf16.
3521+
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},
3522+
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},
3523+
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},
3524+
35093525
// Truncate from nxvmf64 to nxvmf32.
35103526
{ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1},
35113527
{ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3},
@@ -3516,11 +3532,22 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
35163532
{ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
35173533
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
35183534

3535+
// Extend from nxvmbf16 to nxvmf32.
3536+
{ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1}, // lsl
3537+
{ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1}, // lsl
3538+
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 2}, // unpck+unpck+lsl+lsl
3539+
35193540
// Extend from nxvmf16 to nxvmf64.
35203541
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
35213542
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
35223543
{ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
35233544

3545+
// Extend from nxvmbf16 to nxvmf64.
3546+
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2}, // lsl+fcvt
3547+
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6}, // unpck+unpck+lsl+lsl
3548+
// + fcvt+fcvt
3549+
{ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14}, // 6*unpck+4*lsl+4*fcvt
3550+
35243551
// Extend from nxvmf32 to nxvmf64.
35253552
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
35263553
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},

llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ define void @sve_fpext() {
3434

3535
define void @sve_fpext_bf16() {
3636
; CHECK-LABEL: 'sve_fpext_bf16'
37-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
38-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
39-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
40-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
41-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
42-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
37+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
38+
; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
39+
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
40+
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
41+
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
42+
; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
4343
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
4444
;
4545
%nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>

llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,23 @@ define void @sve_fptruncs() {
3434
}
3535

3636
define void @sve_fptruncs_bf16() {
37-
; CHECK-LABEL: 'sve_fptruncs_bf16'
38-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
39-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
40-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
41-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
42-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
43-
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
44-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
37+
; CHECK-NOBF16-LABEL: 'sve_fptruncs_bf16'
38+
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
39+
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
40+
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
41+
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
42+
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
43+
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
44+
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
45+
;
46+
; CHECK-BF16-LABEL: 'sve_fptruncs_bf16'
47+
; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
48+
; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
49+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
50+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
51+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
52+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
53+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
4554
;
4655
%nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
4756
%nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
@@ -53,6 +62,3 @@ define void @sve_fptruncs_bf16() {
5362

5463
ret void
5564
}
56-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
57-
; CHECK-BF16: {{.*}}
58-
; CHECK-NOBF16: {{.*}}

0 commit comments

Comments
 (0)