Skip to content

Commit ef71c66

Browse files
committed
Add sve1only invalid costs
1 parent 90fb2e3 commit ef71c66

File tree

3 files changed

+29
-14
lines changed

3 files changed

+29
-14
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3092,6 +3092,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
30923092
return AdjustCost(
30933093
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
30943094

3095+
// For the moment we do not have lowering for SVE1-only fptrunc f64->bf16 as
3096+
// we use fcvtx undef SVE2. Give them invalid costs.
3097+
if (!ST->hasSVE2() && ISD == ISD::FP_ROUND && SrcTy.isScalableVector() &&
3098+
DstTy.getScalarType() == MVT::bf16 && SrcTy.getScalarType() == MVT::f64)
3099+
return InstructionCost::getInvalid();
3100+
30953101
static const TypeConversionCostTblEntry BF16Tbl[] = {
30963102
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
30973103
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
@@ -3542,7 +3548,7 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
35423548
// Extend from nxvmbf16 to nxvmf32.
35433549
{ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1}, // lsl
35443550
{ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1}, // lsl
3545-
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 2}, // unpck+unpck+lsl+lsl
3551+
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4}, // unpck+unpck+lsl+lsl
35463552

35473553
// Extend from nxvmf16 to nxvmf64.
35483554
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
@@ -3551,8 +3557,7 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
35513557

35523558
// Extend from nxvmbf16 to nxvmf64.
35533559
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2}, // lsl+fcvt
3554-
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6}, // unpck+unpck+lsl+lsl
3555-
// + fcvt+fcvt
3560+
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6}, // 2*unpck+2*lsl+2*fcvt
35563561
{ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14}, // 6*unpck+4*lsl+4*fcvt
35573562

35583563
// Extend from nxvmf32 to nxvmf64.

llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define void @sve_fpext_bf16() {
3636
; CHECK-LABEL: 'sve_fpext_bf16'
3737
; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
3838
; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
39-
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
39+
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
4040
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
4141
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
4242
; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>

llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3
2-
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOBF16
3-
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16
2+
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
3+
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE2
4+
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16
45

56
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
67
target triple = "aarch64-unknown-linux-gnu"
@@ -34,14 +35,23 @@ define void @sve_fptruncs() {
3435
}
3536

3637
define void @sve_fptruncs_bf16() {
37-
; CHECK-NOBF16-LABEL: 'sve_fptruncs_bf16'
38-
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
39-
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
40-
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
41-
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
42-
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
43-
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
44-
; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
38+
; CHECK-SVE-LABEL: 'sve_fptruncs_bf16'
39+
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
40+
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
41+
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
42+
; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
43+
; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
44+
; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
45+
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
46+
;
47+
; CHECK-SVE2-LABEL: 'sve_fptruncs_bf16'
48+
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
49+
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
50+
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
51+
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
52+
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
53+
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
54+
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
4555
;
4656
; CHECK-BF16-LABEL: 'sve_fptruncs_bf16'
4757
; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>

0 commit comments

Comments
 (0)