Skip to content

[AArch64] Add sve bf16 fpext and fpround costs. #150485

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3092,6 +3092,13 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));

// For the moment we do not have lowering for SVE1-only fptrunc f64->bf16 as
// we use fcvtx under SVE2. Give them invalid costs.
if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
ISD == ISD::FP_ROUND && SrcTy.isScalableVector() &&
DstTy.getScalarType() == MVT::bf16 && SrcTy.getScalarType() == MVT::f64)
return InstructionCost::getInvalid();

static const TypeConversionCostTblEntry BF16Tbl[] = {
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
Expand All @@ -3100,6 +3107,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1}, // bfcvt
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1}, // bfcvt
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3}, // bfcvt+bfcvt+uzp1
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2}, // fcvtx+bfcvt
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5}, // 2*fcvtx+2*bfcvt+uzp1
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11}, // 4*fcvt+4*bfcvt+3*uzp
};

if (ST->hasBF16())
Expand Down Expand Up @@ -3508,11 +3521,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1},
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3},

// Truncate from nxvmf32 to nxvmbf16.
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},

// Truncate from nxvmf64 to nxvmf16.
{ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1},
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3},
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7},

// Truncate from nxvmf64 to nxvmbf16.
{ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},
{ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},
{ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},

// Truncate from nxvmf64 to nxvmf32.
{ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1},
{ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3},
Expand All @@ -3523,11 +3546,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},

// Extend from nxvmbf16 to nxvmf32.
{ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1}, // lsl
{ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1}, // lsl
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4}, // unpck+unpck+lsl+lsl

// Extend from nxvmf16 to nxvmf64.
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
{ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},

// Extend from nxvmbf16 to nxvmf64.
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2}, // lsl+fcvt
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6}, // 2*unpck+2*lsl+2*fcvt
{ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14}, // 6*unpck+4*lsl+4*fcvt

// Extend from nxvmf32 to nxvmf64.
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ define void @sve_fpext() {

define void @sve_fpext_bf16() {
; CHECK-LABEL: 'sve_fpext_bf16'
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
Expand Down
42 changes: 29 additions & 13 deletions llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOBF16
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE2
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
Expand Down Expand Up @@ -34,14 +35,32 @@ define void @sve_fptruncs() {
}

define void @sve_fptruncs_bf16() {
; CHECK-LABEL: 'sve_fptruncs_bf16'
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
; CHECK-SVE-LABEL: 'sve_fptruncs_bf16'
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE2-LABEL: 'sve_fptruncs_bf16'
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-BF16-LABEL: 'sve_fptruncs_bf16'
; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
%nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
Expand All @@ -53,6 +72,3 @@ define void @sve_fptruncs_bf16() {

ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-BF16: {{.*}}
; CHECK-NOBF16: {{.*}}
Loading