diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e1adc0b07dd21..028f476a767b4 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3092,6 +3092,13 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, return AdjustCost( BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); + // For the moment we do not have lowering for SVE1-only fptrunc f64->bf16 as + // we use fcvtx under SVE2. Give them invalid costs. + if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() && + ISD == ISD::FP_ROUND && SrcTy.isScalableVector() && + DstTy.getScalarType() == MVT::bf16 && SrcTy.getScalarType() == MVT::f64) + return InstructionCost::getInvalid(); + static const TypeConversionCostTblEntry BF16Tbl[] = { {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt @@ -3100,6 +3107,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn + {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1}, // bfcvt + {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1}, // bfcvt + {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3}, // bfcvt+bfcvt+uzp1 + {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2}, // fcvtx+bfcvt + {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5}, // 2*fcvtx+2*bfcvt+uzp1 + {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11}, // 4*fcvt+4*bfcvt+3*uzp }; if (ST->hasBF16()) @@ -3508,11 +3521,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, {ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1}, {ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3}, + // Truncate from nxvmf32 to nxvmbf16. + {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8}, + {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8}, + {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17}, + // Truncate from nxvmf64 to nxvmf16. {ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1}, {ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3}, {ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7}, + // Truncate from nxvmf64 to nxvmbf16. + {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9}, + {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19}, + {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39}, + // Truncate from nxvmf64 to nxvmf32. {ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1}, {ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3}, @@ -3523,11 +3546,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1}, {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2}, + // Extend from nxvmbf16 to nxvmf32. + {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1}, // lsl + {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1}, // lsl + {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4}, // unpck+unpck+lsl+lsl + // Extend from nxvmf16 to nxvmf64. {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1}, {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2}, {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4}, + // Extend from nxvmbf16 to nxvmf64. + {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2}, // lsl+fcvt + {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6}, // 2*unpck+2*lsl+2*fcvt + {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14}, // 6*unpck+4*lsl+4*fcvt + // Extend from nxvmf32 to nxvmf64. {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1}, {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2}, diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll index 805b3713830ab..4ad0e3fb8b4ca 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll @@ -34,12 +34,12 @@ define void @sve_fpext() { define void @sve_fpext_bf16() { ; CHECK-LABEL: 'sve_fpext_bf16' -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext undef to ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %nxv2_f16_to_f32 = fpext undef to diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll index bb31ebfbed4ba..73556d7ee1d41 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -passes="print" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOBF16 -; RUN: opt -passes="print" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16 +; RUN: opt -passes="print" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE +; RUN: opt -passes="print" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE2 +; RUN: opt -passes="print" -cost-kind=all 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2,+bf16 -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BF16 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" @@ -34,14 +35,32 @@ define void @sve_fptruncs() { } define void @sve_fptruncs_bf16() { -; CHECK-LABEL: 'sve_fptruncs_bf16' -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-SVE-LABEL: 'sve_fptruncs_bf16' +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc undef to +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc undef to +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc undef to +; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv2_f16_from_f64 = fptrunc undef to +; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv4_f16_from_f64 = fptrunc undef to +; CHECK-SVE-NEXT: Cost Model: Found costs of Invalid for: %nxv8_f16_from_f64 = fptrunc undef to +; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-SVE2-LABEL: 'sve_fptruncs_bf16' +; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc undef to +; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc undef to +; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc undef to +; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc undef to +; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc undef to +; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc undef to +; CHECK-SVE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'sve_fptruncs_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc undef to +; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc undef to +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc undef to +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc undef to +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc undef to +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc undef to +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %nxv2_f16_from_f32 = fptrunc undef to %nxv4_f16_from_f32 = fptrunc undef to @@ -53,6 +72,3 @@ define void @sve_fptruncs_bf16() { ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-BF16: {{.*}} -; CHECK-NOBF16: {{.*}}