From 68dc4a83ba8a2122cce1ca96fe581f0a9d0a4678 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Wed, 23 Jul 2025 17:02:40 -0700 Subject: [PATCH 1/2] [VPlan] Get Addr computation cost with scalar type if it is uniform for gather/scatter. This patch queries `getAddressComputationCost()` with the scalar type if the address is uniform. This helps make the cost for gather/scatter more accurate. In the current LV, a non-consecutive VPWidenMemoryRecipe (gather/scatter) accounts for the cost of address computation. But there are some cases where the address is uniform across lanes, which means the address can be calculated with the scalar type and then broadcast. I have a follow-up optimization that tries to convert gather/scatter with uniform memory access to scalar load/store + broadcast. Once that optimization lands, we can remove this temporary change. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index acd6a97344116..ab52ef8d78f12 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3145,10 +3145,17 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, // TODO: Using the original IR may not be accurate. // Currently, ARM will use the underlying IR to calculate gather/scatter // instruction cost. - const Value *Ptr = getLoadStorePointerOperand(&Ingredient); - Type *PtrTy = toVectorTy(Ptr->getType(), VF); assert(!Reverse && "Inconsecutive memory access should not have the order."); + + const Value *Ptr = getLoadStorePointerOperand(&Ingredient); + Type *PtrTy = Ptr->getType(); + + // If the address value is uniform across all lane, then the address can be + // calculated with scalar type and broacast.
+ if (!vputils::isSingleScalar(getAddr())) + PtrTy = toVectorTy(PtrTy, VF); + return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, Ctx.CostKind) + Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment, From f55fa1c70394445f138bfdce04f83ab0797e9701 Mon Sep 17 00:00:00 2001 From: Elvis Wang Date: Tue, 12 Aug 2025 16:51:33 -0700 Subject: [PATCH 2/2] !fixup, address comments. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ab52ef8d78f12..ae610fb66ae28 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3151,8 +3151,8 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF, const Value *Ptr = getLoadStorePointerOperand(&Ingredient); Type *PtrTy = Ptr->getType(); - // If the address value is uniform across all lane, then the address can be - // calculated with scalar type and broacast. + // If the address value is uniform across all lanes, then the address can be + // calculated with scalar type and broadcast. if (!vputils::isSingleScalar(getAddr())) PtrTy = toVectorTy(PtrTy, VF);