@@ -2681,6 +2681,181 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
     R->dissolveToCFGLoop();
 }
 
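+/// Try to match \p CurIndex as a strided index. On success, return the start
+/// value and the stride of the index (creating new uniform recipes for them
+/// where needed); otherwise return {nullptr, nullptr}.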
+static std::pair<VPValue *, VPValue *> matchStridedStart(VPValue *CurIndex) {
+  if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(CurIndex))
+    return {WidenIV, WidenIV->getStepValue()};
+
+  auto *WidenR = dyn_cast<VPWidenRecipe>(CurIndex);
+  if (!WidenR || !CurIndex->getUnderlyingValue())
+    return {nullptr, nullptr};
+
+  unsigned Opcode = WidenR->getOpcode();
+  // TODO: Support Instruction::Add and Instruction::Or.
+  if (Opcode != Instruction::Shl && Opcode != Instruction::Mul)
+    return {nullptr, nullptr};
+
+  // Match the pattern binop(variant, invariant), or binop(invariant, variant)
+  // if the binary operator is commutative.
+  bool IsLHSUniform = vputils::isSingleScalar(WidenR->getOperand(0));
+  if (IsLHSUniform == vputils::isSingleScalar(WidenR->getOperand(1)) ||
+      (IsLHSUniform && !Instruction::isCommutative(Opcode)))
+    return {nullptr, nullptr};
+  unsigned VarIdx = IsLHSUniform ? 1 : 0;
+
+  auto [Start, Stride] = matchStridedStart(WidenR->getOperand(VarIdx));
+  if (!Start)
+    return {nullptr, nullptr};
+
+  SmallVector<VPValue *> StartOps(WidenR->operands());
+  StartOps[VarIdx] = Start;
+  auto *StartR = new VPReplicateRecipe(WidenR->getUnderlyingInstr(), StartOps,
+                                       /*IsUniform*/ true);
+  StartR->insertBefore(WidenR);
+
+  unsigned InvIdx = VarIdx == 0 ? 1 : 0;
+  auto *StrideR =
+      new VPInstruction(Opcode, {Stride, WidenR->getOperand(InvIdx)});
+  StrideR->insertBefore(WidenR);
+  return {StartR, StrideR};
+}
+
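+/// Try to determine the base pointer and stride of the strided access
+/// performed through \p WidenGEP. Returns {nullptr, nullptr} if the address
+/// is not strided with a single loop-variant index.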
+static std::pair<VPValue *, VPValue *>
+determineBaseAndStride(VPWidenGEPRecipe *WidenGEP) {
+  // Not considered strided if both the base pointer and all indices are
+  // loop-invariant.
+  if (WidenGEP->areAllOperandsInvariant())
+    return {nullptr, nullptr};
+
+  // TODO: Check if the base pointer is strided.
+  if (!WidenGEP->isPointerLoopInvariant())
+    return {nullptr, nullptr};
+
+  // Find the single loop-variant index.
+  unsigned VarOp = 0;
+  for (unsigned I = 1, E = WidenGEP->getNumOperands(); I < E; I++) {
+    if (WidenGEP->isIndexLoopInvariant(I - 1))
+      continue;
+
+    if (VarOp != 0)
+      return {nullptr, nullptr};
+    VarOp = I;
+  }
+
+  if (VarOp == 0)
+    return {nullptr, nullptr};
+
+  // TODO: Support cases with a variant index in the middle.
+  if (VarOp != WidenGEP->getNumOperands() - 1)
+    return {nullptr, nullptr};
+
+  VPValue *VarIndex = WidenGEP->getOperand(VarOp);
+  auto [Start, Stride] = matchStridedStart(VarIndex);
+  if (!Start)
+    return {nullptr, nullptr};
+
+  SmallVector<VPValue *> Ops(WidenGEP->operands());
+  Ops[VarOp] = Start;
+  auto *BasePtr = new VPReplicateRecipe(WidenGEP->getUnderlyingInstr(), Ops,
+                                        /*IsUniform*/ true);
+  BasePtr->insertBefore(WidenGEP);
+
+  return {BasePtr, Stride};
+}
+
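+// Convert widened, non-consecutive loads into strided loads when the target
+// supports them and the cost model considers them profitable for the clamped
+// VF range.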
+void VPlanTransforms::convertToStridedAccesses(VPlan &Plan, VPCostContext &Ctx,
+                                               VFRange &Range) {
+  if (Plan.hasScalarVFOnly())
+    return;
+
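+  // Cache the base/stride analysis per address so a GEP feeding multiple
+  // memory recipes is only analyzed (and its helper recipes created) once.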
+  DenseMap<VPWidenGEPRecipe *, std::pair<VPValue *, VPValue *>> StrideCache;
+  SmallVector<VPRecipeBase *> ToErase;
+  SmallPtrSet<VPValue *, 4> PossiblyDead;
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+      auto *MemR = dyn_cast<VPWidenMemoryRecipe>(&R);
+      // TODO: support strided store
+      // TODO: support reverse access
+      // TODO: transform interleave access into multiple strided accesses
+      if (!MemR || !isa<VPWidenLoadRecipe>(MemR) || MemR->isConsecutive())
+        continue;
+
+      auto *Ptr = dyn_cast<VPWidenGEPRecipe>(MemR->getAddr());
+      if (!Ptr)
+        continue;
+
+      // The memory cost model requires the pointer operand of the memory
+      // access instruction.
+      Value *PtrUV = Ptr->getUnderlyingValue();
+      if (!PtrUV)
+        continue;
+
+      // Try to get the base and stride here.
+      VPValue *BasePtr, *Stride;
+      auto It = StrideCache.find(Ptr);
+      if (It != StrideCache.end())
+        std::tie(BasePtr, Stride) = It->second;
+      else
+        std::tie(BasePtr, Stride) = StrideCache[Ptr] =
+            determineBaseAndStride(Ptr);
+
+      // Skip if the memory access is not a strided access.
+      if (!BasePtr) {
+        assert(!Stride);
+        continue;
+      }
+      assert(Stride);
+
+      Instruction &Ingredient = MemR->getIngredient();
+      Type *ElementTy = getLoadStoreType(&Ingredient);
+
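+      // Only use a strided load if it is legal for this VF and cheaper than
+      // the widened load it replaces; the decision also clamps Range.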
+      auto IsProfitable = [&](ElementCount VF) -> bool {
+        Type *DataTy = toVectorTy(ElementTy, VF);
+        const Align Alignment = getLoadStoreAlignment(&Ingredient);
+        if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
+          return false;
+        const InstructionCost CurrentCost = MemR->computeCost(VF, Ctx);
+        const InstructionCost StridedLoadStoreCost =
+            Ctx.TTI.getStridedMemoryOpCost(Instruction::Load, DataTy, PtrUV,
+                                           MemR->isMasked(), Alignment,
+                                           Ctx.CostKind, &Ingredient);
+        return StridedLoadStoreCost < CurrentCost;
+      };
+
+      if (!LoopVectorizationPlanner::getDecisionAndClampRange(IsProfitable,
+                                                              Range)) {
+        PossiblyDead.insert(BasePtr);
+        PossiblyDead.insert(Stride);
+        continue;
+      }
+      PossiblyDead.insert(Ptr);
+
+      // Create a new vector pointer for strided access.
+      auto *GEP = dyn_cast<GetElementPtrInst>(PtrUV->stripPointerCasts());
+      auto *NewPtr = new VPVectorPointerRecipe(BasePtr, ElementTy, Stride,
+                                               GEP ? GEP->getNoWrapFlags()
+                                                   : GEPNoWrapFlags::none(),
+                                               Ptr->getDebugLoc());
+      NewPtr->insertBefore(MemR);
+
+      auto *LoadR = cast<VPWidenLoadRecipe>(MemR);
+      auto *StridedLoad = new VPWidenStridedLoadRecipe(
+          *cast<LoadInst>(&Ingredient), NewPtr, Stride, &Plan.getVF(),
+          LoadR->getMask(), *LoadR, LoadR->getDebugLoc());
+      StridedLoad->insertBefore(LoadR);
+      LoadR->replaceAllUsesWith(StridedLoad);
+
+      ToErase.push_back(LoadR);
+    }
+  }
+
+  // Clean up dead memory access recipes, and unused base address and stride.
+  for (auto *R : ToErase)
+    R->eraseFromParent();
+  for (auto *V : PossiblyDead)
+    recursivelyDeleteDeadRecipes(V);
+}
+
 void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
                                                Type &CanonicalIVTy) {
   using namespace llvm::VPlanPatternMatch;