Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit e3ef547

Browse files
author
Jina Nahias
committed
[X86][AVX512] Adding a pattern for broadcastm intrinsic.
Differential Revision: https://reviews.llvm.org/D38312 Change-Id: I6551fb13879e098aed74de410e29815cf37d9ab5 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316890 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 5d03e9f commit e3ef547

File tree

2 files changed

+73
-57
lines changed

2 files changed

+73
-57
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6687,6 +6687,44 @@ static bool isUseOfShuffle(SDNode *N) {
66876687
return false;
66886688
}
66896689

6690+
// Check if the current node of build vector is a zero extended vector.
6691+
// If so, return the value extended.
6692+
// For example: (0,0,0,a,0,0,0,a,0,0,0,a,0,0,0,a) returns a.
6693+
// NumElt - return the number of zero extended identical values.
6694+
// EltType - return the type of the value include the zero extend.
6695+
static SDValue isSplatZeroExtended(const BuildVectorSDNode *Op,
6696+
unsigned &NumElt, MVT &EltType) {
6697+
SDValue ExtValue = Op->getOperand(0);
6698+
unsigned NumElts = Op->getNumOperands();
6699+
unsigned Delta = NumElts;
6700+
6701+
for (unsigned i = 1; i < NumElts; i++) {
6702+
if (Op->getOperand(i) == ExtValue) {
6703+
Delta = i;
6704+
break;
6705+
}
6706+
if (!(Op->getOperand(i).isUndef() || isNullConstant(Op->getOperand(i))))
6707+
return SDValue();
6708+
}
6709+
if (!isPowerOf2_32(Delta) || Delta == 1)
6710+
return SDValue();
6711+
6712+
for (unsigned i = Delta; i < NumElts; i++) {
6713+
if (i % Delta == 0) {
6714+
if (Op->getOperand(i) != ExtValue)
6715+
return SDValue();
6716+
} else if (!(isNullConstant(Op->getOperand(i)) ||
6717+
Op->getOperand(i).isUndef()))
6718+
return SDValue();
6719+
}
6720+
unsigned EltSize =
6721+
Op->getSimpleValueType(0).getScalarSizeInBits();
6722+
unsigned ExtVTSize = EltSize * Delta;
6723+
EltType = MVT::getIntegerVT(ExtVTSize);
6724+
NumElt = NumElts / Delta;
6725+
return ExtValue;
6726+
}
6727+
66906728
/// Attempt to use the vbroadcast instruction to generate a splat value
66916729
/// from a splat BUILD_VECTOR which uses:
66926730
/// a. A single scalar load, or a constant.
@@ -6709,6 +6747,32 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
67096747
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
67106748
"Unsupported vector type for broadcast.");
67116749

6750+
// Attempt to use VBROADCASTM
6751+
// From this paterrn:
6752+
// a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
6753+
// b. t1 = (build_vector t0 t0)
6754+
//
6755+
// Create (VBROADCASTM v2i1 X)
6756+
if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
6757+
MVT EltType;
6758+
unsigned NumElts;
6759+
SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
6760+
if (ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) {
6761+
SDValue BOperand = ZeroExtended.getOperand(0);
6762+
if (BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) {
6763+
if ((EltType == MVT::i64 &&
6764+
VT.getVectorElementType() == MVT::i8) || // for broadcastmb2q
6765+
(EltType == MVT::i32 &&
6766+
VT.getVectorElementType() == MVT::i16)) { // for broadcastmw2d
6767+
SDValue Brdcst =
6768+
DAG.getNode(X86ISD::VBROADCASTM, dl,
6769+
MVT::getVectorVT(EltType, NumElts), BOperand);
6770+
return DAG.getBitcast(VT, Brdcst);
6771+
}
6772+
}
6773+
}
6774+
}
6775+
67126776
BitVector UndefElements;
67136777
SDValue Ld = BVOp->getSplatValue(&UndefElements);
67146778

test/CodeGen/X86/broadcastm-lowering.ll

Lines changed: 9 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,7 @@ define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
2020
; AVX512VLCDBW-LABEL: test_mm_epi64:
2121
; AVX512VLCDBW: # BB#0: # %entry
2222
; AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
23-
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
24-
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
25-
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
26-
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
23+
; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %xmm0
2724
; AVX512VLCDBW-NEXT: retq
2825
;
2926
; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
@@ -62,23 +59,13 @@ define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
6259
; AVX512VLCDBW-LABEL: test_mm_epi32:
6360
; AVX512VLCDBW: # BB#0: # %entry
6461
; AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
65-
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
66-
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
67-
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
68-
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
69-
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
70-
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
62+
; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %xmm0
7163
; AVX512VLCDBW-NEXT: retq
7264
;
7365
; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
7466
; X86-AVX512VLCDBW: # BB#0: # %entry
7567
; X86-AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
76-
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
77-
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
78-
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
79-
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
80-
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
81-
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
68+
; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %xmm0
8269
; X86-AVX512VLCDBW-NEXT: retl
8370
entry:
8471
%0 = icmp eq <16 x i8> %a, %b
@@ -100,27 +87,13 @@ define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
10087
; AVX512VLCDBW-LABEL: test_mm512_epi32:
10188
; AVX512VLCDBW: # BB#0: # %entry
10289
; AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
103-
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
104-
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
105-
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
106-
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
107-
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
108-
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
109-
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
110-
; AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
90+
; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %zmm0
11191
; AVX512VLCDBW-NEXT: retq
11292
;
11393
; X86-AVX512VLCDBW-LABEL: test_mm512_epi32:
11494
; X86-AVX512VLCDBW: # BB#0: # %entry
11595
; X86-AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
116-
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
117-
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
118-
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
119-
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
120-
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
121-
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
122-
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
123-
; X86-AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
96+
; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %zmm0
12497
; X86-AVX512VLCDBW-NEXT: retl
12598
entry:
12699
%0 = icmp eq <16 x i32> %a, %b
@@ -145,12 +118,7 @@ define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
145118
; AVX512VLCDBW-LABEL: test_mm512_epi64:
146119
; AVX512VLCDBW: # BB#0: # %entry
147120
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
148-
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
149-
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
150-
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
151-
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
152-
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
153-
; AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
121+
; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %zmm0
154122
; AVX512VLCDBW-NEXT: retq
155123
;
156124
; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
@@ -188,11 +156,7 @@ define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
188156
; AVX512VLCDBW-LABEL: test_mm256_epi64:
189157
; AVX512VLCDBW: # BB#0: # %entry
190158
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
191-
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
192-
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
193-
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
194-
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
195-
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
159+
; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %ymm0
196160
; AVX512VLCDBW-NEXT: retq
197161
;
198162
; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
@@ -232,25 +196,13 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
232196
; AVX512VLCDBW-LABEL: test_mm256_epi32:
233197
; AVX512VLCDBW: # BB#0: # %entry
234198
; AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
235-
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
236-
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
237-
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
238-
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
239-
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
240-
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
241-
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
199+
; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %ymm0
242200
; AVX512VLCDBW-NEXT: retq
243201
;
244202
; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
245203
; X86-AVX512VLCDBW: # BB#0: # %entry
246204
; X86-AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
247-
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
248-
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
249-
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
250-
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
251-
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
252-
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
253-
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
205+
; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %ymm0
254206
; X86-AVX512VLCDBW-NEXT: retl
255207
entry:
256208
%0 = icmp eq <16 x i16> %a, %b

0 commit comments

Comments
 (0)