Skip to content

Commit 70127bb

Browse files
esukhov authored and igcbot committed
IGCVectorizer i32 instructions support enabled
IGCVectorizer now supports the i32 instructions ADD, MUL and SUB. New checks are implemented so that vector emission does not interfere with RT patterns. Emission is restrictive.
1 parent ed150a7 commit 70127bb

File tree

8 files changed

+833
-12
lines changed

8 files changed

+833
-12
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3990,6 +3990,13 @@ void EmitPass::emitVideoAnalyticGRF(llvm::GenIntrinsicInst *inst, const DWORD re
39903990
m_encoder->Push();
39913991
}
39923992

3993+
// Returns true when the type of I is an IGCLLVM::FixedVectorType whose element
// type is either a 32-bit integer (isIntegerTy(32)) or float (isFloatTy).
// Returns false for every other type, including non-vector values.
static bool isVectorTypeAllowed(Value *I) {
3994+
IGCLLVM::FixedVectorType *VecType = llvm::dyn_cast<IGCLLVM::FixedVectorType>(I->getType());
3995+
if (!VecType) return false; // dyn_cast failed: the value is not a fixed vector
3996+
auto ElType = VecType->getElementType();
3997+
return ElType->isIntegerTy(32) || ElType->isFloatTy();
3998+
}
3999+
39934000
void EmitPass::EmitGenericPointersCmp(llvm::Instruction *inst, const SSource source[2], const DstModifier &modifier,
39944001
uint8_t clearTagMask) {
39954002
Cmp(cast<CmpInst>(inst)->getPredicate(), source, modifier, clearTagMask);
@@ -4026,13 +4033,13 @@ void EmitPass::BinaryUnary(llvm::Instruction *inst, const SSource source[2], con
40264033
Xor(source, modifier);
40274034
break;
40284035
case Instruction::Mul:
4029-
Mul(source, modifier);
4030-
break;
40314036
case Instruction::FMul:
40324037
Mul(source, modifier);
40334038
break;
40344039
case Instruction::FAdd:
4035-
Add(source, modifier);
4040+
case Instruction::Add:
4041+
if (inst->getType()->isVectorTy()) Add(source, modifier);
4042+
else EmitSimpleAlu(inst, source, modifier);
40364043
break;
40374044
case Instruction::Call:
40384045
EmitAluIntrinsic(cast<CallInst>(inst), source, modifier);
@@ -4231,6 +4238,7 @@ static unsigned getVectorSize(Value *I) {
42314238
return NumElements;
42324239
}
42334240

4241+
42344242
void EmitPass::FPTrunc(const SSource sources[2], const DstModifier &modifier) {
42354243

42364244
CVariable *src[2];
@@ -4275,13 +4283,15 @@ void EmitPass::Sub(const SSource sources[2], const DstModifier &modifier) {
42754283
}
42764284
e_modifier mod1 = CombineModifier(EMOD_NEG, sources[1].mod);
42774285

4278-
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
4279-
sources[1].value->getType()->isVectorTy()) {
4286+
bool IsPossible = isVectorEmissionPossible(sources, src);
4287+
4288+
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
42804289

42814290
IGC_ASSERT_EXIT_MESSAGE(m_encoder->GetSimdSize() == lanesToSIMDMode(16),
42824291
"As of now Vector Emission is only supported for SIMD16");
42834292

42844293
unsigned VectorSize = getVectorSize(sources[0].value);
4294+
IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
42854295

42864296
bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
42874297

@@ -4329,23 +4339,50 @@ void EmitPass::Sub(const SSource sources[2], const DstModifier &modifier) {
43294339
m_encoder->Push();
43304340
}
43314341

4342+
// Decides whether a two-operand instruction may take the vector emission path.
//   sources - the two operands; both must have zero elementOffset and zero
//             SIMDOffset, and both values must pass isVectorTypeAllowed().
//   src     - the CVariables of the two operands; their element counts are
//             compared against m_destination.
// Returns true only when every check passes and m_destination has the same
// number of elements as at least one of the operands.
// This function does not test the EnableVectorEmitter flag.
bool EmitPass::isVectorEmissionPossible(const SSource sources[2], CVariable *src[2]) {
4343+
4344+
// any non-default state of the source modifiers
// can be a sign that a specific pattern is being emitted,
// which vector emission could interfere with
4345+
4346+
4347+
if (sources[0].elementOffset != 0) return false;
4348+
if (sources[1].elementOffset != 0) return false;
4349+
if (sources[0].SIMDOffset != 0) return false;
4350+
if (sources[1].SIMDOffset != 0) return false;
4351+
4352+
bool AllowedVectorTypes = isVectorTypeAllowed(sources[0].value) && isVectorTypeAllowed(sources[1].value);
4353+
if (!AllowedVectorTypes) return false;
4354+
4355+
bool DestSizeEquals = false;
4356+
// it's possible to have a case where two uniform vectors are
// deliberately not processed uniformly;
// in this case the virtual dest register will not have
// the same size as the operands, so we should default to
// scalar emission
4357+
4358+
4359+
4360+
4361+
DestSizeEquals = m_destination->GetNumberElement() == src[0]->GetNumberElement();
4362+
DestSizeEquals |= m_destination->GetNumberElement() == src[1]->GetNumberElement(); // matching either operand is sufficient
4363+
return DestSizeEquals;
4364+
}
4365+
43324366
void EmitPass::Add(const SSource sources[2], const DstModifier &modifier) {
43334367
CVariable *src[2];
43344368
for (int i = 0; i < 2; ++i) {
43354369
src[i] = GetSrcVariable(sources[i]);
43364370
}
43374371

4338-
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
4339-
sources[1].value->getType()->isVectorTy()) {
4372+
bool IsPossible = isVectorEmissionPossible(sources, src);
4373+
4374+
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
43404375

43414376
IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
43424377
"As of now Vector Emission is only supported for SIMD16");
4343-
unsigned VectorSize = getVectorSize(sources[0].value);
43444378

43454379
bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
43464380
// cannot emit 16 SIMD if SIMD SIZE is set to 8, but can emit 4
43474381
// simple ALU instructions has the same possible width as SIMD, "math"
43484382
// pipeline instructions has reduced width
4383+
unsigned VectorSize = getVectorSize(sources[0].value);
4384+
IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
4385+
43494386
bool CanEmitThisSize = VectorSize <= numLanes(m_currShader->m_SIMDSize);
43504387

43514388
if (IGC_IS_FLAG_ENABLED(VectorizerUniformValueVectorizationEnabled) && AllUniform && CanEmitThisSize) {
@@ -4391,13 +4428,14 @@ void EmitPass::Mul(const SSource sources[2], const DstModifier &modifier) {
43914428
}
43924429

43934430
unsigned SIMDSize = numLanes(m_currShader->m_SIMDSize);
4431+
bool IsPossible = isVectorEmissionPossible(sources, src);
43944432

4395-
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
4396-
sources[1].value->getType()->isVectorTy()) {
4433+
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
43974434

43984435
IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
43994436
"As of now Vector Emission is only supported for SIMD16");
44004437
unsigned VectorSize = getVectorSize(sources[0].value);
4438+
IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
44014439

44024440
bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
44034441
// cannot emit 16 SIMD if SIMD SIZE is set to 8, but can emit 4
@@ -4685,8 +4723,12 @@ void EmitPass::VectorMad(const SSource sources[3], const DstModifier &modifier)
46854723
}
46864724

46874725
void EmitPass::FDiv(const SSource sources[2], const DstModifier &modifier) {
4688-
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
4689-
sources[1].value->getType()->isVectorTy()) {
4726+
4727+
CVariable *src[2];
4728+
for (int i = 0; i < 2; ++i) src[i] = GetSrcVariable(sources[i]);
4729+
bool IsPossible = isVectorEmissionPossible(sources, src);
4730+
4731+
if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
46904732

46914733
IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
46924734
"As of now Vector Emission is only supported for SIMD16");

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ class EmitPass : public llvm::FunctionPass {
8484

8585
void CreateKernelShaderMap(CodeGenContext *ctx, IGC::IGCMD::MetaDataUtils *pMdUtils, llvm::Function &F);
8686

87+
88+
// Returns true when an instruction with the two given operands may be emitted
// through the vector emission path. `sources` are the operands and `src` are
// their CVariables.
bool isVectorEmissionPossible(const SSource sources[2], CVariable *src[2]);
89+
8790
void Frc(const SSource &source, const DstModifier &modifier);
8891
void Floor(const SSource &source, const DstModifier &modifier);
8992
void Mad(const SSource sources[3], const DstModifier &modifier);

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ bool isBinarySafe(Instruction *I) {
237237
Result |= (OpCode == Instruction::FMul && IGC_GET_FLAG_VALUE(VectorizerAllowFMUL));
238238
Result |= (OpCode == Instruction::FAdd && IGC_GET_FLAG_VALUE(VectorizerAllowFADD));
239239
Result |= (OpCode == Instruction::FSub && IGC_GET_FLAG_VALUE(VectorizerAllowFSUB));
240+
Result |= (OpCode == Instruction::Mul && IGC_GET_FLAG_VALUE(VectorizerAllowMUL));
241+
Result |= (OpCode == Instruction::Add && IGC_GET_FLAG_VALUE(VectorizerAllowADD));
242+
Result |= (OpCode == Instruction::Sub && IGC_GET_FLAG_VALUE(VectorizerAllowSUB));
240243
Result |= isFDivSafe(I);
241244
return Result;
242245
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2025 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; REQUIRES: regkeys
10+
; RUN: igc_opt -S -dce -platformbmg -rev-id B -has-emulated-64-bit-insts -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 < %s | FileCheck %s
11+
12+
; CHECK: .decl vectorized_phi v_type=G type=d num_elts=128 align=wordx32
13+
; CHECK: .decl vector v_type=G type=d num_elts=8 align=dword
14+
15+
; CHECK: mul (M1, 16) vectorized_phi(0,0)<1> vector(0,0)<0;1,0> vectorized_phi(0,0)<1;1,0>
16+
; CHECK: mul (M1, 16) vectorized_phi(1,0)<1> vector(0,1)<0;1,0> vectorized_phi(1,0)<1;1,0>
17+
; CHECK: mul (M1, 16) vectorized_phi(2,0)<1> vector(0,2)<0;1,0> vectorized_phi(2,0)<1;1,0>
18+
; CHECK: mul (M1, 16) vectorized_phi(3,0)<1> vector(0,3)<0;1,0> vectorized_phi(3,0)<1;1,0>
19+
; CHECK: mul (M1, 16) vectorized_phi(4,0)<1> vector(0,4)<0;1,0> vectorized_phi(4,0)<1;1,0>
20+
; CHECK: mul (M1, 16) vectorized_phi(5,0)<1> vector(0,5)<0;1,0> vectorized_phi(5,0)<1;1,0>
21+
; CHECK: mul (M1, 16) vectorized_phi(6,0)<1> vector(0,6)<0;1,0> vectorized_phi(6,0)<1;1,0>
22+
; CHECK: mul (M1, 16) vectorized_phi(7,0)<1> vector(0,7)<0;1,0> vectorized_phi(7,0)<1;1,0>
23+
24+
25+
; Test kernel: a loop in which %vector is multiplied by the loop-carried
; %vectorized_phi (both <8 x i32>); the product is passed to the dpas
; intrinsic, whose result feeds back into %vectorized_phi.
define spir_kernel void @_foo() {
26+
br label %._crit_edge
27+
28+
._crit_edge: ; preds = %._crit_edge.._crit_edge_crit_edge, %0
29+
%1 = phi float [ 0.000000e+00, %0 ], [ %1, %._crit_edge.._crit_edge_crit_edge ]
30+
%vectorized_phi = phi <8 x i32> [ zeroinitializer, %0 ], [ %2, %._crit_edge.._crit_edge_crit_edge ]
31+
%vector = insertelement <8 x i32> zeroinitializer, i32 0, i64 0
32+
%vectorized_binary = mul <8 x i32> %vector, %vectorized_phi
33+
%2 = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> %vectorized_binary, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
34+
br label %._crit_edge.._crit_edge_crit_edge
35+
36+
._crit_edge.._crit_edge_crit_edge: ; preds = %._crit_edge
37+
br label %._crit_edge
38+
}
39+
40+
declare <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
41+
42+
!igc.functions = !{!0}
43+
44+
!0 = !{void ()* @_foo, !1}
45+
!1 = !{!2, !4, !3}
46+
!2 = !{!"function_type", i32 0}
47+
!3 = !{!"sub_group_size", i32 16}
48+
!4 = !{!"max_reg_pressure", i32 185}

0 commit comments

Comments
 (0)