intel
diff --git a/‎IGC/Compiler/CISACodeGen/EmitVISAPass.cpp‎
Lines changed: 54 additions & 12 deletions b/‎IGC/Compiler/CISACodeGen/EmitVISAPass.cpp‎
Lines changed: 54 additions & 12 deletions
diff --git a/‎IGC/Compiler/CISACodeGen/EmitVISAPass.hpp‎
Lines changed: 3 additions & 0 deletions b/‎IGC/Compiler/CISACodeGen/EmitVISAPass.hpp‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎IGC/Compiler/CISACodeGen/IGCVectorizer.cpp‎
Lines changed: 3 additions & 0 deletions b/‎IGC/Compiler/CISACodeGen/IGCVectorizer.cpp‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎IGC/Compiler/tests/EmitVISAPass/vectorizer-vector-emission-i32-mul.ll‎
Lines changed: 48 additions & 0 deletions b/‎IGC/Compiler/tests/EmitVISAPass/vectorizer-vector-emission-i32-mul.ll‎
Lines changed: 48 additions & 0 deletions
@@ -3990,6 +3990,13 @@ void EmitPass::emitVideoAnalyticGRF(llvm::GenIntrinsicInst *inst, const DWORD re
   m_encoder->Push();
 }
 
+// Returns true only when I's type is a fixed vector whose elements are i32 or float.
+static bool isVectorTypeAllowed(Value *I) {
+  if (auto *FixedVec = llvm::dyn_cast<IGCLLVM::FixedVectorType>(I->getType()))
+    return FixedVec->getElementType()->isIntegerTy(32) || FixedVec->getElementType()->isFloatTy();
+  return false;
+}
+
 void EmitPass::EmitGenericPointersCmp(llvm::Instruction *inst, const SSource source[2], const DstModifier &modifier,
                                       uint8_t clearTagMask) {
   Cmp(cast<CmpInst>(inst)->getPredicate(), source, modifier, clearTagMask);
@@ -4026,13 +4033,13 @@ void EmitPass::BinaryUnary(llvm::Instruction *inst, const SSource source[2], con
     Xor(source, modifier);
     break;
   case Instruction::Mul:
-    Mul(source, modifier);
-    break;
   case Instruction::FMul:
     Mul(source, modifier);
     break;
   case Instruction::FAdd:
-    Add(source, modifier);
+  case Instruction::Add:
+    if (inst->getType()->isVectorTy()) Add(source, modifier);
+    else EmitSimpleAlu(inst, source, modifier);
     break;
   case Instruction::Call:
     EmitAluIntrinsic(cast<CallInst>(inst), source, modifier);
@@ -4231,6 +4238,7 @@ static unsigned getVectorSize(Value *I) {
   return NumElements;
 }
 
+
 void EmitPass::FPTrunc(const SSource sources[2], const DstModifier &modifier) {
 
   CVariable *src[2];
@@ -4275,13 +4283,15 @@ void EmitPass::Sub(const SSource sources[2], const DstModifier &modifier) {
   }
   e_modifier mod1 = CombineModifier(EMOD_NEG, sources[1].mod);
 
-  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
-      sources[1].value->getType()->isVectorTy()) {
+  bool IsPossible = isVectorEmissionPossible(sources, src);
+
+  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
     IGC_ASSERT_EXIT_MESSAGE(m_encoder->GetSimdSize() == lanesToSIMDMode(16),
                             "As of now Vector Emission is only supported for SIMD16");
 
     unsigned VectorSize = getVectorSize(sources[0].value);
+    IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
 
     bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
 
@@ -4329,23 +4339,50 @@ void EmitPass::Sub(const SSource sources[2], const DstModifier &modifier) {
   m_encoder->Push();
 }
 
+// Returns true when sources/src/m_destination pass the offset, type and size checks below.
+bool EmitPass::isVectorEmissionPossible(const SSource sources[2], CVariable *src[2]) {
+  // A non-zero element or SIMD offset on either source can indicate that a
+  // specific pattern is being emitted which vector emission could interfere
+  // with, so such sources are rejected.
+  for (int i = 0; i < 2; ++i) {
+    if (sources[i].elementOffset != 0 || sources[i].SIMDOffset != 0) return false;
+  }
+
+  // Both operands must be fixed vectors of an allowed element type.
+  for (int i = 0; i < 2; ++i) {
+    if (!isVectorTypeAllowed(sources[i].value)) return false;
+  }
+
+  // Two uniform vectors may deliberately not be processed uniformly. The
+  // virtual destination register then does not have the same size as the
+  // operands, and scalar emission has to be used instead. The destination
+  // is accepted when its element count matches at least one operand.
+  const auto DstElems = m_destination->GetNumberElement();
+  const bool MatchesSrc0 = DstElems == src[0]->GetNumberElement();
+  const bool MatchesSrc1 = DstElems == src[1]->GetNumberElement();
+  return MatchesSrc0 || MatchesSrc1;
+}
+
 void EmitPass::Add(const SSource sources[2], const DstModifier &modifier) {
   CVariable *src[2];
   for (int i = 0; i < 2; ++i) {
     src[i] = GetSrcVariable(sources[i]);
   }
 
-  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
-      sources[1].value->getType()->isVectorTy()) {
+  bool IsPossible = isVectorEmissionPossible(sources, src);
+
+  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
     IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
                             "As of now Vector Emission is only supported for SIMD16");
-    unsigned VectorSize = getVectorSize(sources[0].value);
 
     bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
     // cannot emit 16 SIMD if SIMD SIZE is set to 8, but can emit 4
     // simple ALU instructions has the same possible width as SIMD, "math"
     // pipeline instructions has reduced width
+    unsigned VectorSize = getVectorSize(sources[0].value);
+    IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
+
     bool CanEmitThisSize = VectorSize <= numLanes(m_currShader->m_SIMDSize);
 
     if (IGC_IS_FLAG_ENABLED(VectorizerUniformValueVectorizationEnabled) && AllUniform && CanEmitThisSize) {
@@ -4391,13 +4428,14 @@ void EmitPass::Mul(const SSource sources[2], const DstModifier &modifier) {
   }
 
   unsigned SIMDSize = numLanes(m_currShader->m_SIMDSize);
+  bool IsPossible = isVectorEmissionPossible(sources, src);
 
-  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
-      sources[1].value->getType()->isVectorTy()) {
+  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
     IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
                             "As of now Vector Emission is only supported for SIMD16");
     unsigned VectorSize = getVectorSize(sources[0].value);
+    IGC_ASSERT_MESSAGE(VectorSize == getVectorSize(sources[1].value), "operands must have same vector types");
 
     bool AllUniform = src[0]->IsUniform() && src[1]->IsUniform() && m_destination->IsUniform();
     // cannot emit 16 SIMD if SIMD SIZE is set to 8, but can emit 4
@@ -4685,8 +4723,12 @@ void EmitPass::VectorMad(const SSource sources[3], const DstModifier &modifier)
 }
 
 void EmitPass::FDiv(const SSource sources[2], const DstModifier &modifier) {
-  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() &&
-      sources[1].value->getType()->isVectorTy()) {
+
+  CVariable *src[2];
+  for (int i = 0; i < 2; ++i) src[i] = GetSrcVariable(sources[i]);
+  bool IsPossible = isVectorEmissionPossible(sources, src);
+
+  if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && IsPossible) {
 
     IGC_ASSERT_EXIT_MESSAGE(numLanes(m_encoder->GetSimdSize()) == 16,
                             "As of now Vector Emission is only supported for SIMD16");
 
@@ -84,6 +84,9 @@ class EmitPass : public llvm::FunctionPass {
 
   void CreateKernelShaderMap(CodeGenContext *ctx, IGC::IGCMD::MetaDataUtils *pMdUtils, llvm::Function &F);
 
+
+  bool isVectorEmissionPossible(const SSource sources[2], CVariable *src[2]);
+
   void Frc(const SSource &source, const DstModifier &modifier);
   void Floor(const SSource &source, const DstModifier &modifier);
   void Mad(const SSource sources[3], const DstModifier &modifier);
 
@@ -237,6 +237,9 @@ bool isBinarySafe(Instruction *I) {
   Result |= (OpCode == Instruction::FMul && IGC_GET_FLAG_VALUE(VectorizerAllowFMUL));
   Result |= (OpCode == Instruction::FAdd && IGC_GET_FLAG_VALUE(VectorizerAllowFADD));
   Result |= (OpCode == Instruction::FSub && IGC_GET_FLAG_VALUE(VectorizerAllowFSUB));
+  Result |= (OpCode == Instruction::Mul && IGC_GET_FLAG_VALUE(VectorizerAllowMUL));
+  Result |= (OpCode == Instruction::Add && IGC_GET_FLAG_VALUE(VectorizerAllowADD));
+  Result |= (OpCode == Instruction::Sub && IGC_GET_FLAG_VALUE(VectorizerAllowSUB));
   Result |= isFDivSafe(I);
   return Result;
 }
 
@@ -0,0 +1,48 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2025 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+
+; REQUIRES: regkeys
+; RUN: igc_opt -S -dce -platformbmg -rev-id B -has-emulated-64-bit-insts -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 < %s | FileCheck %s
+
+; CHECK: .decl vectorized_phi v_type=G type=d num_elts=128 align=wordx32
+; CHECK: .decl vector v_type=G type=d num_elts=8 align=dword
+
+; CHECK: mul (M1, 16) vectorized_phi(0,0)<1> vector(0,0)<0;1,0> vectorized_phi(0,0)<1;1,0>
+; CHECK: mul (M1, 16) vectorized_phi(1,0)<1> vector(0,1)<0;1,0> vectorized_phi(1,0)<1;1,0>
+; CHECK: mul (M1, 16) vectorized_phi(2,0)<1> vector(0,2)<0;1,0> vectorized_phi(2,0)<1;1,0>
+; CHECK: mul (M1, 16) vectorized_phi(3,0)<1> vector(0,3)<0;1,0> vectorized_phi(3,0)<1;1,0>
+; CHECK: mul (M1, 16) vectorized_phi(4,0)<1> vector(0,4)<0;1,0> vectorized_phi(4,0)<1;1,0>
+; CHECK: mul (M1, 16) vectorized_phi(5,0)<1> vector(0,5)<0;1,0> vectorized_phi(5,0)<1;1,0>
+; CHECK: mul (M1, 16) vectorized_phi(6,0)<1> vector(0,6)<0;1,0> vectorized_phi(6,0)<1;1,0>
+; CHECK: mul (M1, 16) vectorized_phi(7,0)<1> vector(0,7)<0;1,0> vectorized_phi(7,0)<1;1,0>
+
+
+define spir_kernel void @_foo() {
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge.._crit_edge_crit_edge, %0
+  %1 = phi float [ 0.000000e+00, %0 ], [ %1, %._crit_edge.._crit_edge_crit_edge ] ; only referenced by itself
+  %vectorized_phi = phi <8 x i32> [ zeroinitializer, %0 ], [ %2, %._crit_edge.._crit_edge_crit_edge ]
+  %vector = insertelement <8 x i32> zeroinitializer, i32 0, i64 0
+  %vectorized_binary = mul <8 x i32> %vector, %vectorized_phi ; <8 x i32> mul under test; the CHECK lines expect eight (M1, 16) mul instructions
+  %2 = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32> %vectorized_binary, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false) ; result feeds %vectorized_phi on the back edge
+  br label %._crit_edge.._crit_edge_crit_edge
+
+._crit_edge.._crit_edge_crit_edge:                ; preds = %._crit_edge
+  br label %._crit_edge
+}
+
+declare <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x i32>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
+
+!igc.functions = !{!0}
+
+!0 = !{void ()* @_foo, !1}
+!1 = !{!2, !4, !3}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"sub_group_size", i32 16}
+!4 = !{!"max_reg_pressure", i32 185}