@@ -3698,12 +3698,21 @@ LLVM_Util::mask_as_int(llvm::Value* mask)
36983698 // Convert <4 x i1> -> <4 x i32>
36993699 llvm::Value* w4_int_mask = builder ().CreateSExt (mask,
37003700 type_wide_int ());
3701+
3702+ // Now we will use the horizontal sign extraction intrinsic
3703+ // to build a 32 bit mask value. However the only 128bit
3704+ // version works on floats, so we will cast from int32 to
3705+ // float beforehand
3706+ llvm::Type* w4_float_type = llvm_vector_type (m_llvm_type_float, 4 );
3707+ llvm::Value* w4_float_mask = builder ().CreateBitCast (w4_int_mask,
3708+ w4_float_type);
3709+
37013710 // Now we will use the horizontal sign extraction intrinsic
37023711 // to build a 32 bit mask value.
37033712 llvm::Function* func = llvm::Intrinsic::getDeclaration (
3704- module (), llvm::Intrinsic::x86_sse2_pmovmskb_128 );
3713+ module (), llvm::Intrinsic::x86_sse_movmsk_ps );
37053714
3706- llvm::Value* args[1 ] = { w4_int_mask };
3715+ llvm::Value* args[1 ] = { w4_float_mask };
37073716 llvm::Value* int8_mask;
37083717 int8_mask = builder ().CreateCall (func, toArrayRef (args));
37093718 return int8_mask;
@@ -3727,18 +3736,28 @@ LLVM_Util::mask_as_int(llvm::Value* mask)
37273736 auto w4_int_masks = op_quarter_16x (wide_int_mask);
37283737
37293738 // Now we will use the horizontal sign extraction intrinsic
3730- // to build a 32 bit mask value.
3739+ // to build a 32 bit mask value. However the only 128bit
3740+ // version works on floats, so we will cast from int32 to
3741+ // float beforehand
3742+ llvm::Type* w4_float_type = llvm_vector_type (m_llvm_type_float, 4 );
3743+ std::array<llvm::Value*, 4 > w4_float_masks = {
3744+ { builder ().CreateBitCast (w4_int_masks[0 ], w4_float_type),
3745+ builder ().CreateBitCast (w4_int_masks[1 ], w4_float_type),
3746+ builder ().CreateBitCast (w4_int_masks[2 ], w4_float_type),
3747+ builder ().CreateBitCast (w4_int_masks[3 ], w4_float_type) }
3748+ };
3749+
37313750 llvm::Function* func = llvm::Intrinsic::getDeclaration (
3732- module (), llvm::Intrinsic::x86_sse2_pmovmskb_128 );
3751+ module (), llvm::Intrinsic::x86_sse_movmsk_ps );
37333752
3734- llvm::Value* args[1 ] = { w4_int_masks [0 ] };
3753+ llvm::Value* args[1 ] = { w4_float_masks [0 ] };
37353754 std::array<llvm::Value*, 4 > int4_masks;
37363755 int4_masks[0 ] = builder ().CreateCall (func, toArrayRef (args));
3737- args[0 ] = w4_int_masks [1 ];
3756+ args[0 ] = w4_float_masks [1 ];
37383757 int4_masks[1 ] = builder ().CreateCall (func, toArrayRef (args));
3739- args[0 ] = w4_int_masks [2 ];
3758+ args[0 ] = w4_float_masks [2 ];
37403759 int4_masks[2 ] = builder ().CreateCall (func, toArrayRef (args));
3741- args[0 ] = w4_int_masks [3 ];
3760+ args[0 ] = w4_float_masks [3 ];
37423761 int4_masks[3 ] = builder ().CreateCall (func, toArrayRef (args));
37433762
37443763 llvm::Value* bits12_15 = op_shl (int4_masks[3 ], constant (12 ));
@@ -3759,14 +3778,22 @@ LLVM_Util::mask_as_int(llvm::Value* mask)
37593778 auto w4_int_masks = op_split_8x (wide_int_mask);
37603779
37613780 // Now we will use the horizontal sign extraction intrinsic
3762- // to build a 32 bit mask value.
3781+ // to build a 32 bit mask value. However the only 128bit
3782+ // version works on floats, so we will cast from int32 to
3783+ // float beforehand
3784+ llvm::Type* w4_float_type = llvm_vector_type (m_llvm_type_float, 4 );
3785+ std::array<llvm::Value*, 2 > w4_float_masks = {
3786+ { builder ().CreateBitCast (w4_int_masks[0 ], w4_float_type),
3787+ builder ().CreateBitCast (w4_int_masks[1 ], w4_float_type) }
3788+ };
3789+
37633790 llvm::Function* func = llvm::Intrinsic::getDeclaration (
3764- module (), llvm::Intrinsic::x86_sse2_pmovmskb_128 );
3791+ module (), llvm::Intrinsic::x86_sse_movmsk_ps );
37653792
3766- llvm::Value* args[1 ] = { w4_int_masks [0 ] };
3793+ llvm::Value* args[1 ] = { w4_float_masks [0 ] };
37673794 std::array<llvm::Value*, 2 > int4_masks;
37683795 int4_masks[0 ] = builder ().CreateCall (func, toArrayRef (args));
3769- args[0 ] = w4_int_masks [1 ];
3796+ args[0 ] = w4_float_masks [1 ];
37703797 int4_masks[1 ] = builder ().CreateCall (func, toArrayRef (args));
37713798
37723799 llvm::Value* bits4_7 = op_shl (int4_masks[1 ], constant (4 ));
@@ -3782,12 +3809,20 @@ LLVM_Util::mask_as_int(llvm::Value* mask)
37823809 llvm::Value* w4_int_mask = builder ().CreateSExt (mask,
37833810 type_wide_int ());
37843811
3812+ // Now we will use the horizontal sign extraction intrinsic
3813+ // to build a 32 bit mask value. However the only 128bit
3814+ // version works on floats, so we will cast from int32 to
3815+ // float beforehand
3816+ llvm::Type* w4_float_type = llvm_vector_type (m_llvm_type_float, 4 );
3817+ llvm::Value* w4_float_mask = builder ().CreateBitCast (w4_int_mask,
3818+ w4_float_type);
3819+
37853820 // Now we will use the horizontal sign extraction intrinsic
37863821 // to build a 32 bit mask value.
37873822 llvm::Function* func = llvm::Intrinsic::getDeclaration (
3788- module (), llvm::Intrinsic::x86_sse2_pmovmskb_128 );
3823+ module (), llvm::Intrinsic::x86_sse_movmsk_ps );
37893824
3790- llvm::Value* args[1 ] = { w4_int_mask };
3825+ llvm::Value* args[1 ] = { w4_float_mask };
37913826 llvm::Value* int4_mask = builder ().CreateCall (func,
37923827 toArrayRef (args));
37933828
@@ -3833,12 +3868,20 @@ LLVM_Util::mask4_as_int8(llvm::Value* mask)
38333868 // Convert <4 x i1> -> <4 x i32>
38343869 llvm::Value* w4_int_mask = builder ().CreateSExt (mask, type_wide_int ());
38353870
3871+ // Now we will use the horizontal sign extraction intrinsic
3872+ // to build a 32 bit mask value. However the only 128bit
3873+ // version works on floats, so we will cast from int32 to
3874+ // float beforehand
3875+ llvm::Type* w4_float_type = llvm_vector_type (m_llvm_type_float, 4 );
3876+ llvm::Value* w4_float_mask = builder ().CreateBitCast (w4_int_mask,
3877+ w4_float_type);
3878+
38363879 // Now we will use the horizontal sign extraction intrinsic
38373880 // to build a 32 bit mask value.
38383881 llvm::Function* func = llvm::Intrinsic::getDeclaration (
3839- module (), llvm::Intrinsic::x86_sse2_pmovmskb_128 );
3882+ module (), llvm::Intrinsic::x86_sse_movmsk_ps );
38403883
3841- llvm::Value* args[1 ] = { w4_int_mask };
3884+ llvm::Value* args[1 ] = { w4_float_mask };
38423885 llvm::Value* int32 = builder ().CreateCall (func, toArrayRef (args));
38433886 llvm::Value* i8 = builder ().CreateIntCast (int32, type_int8 (), true );
38443887
0 commit comments