Skip to content

Commit e55cb73

Browse files
Wunkologibbed
authored and committed
[x64] Add AVX512 optimization for OPCODE_SELECT(F64)
1 parent ba75a01 commit e55cb73

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

src/xenia/cpu/backend/x64/x64_sequences.cc

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -698,6 +698,26 @@ struct SELECT_F64
698698
: Sequence<SELECT_F64, I<OPCODE_SELECT, F64Op, I8Op, F64Op, F64Op>> {
699699
static void Emit(X64Emitter& e, const EmitArgType& i) {
700700
// dest = src1 != 0 ? src2 : src3
701+
702+
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
703+
e.movzx(e.rax, i.src1);
704+
e.vmovq(e.xmm0, e.rax);
705+
e.vptestmq(e.k1, e.xmm0, e.xmm0);
706+
707+
const Xmm src2 = i.src2.is_constant ? e.xmm1 : i.src2;
708+
if (i.src2.is_constant) {
709+
e.LoadConstantXmm(src2, i.src2.constant());
710+
}
711+
712+
const Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
713+
if (i.src3.is_constant) {
714+
e.LoadConstantXmm(src3, i.src3.constant());
715+
}
716+
717+
e.vpblendmq(i.dest.reg() | e.k1, src3, src2);
718+
return;
719+
}
720+
701721
e.movzx(e.eax, i.src1);
702722
e.vmovd(e.xmm1, e.eax);
703723
e.vpxor(e.xmm0, e.xmm0);

0 commit comments

Comments
 (0)