Skip to content

Commit ba75a01

Browse files
Wunkologibbed
authored and committed
[x64] Add AVX512 optimization for OPCODE_SELECT(V128)
Uses `vpternlogd` to collapse the bitwise select operation into a single instruction. An additional `vmovdqa` is still required, because `vpternlogd` both reads from and writes to its first operand.
1 parent 7c21b32 commit ba75a01

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

src/xenia/cpu/backend/x64/x64_sequences.cc

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "xenia/cpu/backend/x64/x64_emitter.h"
3636
#include "xenia/cpu/backend/x64/x64_op.h"
3737
#include "xenia/cpu/backend/x64/x64_tracers.h"
38+
#include "xenia/cpu/backend/x64/x64_util.h"
3839
#include "xenia/cpu/hir/hir_builder.h"
3940
#include "xenia/cpu/processor.h"
4041

@@ -745,21 +746,30 @@ struct SELECT_V128_V128
745746
: Sequence<SELECT_V128_V128,
746747
I<OPCODE_SELECT, V128Op, V128Op, V128Op, V128Op>> {
747748
static void Emit(X64Emitter& e, const EmitArgType& i) {
748-
Xmm src1 = i.src1.is_constant ? e.xmm0 : i.src1;
749+
const Xmm src1 = i.src1.is_constant ? e.xmm0 : i.src1;
749750
if (i.src1.is_constant) {
750751
e.LoadConstantXmm(src1, i.src1.constant());
751752
}
752753

753-
Xmm src2 = i.src2.is_constant ? e.xmm1 : i.src2;
754+
const Xmm src2 = i.src2.is_constant ? e.xmm1 : i.src2;
754755
if (i.src2.is_constant) {
755756
e.LoadConstantXmm(src2, i.src2.constant());
756757
}
757758

758-
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
759+
const Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
759760
if (i.src3.is_constant) {
760761
e.LoadConstantXmm(src3, i.src3.constant());
761762
}
762763

764+
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
765+
e.vmovdqa(e.xmm3, src1);
766+
e.vpternlogd(e.xmm3, src2, src3,
767+
(~TernaryOperand::a & TernaryOperand::b) |
768+
(TernaryOperand::c & TernaryOperand::a));
769+
e.vmovdqa(i.dest, e.xmm3);
770+
return;
771+
}
772+
763773
// src1 ? src2 : src3;
764774
e.vpandn(e.xmm3, src1, src2);
765775
e.vpand(i.dest, src1, src3);

0 commit comments

Comments
 (0)