Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][jit] Enable all X86Base intrinsics #91393

Merged
merged 10 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/mono/mono/arch/amd64/amd64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,9 @@ typedef union {
/*
 * String-move emitters. Each one brackets the emission with
 * amd64_codegen_pre/amd64_codegen_post, asks amd64_emit_rex for a prefix
 * matching SIZE (no register operands are involved, hence the three zeros)
 * and then delegates to the corresponding x86_movs* emitter.
 */
#define amd64_movsb_size(inst,size) \
	do { \
		amd64_codegen_pre(inst); \
		amd64_emit_rex ((inst),(size),0,0,0); \
		x86_movsb(inst); \
		amd64_codegen_post(inst); \
	} while (0)
#define amd64_movsl_size(inst,size) \
	do { \
		amd64_codegen_pre(inst); \
		amd64_emit_rex ((inst),(size),0,0,0); \
		x86_movsl(inst); \
		amd64_codegen_post(inst); \
	} while (0)
#define amd64_movsd_size(inst,size) \
	do { \
		amd64_codegen_pre(inst); \
		amd64_emit_rex ((inst),(size),0,0,0); \
		x86_movsd(inst); \
		amd64_codegen_post(inst); \
	} while (0)
/*
 * Bit-scan emitters (forward / reverse). The full register numbers are handed
 * to amd64_emit_rex; only the low three bits of each register are passed on
 * to the x86_bsf / x86_bsr emitter.
 */
#define amd64_bsf_size(inst,dreg,reg,size) \
	do { \
		amd64_codegen_pre(inst); \
		amd64_emit_rex ((inst),(size),(dreg),0,(reg)); \
		x86_bsf ((inst),((dreg)&0x7),((reg)&0x7)); \
		amd64_codegen_post (inst); \
	} while (0)
#define amd64_bsr_size(inst,dreg,reg,size) \
	do { \
		amd64_codegen_pre(inst); \
		amd64_emit_rex ((inst),(size),(dreg),0,(reg)); \
		x86_bsr ((inst),((dreg)&0x7),((reg)&0x7)); \
		amd64_codegen_post (inst); \
	} while (0)

/* Emits prefix P through x86_prefix; SIZE is accepted but not used. */
#define amd64_prefix_size(inst,p,size) \
	do { \
		x86_prefix((inst), (p)); \
	} while (0)
/* RDTSC: prefix chosen by amd64_emit_rex for SIZE (no register operands), then x86_rdtsc. */
#define amd64_rdtsc_size(inst,size) \
	do { \
		amd64_codegen_pre(inst); \
		amd64_emit_rex ((inst),(size),0,0,0); \
		x86_rdtsc(inst); \
		amd64_codegen_post(inst); \
	} while (0)
/*
 * CMPXCHG reg,reg: the full register numbers go to amd64_emit_rex, the low
 * three bits of each go to x86_cmpxchg_reg_reg.
 */
#define amd64_cmpxchg_reg_reg_size(inst,dreg,reg,size) \
	do { \
		amd64_codegen_pre(inst); \
		amd64_emit_rex ((inst),(size),(dreg),0,(reg)); \
		x86_cmpxchg_reg_reg((inst),((dreg)&0x7),((reg)&0x7)); \
		amd64_codegen_post(inst); \
	} while (0)
Expand Down
3 changes: 3 additions & 0 deletions src/mono/mono/arch/x86/x86-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1963,6 +1963,9 @@ mono_x86_patch_inline (guchar* code, gpointer target)
/* Single-byte emitters: each writes exactly one opcode byte through x86_byte. */
#define x86_leave(inst) \
	do { \
		x86_byte (inst, 0xc9); \
	} while (0)
#define x86_sahf(inst) \
	do { \
		x86_byte (inst, 0x9e); \
	} while (0)

/*
 * Bit-scan emitters: write 0x0f 0xbc (bsf) or 0x0f 0xbd (bsr) followed by the
 * operand encoding produced by x86_reg_emit for (dreg, reg).
 * Like the other multi-byte emitters in this header they announce their length
 * through x86_codegen_pre before writing anything. The length is 3 on the
 * assumption that x86_reg_emit writes a single byte - TODO confirm.
 */
#define x86_bsf(inst,dreg,reg) do { x86_codegen_pre(&(inst), 3); x86_byte (inst, 0x0f); x86_byte (inst, 0xbc); x86_reg_emit ((inst), (dreg), (reg)); } while (0)
#define x86_bsr(inst,dreg,reg) do { x86_codegen_pre(&(inst), 3); x86_byte (inst, 0x0f); x86_byte (inst, 0xbd); x86_reg_emit ((inst), (dreg), (reg)); } while (0)

/*
 * Two-byte x87 emitters. Each announces a length of 2 through x86_codegen_pre
 * and then writes 0xd9 followed by the operation-specific second byte.
 */
#define x86_fsin(inst) \
	do { \
		x86_codegen_pre(&(inst), 2); \
		x86_byte (inst, 0xd9); \
		x86_byte (inst, 0xfe); \
	} while (0)
#define x86_fcos(inst) \
	do { \
		x86_codegen_pre(&(inst), 2); \
		x86_byte (inst, 0xd9); \
		x86_byte (inst, 0xff); \
	} while (0)
#define x86_fabs(inst) \
	do { \
		x86_codegen_pre(&(inst), 2); \
		x86_byte (inst, 0xd9); \
		x86_byte (inst, 0xe1); \
	} while (0)
Expand Down
10 changes: 10 additions & 0 deletions src/mono/mono/mini/cpu-amd64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ long_div: dest:a src1:a src2:i len:16 clob:d
long_div_un: dest:a src1:a src2:i len:16 clob:d
long_rem: dest:d src1:a src2:i len:16 clob:a
long_rem_un: dest:d src1:a src2:i len:16 clob:a
# 64-bit combined divide/remainder. long_divrem and long_divrem_un take three
# sources (src1:a, src2:d, src3:i) and produce dest:a; long_divrem2 takes no
# sources and produces dest:d.
# NOTE(review): mini-amd64.c emits long_divrem2 as a plain move out of RDX, so it
# presumably has to directly follow its long_divrem/long_divrem_un - confirm that
# nothing can be placed between the two.
long_divrem: dest:a src1:a src2:d src3:i len:16 clob:x
long_divrem_un: dest:a src1:a src2:d src3:i len:16 clob:x
long_divrem2: dest:d len:3
long_and: dest:i src1:i src2:i len:3 clob:1
long_or: dest:i src1:i src2:i len:3 clob:1
long_xor: dest:i src1:i src2:i len:3 clob:1
Expand Down Expand Up @@ -337,6 +340,10 @@ amd64_lea_membase: dest:i src1:i len:11
x86_xchg: src1:i src2:i clob:x len:2
x86_fpop: src1:f len:3
x86_seteq_membase: src1:b len:9
# Bit-scan forward/reverse in 32-bit and 64-bit widths: one source, one destination.
# NOTE(review): len:4 presumably covers an optional REX prefix, the two opcode
# bytes 0x0f 0xbc/0xbd and one operand byte - confirm against amd64_bsf_size /
# amd64_bsr_size.
x86_bsf32: dest:i src1:i len:4
x86_bsf64: dest:l src1:l len:4
x86_bsr32: dest:i src1:i len:4
x86_bsr64: dest:l src1:l len:4

x86_add_reg_membase: dest:i src1:i src2:b clob:1 len:13
x86_sub_reg_membase: dest:i src1:i src2:b clob:1 len:13
Expand Down Expand Up @@ -411,6 +418,9 @@ int_div: dest:a src1:a src2:i clob:d len:32
int_div_un: dest:a src1:a src2:i clob:d len:32
int_rem: dest:d src1:a src2:i clob:a len:32
int_rem_un: dest:d src1:a src2:i clob:a len:32
# 32-bit combined divide/remainder. int_divrem and int_divrem_un take three
# sources (src1:a, src2:d, src3:i) and produce dest:a; int_divrem2 takes no
# sources and produces dest:d.
# NOTE(review): mini-amd64.c emits int_divrem2 as a plain 4-byte move out of RDX,
# so it presumably has to directly follow its int_divrem/int_divrem_un - confirm.
int_divrem: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem_un: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem2: dest:d len:3
int_and: dest:i src1:i src2:i clob:1 len:4
int_or: dest:i src1:i src2:i clob:1 len:4
int_xor: dest:i src1:i src2:i clob:1 len:4
Expand Down
7 changes: 7 additions & 0 deletions src/mono/mono/mini/cpu-x86.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ int_div: dest:a src1:a src2:i len:15 clob:d
int_div_un: dest:a src1:a src2:i len:15 clob:d
int_rem: dest:d src1:a src2:i len:15 clob:a
int_rem_un: dest:d src1:a src2:i len:15 clob:a
# 32-bit combined divide/remainder. int_divrem and int_divrem_un take three
# sources (src1:a, src2:d, src3:i) and produce dest:a; int_divrem2 takes no
# sources and produces dest:d.
# NOTE(review): mini-x86.c emits int_divrem2 as a plain move out of EDX, so it
# presumably has to directly follow its int_divrem/int_divrem_un - confirm.
int_divrem: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem_un: dest:a src1:a src2:d src3:i clob:x len:15
int_divrem2: dest:d len:3
int_and: dest:i src1:i src2:i clob:1 len:2
int_or: dest:i src1:i src2:i clob:1 len:2
int_xor: dest:i src1:i src2:i clob:1 len:2
Expand Down Expand Up @@ -303,6 +306,10 @@ x86_fp_load_i8: dest:f src1:b len:7
x86_fp_load_i4: dest:f src1:b len:7
x86_seteq_membase: src1:b len:7
x86_setne_membase: src1:b len:7
# Bit-scan forward/reverse: one source, one destination.
# NOTE(review): the mini-x86.c hunk in this change only shows emitters for the
# 32-bit forms (OP_X86_BSF32 / OP_X86_BSR32); confirm whether the 64-bit entries
# below can ever be selected on 32-bit x86 or are listed only for parity with
# cpu-amd64.mdesc.
x86_bsf32: dest:i src1:i len:4
x86_bsf64: dest:l src1:l len:4
x86_bsr32: dest:i src1:i len:4
x86_bsr64: dest:l src1:l len:4

x86_add_reg_membase: dest:i src1:i src2:b clob:1 len:11
x86_sub_reg_membase: dest:i src1:i src2:b clob:1 len:11
Expand Down
34 changes: 34 additions & 0 deletions src/mono/mono/mini/mini-amd64.c
Original file line number Diff line number Diff line change
Expand Up @@ -5126,6 +5126,27 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
}
break;
case OP_X86_LDIVREM:
amd64_div_reg (code, ins->sreg3, TRUE);
break;
case OP_X86_IDIVREM:
amd64_div_reg_size (code, ins->sreg3, TRUE, 4);
break;
case OP_X86_LDIVREMU:
amd64_div_reg (code, ins->sreg3, FALSE);
break;
case OP_X86_IDIVREMU:
amd64_div_reg_size (code, ins->sreg3, FALSE, 4);
break;
case OP_X86_IDIVREM2:
if (ins->dreg != AMD64_RDX)
amd64_mov_reg_reg (code, ins->dreg, AMD64_RDX, 4);
break;
case OP_X86_LDIVREM2:
if (ins->dreg != AMD64_RDX)
amd64_mov_reg_reg (code, ins->dreg, AMD64_RDX, 8);
break;

case OP_LMUL_OVF:
amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
Expand Down Expand Up @@ -5685,6 +5706,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_X86_XCHG:
amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
break;
/* Bit scans: dreg receives the result of scanning sreg1; the last argument is the operand size in bytes. */
case OP_X86_BSF32:
	amd64_bsf_size (code, ins->dreg, ins->sreg1, 4);
	break;
case OP_X86_BSR32:
	amd64_bsr_size (code, ins->dreg, ins->sreg1, 4);
	break;
case OP_X86_BSF64:
	amd64_bsf_size (code, ins->dreg, ins->sreg1, 8);
	break;
case OP_X86_BSR64:
	amd64_bsr_size (code, ins->dreg, ins->sreg1, 8);
	break;

case OP_LOCALLOC:
/* keep alignment */
amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
Expand Down
41 changes: 41 additions & 0 deletions src/mono/mono/mini/mini-llvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -5640,6 +5640,7 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
LLVMBuilderRef builder;
gboolean has_terminator;
LLVMValueRef lhs, rhs, arg3;
LLVMValueRef last_divrem = NULL;
int nins = 0;

cbb = get_end_bb (ctx, bb);
Expand Down Expand Up @@ -6638,6 +6639,44 @@ MONO_RESTORE_WARNING
values [ins->dreg] = LLVMBuildXor (builder, tz, width, dname);
break;
}
case OP_X86_IDIVREM:
case OP_X86_LDIVREM: {
	/*
	 * Signed combined divide/remainder.
	 * The dividend is assembled in a type twice as wide as the operands:
	 * lhs supplies the (zero-extended) low half, rhs the (sign-extended)
	 * high half, and arg3 is the sign-extended divisor. The quotient becomes
	 * this instruction's value; the remainder is parked in last_divrem for
	 * the *DIVREM2 instruction that follows.
	 */
	const LLVMTypeRef part_type = ins->opcode==OP_X86_IDIVREM ? LLVMInt32Type () : LLVMInt64Type ();
	const LLVMTypeRef full_type = ins->opcode==OP_X86_IDIVREM ? LLVMInt64Type () : LLVMInt128Type ();
	// shl requires both operands to have the same integer type, so the shift
	// count has to be a full_type constant rather than an i32 one.
	const LLVMValueRef shift_amount = LLVMConstInt (full_type, ins->opcode==OP_X86_IDIVREM ? 32 : 64, FALSE);

	LLVMValueRef dividend_low = LLVMBuildZExt (builder, convert (ctx, lhs, part_type), full_type, "");
	LLVMValueRef dividend_high = LLVMBuildSExt (builder, convert (ctx, rhs, part_type), full_type, "");
	LLVMValueRef dividend = LLVMBuildOr (builder, dividend_low,
		LLVMBuildShl (builder, dividend_high, shift_amount, ""), "");
	LLVMValueRef divisor = LLVMBuildSExt (builder, convert (ctx, arg3, part_type), full_type, "");
	// LLVM should fuse the individual Div and Rem instructions into one DIV/IDIV on x86
	values [ins->dreg] = LLVMBuildTrunc (builder, LLVMBuildSDiv (builder, dividend, divisor, ""), part_type, "");
	last_divrem = LLVMBuildTrunc (builder, LLVMBuildSRem (builder, dividend, divisor, ""), part_type, "");
	break;
}
case OP_X86_IDIVREMU:
case OP_X86_LDIVREMU: {
	/*
	 * Unsigned combined divide/remainder.
	 * The dividend is assembled in a type twice as wide as the operands:
	 * lhs supplies the low half, rhs the high half, arg3 is the divisor; all
	 * three are zero-extended. The quotient becomes this instruction's value;
	 * the remainder is parked in last_divrem for the *DIVREM2 instruction
	 * that follows.
	 */
	const LLVMTypeRef part_type = ins->opcode==OP_X86_IDIVREMU ? LLVMInt32Type () : LLVMInt64Type ();
	const LLVMTypeRef full_type = ins->opcode==OP_X86_IDIVREMU ? LLVMInt64Type () : LLVMInt128Type ();
	// shl requires both operands to have the same integer type, so the shift
	// count has to be a full_type constant rather than an i32 one.
	const LLVMValueRef shift_amount = LLVMConstInt (full_type, ins->opcode==OP_X86_IDIVREMU ? 32 : 64, FALSE);

	LLVMValueRef dividend_low = LLVMBuildZExt (builder, convert (ctx, lhs, part_type), full_type, "");
	LLVMValueRef dividend_high = LLVMBuildZExt (builder, convert (ctx, rhs, part_type), full_type, "");
	LLVMValueRef dividend = LLVMBuildOr (builder, dividend_low,
		LLVMBuildShl (builder, dividend_high, shift_amount, ""), "");
	LLVMValueRef divisor = LLVMBuildZExt (builder, convert (ctx, arg3, part_type), full_type, "");
	// As in the signed case, the separate div and rem are expected to be fused by LLVM on x86
	values [ins->dreg] = LLVMBuildTrunc (builder, LLVMBuildUDiv (builder, dividend, divisor, ""), part_type, "");
	last_divrem = LLVMBuildTrunc (builder, LLVMBuildURem (builder, dividend, divisor, ""), part_type, "");
	break;
}
case OP_X86_IDIVREM2:
case OP_X86_LDIVREM2: {
	// Hand over the remainder computed by the preceding *DIVREM instruction.
	// It is consumed exactly once: the slot is cleared so that a stray
	// *DIVREM2 without a producer trips the assertion.
	g_assert (last_divrem);
	LLVMValueRef remainder = last_divrem;
	last_divrem = NULL;
	values [ins->dreg] = remainder;
	break;
}
#endif

case OP_ICONV_TO_I1:
Expand Down Expand Up @@ -12043,6 +12082,8 @@ MONO_RESTORE_WARNING
}
}

g_assert (last_divrem == NULL);

if (!ctx_ok (ctx))
return;

Expand Down
9 changes: 9 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,15 @@ MINI_OP3(OP_MULX_HL64, "mulxhl64", LREG, LREG, LREG, LREG)

#endif

#if defined(TARGET_X86) || defined(TARGET_AMD64)
MINI_OP3(OP_X86_LDIVREM, "long_divrem", LREG, LREG, LREG, LREG)
jandupej marked this conversation as resolved.
Show resolved Hide resolved
/*
 * x86/amd64 combined divide/remainder opcodes. Each *DIVREM / *DIVREMU opcode
 * has one destination and three sources; the matching *DIVREM2 opcode has a
 * destination only. The L* forms use LREG operands, the I* forms IREG operands.
 * NOTE(review): the backends read the remainder out of EDX/RDX when emitting
 * *DIVREM2, so it presumably has to directly follow its *DIVREM - confirm.
 */
MINI_OP3(OP_X86_LDIVREMU, "long_divrem_un", LREG, LREG, LREG, LREG)
MINI_OP3(OP_X86_LDIVREM2, "long_divrem2", LREG, NONE, NONE, NONE)
MINI_OP3(OP_X86_IDIVREM, "int_divrem", IREG, IREG, IREG, IREG)
MINI_OP3(OP_X86_IDIVREMU, "int_divrem_un", IREG, IREG, IREG, IREG)
MINI_OP3(OP_X86_IDIVREM2, "int_divrem2", IREG, NONE, NONE, NONE)
#endif

MINI_OP(OP_CREATE_SCALAR_UNSAFE, "create_scalar_unsafe", XREG, XREG, NONE)
MINI_OP(OP_CREATE_SCALAR, "create_scalar", XREG, XREG, NONE)

Expand Down
19 changes: 19 additions & 0 deletions src/mono/mono/mini/mini-x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -2877,6 +2877,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
break;
}
case OP_X86_LDIVREM:
case OP_X86_LDIVREMU:
case OP_X86_LDIVREM2:
g_assert_not_reached ();
break;
case OP_X86_IDIVREM:
case OP_X86_IDIVREMU:
x86_div_reg (code, ins->sreg3, ins->opcode==OP_X86_IDIVREM);
break;
case OP_X86_IDIVREM2:
if (ins->dreg != X86_EDX)
x86_mov_reg_reg (code, ins->dreg, X86_EDX);
break;
case OP_IOR:
x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
break;
Expand Down Expand Up @@ -3309,6 +3322,12 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_X86_XCHG:
x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
break;
/* Bit scans: emit bsf/bsr with dreg as destination and sreg1 as source. */
case OP_X86_BSR32:
	x86_bsr (code, ins->dreg, ins->sreg1);
	break;
case OP_X86_BSF32:
	x86_bsf (code, ins->dreg, ins->sreg1);
	break;
case OP_LOCALLOC:
/* keep alignment */
x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
Expand Down
46 changes: 45 additions & 1 deletion src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -4599,6 +4599,7 @@ static SimdIntrinsic bmi2_methods [] = {
// Intrinsic table registered under the "X86Base" group in supported_x86_intrinsics.
// Entries that carry only a method id (BitScanForward, BitScanReverse, DivRem,
// get_IsSupported) have no default opcode here; DivRem, for one, is expanded by
// an explicit case in emit_x86_intrinsics. Pause maps to OP_XOP with
// INTRINS_SSE_PAUSE.
// NOTE(review): entries look like they are kept in SN_ id order - confirm whether
// the lookup depends on that before inserting new methods.
static SimdIntrinsic x86base_methods [] = {
{SN_BitScanForward},
{SN_BitScanReverse},
{SN_DivRem},
{SN_Pause, OP_XOP, INTRINS_SSE_PAUSE},
{SN_get_IsSupported}
};
Expand All @@ -4620,7 +4621,7 @@ static const IntrinGroup supported_x86_intrinsics [] = {
{ "Sse41", MONO_CPU_X86_SSE41, sse41_methods, sizeof (sse41_methods) },
{ "Sse42", MONO_CPU_X86_SSE42, sse42_methods, sizeof (sse42_methods) },
{ "Ssse3", MONO_CPU_X86_SSSE3, ssse3_methods, sizeof (ssse3_methods) },
{ "X86Base", 0, x86base_methods, sizeof (x86base_methods) },
{ "X86Base", MONO_CPU_INITED, x86base_methods, sizeof (x86base_methods), TRUE },
{ "X86Serialize", 0, unsupported, sizeof (unsupported) },
};

Expand Down Expand Up @@ -5246,6 +5247,49 @@ emit_x86_intrinsics (
ins->type = is_64bit ? STACK_I8 : STACK_I4;
MONO_ADD_INS (cfg->cbb, ins);
return ins;
case SN_DivRem: {
	/*
	 * DivRem intrinsic: emits a *DIVREM instruction producing the quotient,
	 * a *DIVREM2 instruction capturing the remainder, and stores both into a
	 * local of the method's return type (fields Item1 / Item2), which is
	 * then loaded as the result.
	 */
	g_assert (!(TARGET_SIZEOF_VOID_P == 4 && is_64bit)); // x86(no -64) cannot do divisions with 64-bit regs
	const MonoStackType divtype = is_64bit ? STACK_I8 : STACK_I4;
	const int storetype = is_64bit ? OP_STOREI8_MEMBASE_REG : OP_STOREI4_MEMBASE_REG;
	const int obj_size = MONO_ABI_SIZEOF (MonoObject);

	// We must decide by the second argument, the first is always unsigned here
	MonoTypeEnum arg1_type = fsig->param_count > 1 ? get_underlying_type (fsig->params [1]) : MONO_TYPE_VOID;
	MonoInst* div;
	MonoInst* div2;

	if (type_enum_is_unsigned (arg1_type)) {
		MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREMU : OP_X86_IDIVREMU);
	} else {
		MONO_INST_NEW (cfg, div, is_64bit ? OP_X86_LDIVREM : OP_X86_IDIVREM);
	}
	div->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
	div->sreg1 = args [0]->dreg; // we can use this directly, reg alloc knows that the contents will be destroyed
	div->sreg2 = args [1]->dreg; // same here as ^
	div->sreg3 = args [2]->dreg;
	div->type = divtype;
	MONO_ADD_INS (cfg->cbb, div);

	// Protect the contents of edx/rdx by assigning it a vreg. The instruction must
	// immediately follow DIV/IDIV so that edx content is not modified.
	// In LLVM the remainder is already calculated, just need to capture it in a vreg.
	MONO_INST_NEW (cfg, div2, is_64bit ? OP_X86_LDIVREM2 : OP_X86_IDIVREM2);
	div2->dreg = is_64bit ? alloc_lreg (cfg) : alloc_ireg (cfg);
	div2->type = divtype;
	MONO_ADD_INS (cfg->cbb, div2);

	// TODO: Can the creation of tuple be elided? (e.g. if deconstruction is used)
	MonoInst* tuple = mono_compile_create_var (cfg, fsig->ret, OP_LOCAL);
	MonoInst* tuple_addr;
	EMIT_NEW_TEMPLOADA (cfg, tuple_addr, tuple->inst_c0);

	// The field lookups are by name; fail with a clear assertion instead of a
	// NULL dereference should the return type ever lack Item1/Item2.
	MonoClassField* field1 = mono_class_get_field_from_name_full (tuple->klass, "Item1", NULL);
	g_assert (field1);
	MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field1->offset - obj_size, div->dreg);
	MonoClassField* field2 = mono_class_get_field_from_name_full (tuple->klass, "Item2", NULL);
	g_assert (field2);
	MONO_EMIT_NEW_STORE_MEMBASE (cfg, storetype, tuple_addr->dreg, field2->offset - obj_size, div2->dreg);
	EMIT_NEW_TEMPLOAD (cfg, ins, tuple->inst_c0);
	return ins;
}
default:
g_assert_not_reached ();
}
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/simd-methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ METHOD(ComputeCrc32C)
// X86Base
METHOD(BitScanForward)
METHOD(BitScanReverse)
METHOD(DivRem)
METHOD(Pause)
// Crypto
METHOD(FixedRotate)
Expand Down
9 changes: 0 additions & 9 deletions src/tests/issues.targets
Original file line number Diff line number Diff line change
Expand Up @@ -1219,15 +1219,6 @@
<ExcludeList Include = "$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/Sse42.X64/Crc32_*/**">
<Issue>/~https://github.com/dotnet/runtime/issues/54185</Issue>
</ExcludeList>
<ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base/X86Base*/**">
<Issue>/~https://github.com/dotnet/runtime/issues/75767</Issue>
</ExcludeList>
<ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base/DivRem*/**">
<Issue>/~https://github.com/dotnet/runtime/issues/75767</Issue>
</ExcludeList>
<ExcludeList Include = "$(XUnitTestBinBase)/JIT/HardwareIntrinsics/X86/X86Base.X64/X86Base.X64*/**">
<Issue>/~https://github.com/dotnet/runtime/issues/75767</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/Convert/out_of_range_fp_to_int_conversions/*">
<Issue>Mono does not define out of range fp to int conversions</Issue>
</ExcludeList>
Expand Down