From e405e94cdfc8e040c30f40504181a6848a4f4d40 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Thu, 10 Aug 2023 15:15:37 +0200
Subject: [PATCH 1/8] JIT: Unspill normalize-on-load variables using exact small type

The existing logic would sometimes unspill using the type of the local
node that is being unspilled. This type is often wider than the exact
small type in the LclVarDsc, since normalize-on-load (NOL) locals are
normally expanded as CAST(small type, LCL_VAR). This causes problems
since morph will in some cases avoid inserting the normalization for
NOL locals when it has a subrange assertion available. That
optimization relies on the backend always ensuring that the local will
be normalized as part of unspilling and args homing.

Size-wise regressions are expected on xarch since the encoding of the
normalizing load is larger. However, as we have seen before, using wide
loads can cause significant store-forwarding stalls, which can have a
large negative impact on performance, so overall there is an expected
perf benefit of using the small loads (in addition to the correctness
fix).

Fix #90219
---
 src/coreclr/jit/codegenlinear.cpp | 45 +++++++++++++++----------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 4c512a4a3f72c7..d207ea3d6e0dbb 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1211,24 +1211,25 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
         // Reset spilled flag, since we are going to load a local variable from its home location.
         unspillTree->gtFlags &= ~GTF_SPILLED;
 
-        GenTreeLclVar* lcl       = unspillTree->AsLclVar();
-        LclVarDsc*     varDsc    = compiler->lvaGetDesc(lcl);
-        var_types      spillType = varDsc->GetRegisterType(lcl);
-        assert(spillType != TYP_UNDEF);
-
-// TODO-Cleanup: The following code could probably be further merged and cleaned up.
-#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-        // Load local variable from its home location.
-        // Never allow truncating the locals here, otherwise a subsequent
-        // use of the local with a wider type would see the truncated
-        // value. We do allow wider loads as those can be efficient even
-        // when unaligned and might be smaller encoding wise (on xarch).
-        var_types lclLoadType = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : varDsc->GetStackSlotHomeType();
-        assert(lclLoadType != TYP_UNDEF);
-        if (genTypeSize(spillType) < genTypeSize(lclLoadType))
-        {
-            spillType = lclLoadType;
-        }
+        GenTreeLclVar* lcl    = unspillTree->AsLclVar();
+        LclVarDsc*     varDsc = compiler->lvaGetDesc(lcl);
+
+        // Pick type to reload register from stack with. Note that for
+        // normalize-on-load variables the type of 'lcl' does not have any
+        // relation to the type of 'varDsc':
+        // * It is wider under normal circumstances, where morph has added a cast on top
+        // * It is the same in many circumstances, e.g. when morph tries to
+        //   guess via a subrange assertion that the upper bits are going to
+        //   be zero.
+        // * It is narrower in some cases, e.g. when lowering optimizes to
+        //   use a byte `cmp`
+        //
+        // For unspilling purposes we should always make sure we get a
+        // normalized value in the register, so use the type from the
+        // varDsc.
+
+        var_types unspillType = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : varDsc->GetRegisterType(lcl);
+        assert(unspillType != TYP_UNDEF);
 
 #if defined(TARGET_LOONGARCH64)
         if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum()))
@@ -1236,14 +1237,10 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
             spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
         }
 #endif
-#elif defined(TARGET_ARM)
-// No normalizing for ARM
-#else
-        NYI("Unspilling not implemented for this target architecture.");
-#endif
+
         bool reSpill   = ((unspillTree->gtFlags & GTF_SPILL) != 0);
         bool isLastUse = lcl->IsLastUse(0);
-        genUnspillLocal(lcl->GetLclNum(), spillType, lcl->AsLclVar(), tree->GetRegNum(), reSpill, isLastUse);
+        genUnspillLocal(lcl->GetLclNum(), unspillType, lcl->AsLclVar(), tree->GetRegNum(), reSpill, isLastUse);
     }
     else if (unspillTree->IsMultiRegLclVar())
     {
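To make the type-selection rule in patch 1 concrete, here is a minimal standalone C++ sketch of the decision it encodes. The names (VarType, MockLclVarDsc, PickUnspillType) are illustrative stand-ins invented for this note, not the JIT's real var_types/LclVarDsc machinery:

    #include <cassert>

    // Illustrative stand-ins; the enum values double as byte sizes.
    enum VarType { TYP_BYTE = 1, TYP_SHORT = 2, TYP_INT = 4, TYP_LONG = 8 };

    struct MockLclVarDsc
    {
        VarType declaredType;    // the exact small type, e.g. TYP_BYTE
        bool    normalizeOnLoad; // NOL: loads must sign-/zero-extend
    };

    // The rule patch 1 introduces: for NOL locals, reload using the exact
    // small type from the descriptor, so the load itself normalizes the
    // register (movzx/movsx on xarch), instead of trusting the possibly
    // wider type that appears on the use.
    VarType PickUnspillType(const MockLclVarDsc& dsc, VarType useType)
    {
        return dsc.normalizeOnLoad ? dsc.declaredType : useType;
    }

    int main()
    {
        MockLclVarDsc byteLocal{TYP_BYTE, /* normalizeOnLoad */ true};
        // Even when the use shows up as TYP_INT because morph skipped the
        // normalizing cast under a subrange assertion, reload as byte.
        assert(PickUnspillType(byteLocal, TYP_INT) == TYP_BYTE);
        return 0;
    }

The narrow reload trades a slightly longer encoding on xarch for a guaranteed-normalized register, which is exactly what the subrange-assertion optimization in morph depends on.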
From 15be0fee2a9789be9995a73a6f16938f0600d6b5 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Thu, 10 Aug 2023 16:08:43 +0200
Subject: [PATCH 2/8] Add regression test

---
 .../JitBlue/Runtime_90219/Runtime_90219.cs    | 110 ++++++++++++++++++
 .../Runtime_90219/Runtime_90219.csproj        |   8 ++
 src/tests/issues.targets                      |   6 +
 3 files changed, 124 insertions(+)
 create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.cs
 create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.csproj

diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.cs b/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.cs
new file mode 100644
index 00000000000000..b314f3672f8d64
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.cs
@@ -0,0 +1,110 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Xunit;
+
+// Generated by Fuzzlyn v1.5 on 2023-08-05 16:08:41
+// Run on X86 Windows
+// Seed: 7610693270284065118
+// Reduced from 12.0 KiB to 2.2 KiB in 00:01:06
+// Debug: Outputs 1
+// Release: Outputs 2
+public interface I0
+{
+}
+
+public class C0 : I0
+{
+    public ulong F0;
+    public uint F4;
+    public C0(ulong f0, uint f4)
+    {
+        F0 = f0;
+        F4 = f4;
+    }
+}
+
+public class Runtime_90219
+{
+    public static IRuntime s_rt;
+    public static I0 s_1;
+    public static uint s_2;
+    public static byte[] s_4 = new byte[]{0};
+    public static byte Result;
+
+    [Fact]
+    public static int TestEntryPoint()
+    {
+        CollectibleALC alc = new CollectibleALC();
+        System.Reflection.Assembly asm = alc.LoadFromAssemblyPath(System.Reflection.Assembly.GetExecutingAssembly().Location);
+        System.Reflection.MethodInfo mi = asm.GetType(typeof(Runtime_90219).FullName).GetMethod(nameof(MainInner));
+        System.Type runtimeTy = asm.GetType(typeof(Runtime).FullName);
+        return (int)mi.Invoke(null, new object[]{System.Activator.CreateInstance(runtimeTy)});
+    }
+
+    public static int MainInner(IRuntime rt)
+    {
+        s_rt = rt;
+        var vr8 = new C0(0, 0);
+        var vr12 = s_4[0];
+        vr8.F0 *= M2(vr12);
+        long[] vr9 = new long[]{0};
+        bool vr10 = (int)M2(0) <= vr9[0];
+        return Result == 1 ? 100 : 101;
+    }
+
+    public static ulong M2(byte arg0)
+    {
+        for (int var0 = 0; var0 < 2; var0++)
+        {
+            var vr1 = new C0(0, 0);
+            var vr3 = new C0(0, 0);
+            uint vr14 = s_2;
+            s_rt.WriteLine("c_0", vr14);
+            uint vr15 = vr3.F4;
+            s_2 = M3(vr1, vr15);
+            s_1 = s_1;
+            s_1 = new C0(0, 0);
+            s_rt.WriteLine("c_6", var0);
+        }
+
+        var vr5 = new C0(0, 1);
+        uint vr18 = vr5.F4;
+        arg0 = (byte)vr18;
+        var vr7 = new C0(0, 1838341904U);
+        if ((arg0 >= (byte)M3(vr7, 0)))
+        {
+            arg0 <<= 1;
+        }
+
+        s_rt.WriteLine("c_7", arg0);
+        return 0;
+    }
+
+    public static uint M3(C0 argThis, uint arg0)
+    {
+        s_rt.WriteLine("c_0", arg0);
+        return argThis.F4;
+    }
+}
+
+public interface IRuntime
+{
+    void WriteLine<T>(string site, T value);
+}
+
+public class Runtime : IRuntime
+{
+    public void WriteLine<T>(string site, T value)
+    {
+        if (typeof(T) == typeof(byte))
+            Runtime_90219.Result = (byte)(object)value;
+    }
+}
+
+public class CollectibleALC : System.Runtime.Loader.AssemblyLoadContext
+{
+    public CollectibleALC(): base(true)
+    {
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.csproj
new file mode 100644
index 00000000000000..de6d5e08882e86
--- /dev/null
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219.csproj
@@ -0,0 +1,8 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index a7a23cd4cdaf3f..b97955a062f093 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -1069,6 +1069,9 @@
             <Issue>/~https://github.com/dotnet/runtimelab/issues/155: Assembly.Load</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/**">
+            <Issue>/~https://github.com/dotnet/runtimelab/issues/155: Assembly.Load</Issue>
+        </ExcludeList>
         <ExcludeList Include="…">
             <Issue>/~https://github.com/dotnet/runtimelab/issues/155: Reflection.Emit</Issue>
@@ -3388,6 +3391,9 @@
             <Issue>Loads an assembly from file</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/**">
+            <Issue>Loads an assembly from file</Issue>
+        </ExcludeList>

From 9c59fe8defdbb5b2fc91b2fbb9543d5c699e1f58 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Thu, 10 Aug 2023 22:21:48 +0200
Subject: [PATCH 3/8] Fix logic, update comment

---
 src/coreclr/jit/codegenlinear.cpp | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index d207ea3d6e0dbb..410ca80cb6b513 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1214,21 +1214,21 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
         GenTreeLclVar* lcl    = unspillTree->AsLclVar();
         LclVarDsc*     varDsc = compiler->lvaGetDesc(lcl);
 
-        // Pick type to reload register from stack with. Note that for
-        // normalize-on-load variables the type of 'lcl' does not have any
-        // relation to the type of 'varDsc':
-        // * It is wider under normal circumstances, where morph has added a cast on top
-        // * It is the same in many circumstances, e.g. when morph tries to
-        //   guess via a subrange assertion that the upper bits are going to
-        //   be zero.
-        // * It is narrower in some cases, e.g. when lowering optimizes to
-        //   use a byte `cmp`
+        // Pick type to reload register from stack with. Note that in
+        // general, the type of 'lcl' does not have any relation to the
+        // type of 'varDsc':
         //
-        // For unspilling purposes we should always make sure we get a
-        // normalized value in the register, so use the type from the
-        // varDsc.
+        // * For NOL locals it is wider under normal circumstances, where
+        //   morph has added a cast on top
+        // * For all locals it can be narrower in some cases, e.g.
+        //   when lowering optimizes to use a smaller typed `cmp` (e.g.
+        //   32-bit cmp for 64-bit local, or 8-bit cmp for 16-bit local).
+        //
+        // Thus, we should always pick the type from varDsc. For NOL locals
+        // we further want to make sure we always leave a normalized value
+        // in the register.
 
-        var_types unspillType = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : varDsc->GetRegisterType(lcl);
+        var_types unspillType = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : varDsc->GetStackSlotHomeType();
         assert(unspillType != TYP_UNDEF);

From d6d0e08ada28e609c1b239745815868adb28de25 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Fri, 11 Aug 2023 10:58:38 +0200
Subject: [PATCH 4/8] Disable test on Mono

---
 src/tests/issues.targets | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index b97955a062f093..4eed8d9f7495c4 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -1976,6 +1976,9 @@
             <Issue>/~https://github.com/dotnet/runtime/issues/88499</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/Regression/JitBlue/Runtime_90219/Runtime_90219/**">
+            <Issue>/~https://github.com/dotnet/runtime/issues/90374</Issue>
+        </ExcludeList>
@@ -2716,7 +2719,7 @@
             <Issue>/~https://github.com/dotnet/runtime/issues/48914</Issue>
         </ExcludeList>
         <ExcludeList Include="…">
-            <Issue>Fuzzlyn</Issue>
+            <Issue>/~https://github.com/dotnet/runtime/issues/90372</Issue>
         </ExcludeList>
         <ExcludeList Include="…">
             <Issue>/~https://github.com/dotnet/runtime/issues/73454;/~https://github.com/dotnet/runtime/pull/61707#issuecomment-973122341</Issue>

From b4b665427b6a2e03438e0c34cfdde3371b8aa28d Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Fri, 11 Aug 2023 13:17:01 +0200
Subject: [PATCH 5/8] Update src/coreclr/jit/codegenlinear.cpp

---
 src/coreclr/jit/codegenlinear.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 410ca80cb6b513..c3f6d70d1e6210 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1234,7 +1234,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
 #if defined(TARGET_LOONGARCH64)
         if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum()))
         {
-            spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
+            unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
         }
 #endif

From ab21c798c5e921c1e4f30bf5cb103d9dfa117c38 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Sat, 12 Aug 2023 15:55:23 +0200
Subject: [PATCH 6/8] Address feedback; scope down

---
 src/coreclr/jit/codegenlinear.cpp | 52 ++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 11 deletions(-)

diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index c3f6d70d1e6210..5a25288c1afee4 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1213,23 +1213,48 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
         GenTreeLclVar* lcl    = unspillTree->AsLclVar();
         LclVarDsc*     varDsc = compiler->lvaGetDesc(lcl);
 
+        var_types unspillType = varDsc->GetRegisterType(lcl);
+        assert(unspillType != TYP_UNDEF);
+
+// TODO-Cleanup: The following code could probably be further merged and cleaned up.
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
         // Pick type to reload register from stack with. Note that in
         // general, the type of 'lcl' does not have any relation to the
         // type of 'varDsc':
         //
-        // * For NOL locals it is wider under normal circumstances, where
-        //   morph has added a cast on top
-        // * For all locals it can be narrower in some cases, e.g.
-        //   when lowering optimizes to use a smaller typed `cmp` (e.g.
-        //   32-bit cmp for 64-bit local, or 8-bit cmp for 16-bit local).
+        // * For normalize-on-load (NOL) locals it is wider under normal
+        //   circumstances, where morph has added a cast on top. In some
+        //   cases it is the same, when morph has used a subrange assertion
+        //   to avoid normalizing.
+        //
+        // * For all locals it can be narrower in some cases, when
+        //   lowering optimizes to use a smaller typed `cmp` (e.g. 32-bit cmp
+        //   for 64-bit local, or 8-bit cmp for 16-bit local).
+        //
+        // * For byrefs it can differ in GC-ness (TYP_I_IMPL vs TYP_BYREF).
+        //
+        // In the NOL case the potential use of subrange assertions means
+        // we always have to normalize, even if 'lcl' is wide; we could
+        // have a GTF_SPILLED LCL_VAR<int>(NOL local) with a future
+        // LCL_VAR<small type>(same NOL local), where the latter local then
+        // relies on the normalization to have happened here as part of
+        // unspilling.
+        //
+        if (varDsc->lvNormalizeOnLoad())
+        {
+            unspillType = varDsc->TypeGet();
+        }
+        else
+        {
+            // Potentially narrower -- see if we should widen.
+            var_types lclLoadType = varDsc->GetStackSlotHomeType();
+            assert(lclLoadType != TYP_UNDEF);
+            if (genTypeSize(unspillType) < genTypeSize(lclLoadType))
+            {
+                unspillType = lclLoadType;
+            }
+        }
 
-        // Thus, we should always pick the type from varDsc. For NOL locals
-        // we further want to make sure we always leave a normalized value
-        // in the register.
-
-        var_types unspillType = varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : varDsc->GetStackSlotHomeType();
-        assert(unspillType != TYP_UNDEF);
 
 #if defined(TARGET_LOONGARCH64)
         if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum()))
@@ -1237,6 +1262,11 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
             unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
         }
 #endif
+#elif defined(TARGET_ARM)
+// No normalizing for ARM
+#else
+        NYI("Unspilling not implemented for this target architecture.");
+#endif
 
         bool reSpill   = ((unspillTree->gtFlags & GTF_SPILL) != 0);
         bool isLastUse = lcl->IsLastUse(0);

From a7c421078f19aa4303d770e47e9ace21c58b3e94 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Sat, 12 Aug 2023 16:07:58 +0200
Subject: [PATCH 7/8] Run jit-format

---
 src/coreclr/jit/codegenlinear.cpp | 6 +++---
 src/coreclr/jit/compiler.hpp      | 5 +++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 5a25288c1afee4..b0048c9d2aa28c 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1211,8 +1211,8 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
         // Reset spilled flag, since we are going to load a local variable from its home location.
         unspillTree->gtFlags &= ~GTF_SPILLED;
 
-        GenTreeLclVar* lcl    = unspillTree->AsLclVar();
-        LclVarDsc*     varDsc = compiler->lvaGetDesc(lcl);
+        GenTreeLclVar* lcl         = unspillTree->AsLclVar();
+        LclVarDsc*     varDsc      = compiler->lvaGetDesc(lcl);
         var_types      unspillType = varDsc->GetRegisterType(lcl);
         assert(unspillType != TYP_UNDEF);
@@ -1228,7 +1228,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
         //   cases it is the same, when morph has used a subrange assertion
         //   to avoid normalizing.
         //
-        // * For all locals it can be narrower in some cases, when 
+        // * For all locals it can be narrower in some cases, when
         //   lowering optimizes to use a smaller typed `cmp` (e.g. 32-bit cmp
         //   for 64-bit local, or 8-bit cmp for 16-bit local).
         //
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index ae9e180dbe83df..c50962b5bb2c07 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -1144,6 +1144,11 @@ inline GenTree::GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode)
     gtVNPair.SetBoth(ValueNumStore::NoVN);
     gtRegTag   = GT_REGTAG_NONE;
     gtOperSave = GT_NONE;
+
+    if (ISMETHOD("M10") && (gtTreeID == 94))
+    {
+        printf("here");
+    }
 #endif
 }

From 803ac86110460cc28d72392c9dc23bceae563090 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Mon, 14 Aug 2023 16:48:31 +0200
Subject: [PATCH 8/8] Update src/coreclr/jit/compiler.hpp

---
 src/coreclr/jit/compiler.hpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index c50962b5bb2c07..ae9e180dbe83df 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -1144,11 +1144,6 @@ inline GenTree::GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode)
     gtVNPair.SetBoth(ValueNumStore::NoVN);
     gtRegTag   = GT_REGTAG_NONE;
     gtOperSave = GT_NONE;
-
-    if (ISMETHOD("M10") && (gtTreeID == 94))
-    {
-        printf("here");
-    }
 #endif
 }
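Patch 6 above also restores the widening path for non-NOL locals that patch 1 had dropped. A standalone sketch of that restored rule, again with invented names (TypeSize, WidenIfNarrower) standing in for the JIT's genTypeSize/GetStackSlotHomeType:

    #include <cassert>

    enum VarType { TYP_BYTE = 1, TYP_SHORT = 2, TYP_INT = 4, TYP_LONG = 8 };

    int TypeSize(VarType t) { return static_cast<int>(t); }

    // For non-NOL locals: if lowering shrank the use (say, an 8-bit cmp of
    // a 16-bit local), reload with the wider stack-slot type so a later,
    // wider use of the same register still sees all of its bits.
    VarType WidenIfNarrower(VarType useType, VarType stackSlotHomeType)
    {
        return TypeSize(useType) < TypeSize(stackSlotHomeType) ? stackSlotHomeType : useType;
    }

    int main()
    {
        assert(WidenIfNarrower(TYP_BYTE, TYP_SHORT) == TYP_SHORT); // widen a narrow use
        assert(WidenIfNarrower(TYP_INT, TYP_INT) == TYP_INT);      // exact use kept as-is
        return 0;
    }

Truncating here would be a correctness bug in the other direction: a subsequent wider use of the local would read a partially loaded register.

Finally, the failure mode the series fixes can be pictured in plain C++, independent of the JIT. This is a loose analogy with invented variables, not the actual codegen (the real reproduction is Runtime_90219.cs in patch 2):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // A byte-sized local conceptually lives in a wider slot/register
        // whose upper bits may hold stale data.
        uint32_t slot      = 0xCDCDCDCDu;
        uint8_t  byteLocal = 1;
        slot = (slot & ~0xFFu) | byteLocal; // only the low byte is the value

        uint32_t wideReload   = slot;         // old behavior: wide, unnormalized
        uint32_t narrowReload = slot & 0xFFu; // new behavior: exact small type

        // If morph omitted the normalizing cast (trusting the backend to
        // normalize during unspill), only the narrow reload is correct.
        std::printf("wide: 0x%08X, narrow: 0x%08X\n", wideReload, narrowReload);
        return (narrowReload == 1 && wideReload != 1) ? 0 : 1;
    }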