Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized conversions between Half and Single. #81632

Merged
merged 19 commits into from
Jul 7, 2023
Merged
Changes from 7 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
0190f96
Optimized conversions between `Half` and `Single`.
MineCake147E Feb 4, 2023
f36ab2b
Updated `explicit operator Half(float value)`.
MineCake147E Feb 5, 2023
b3b8eee
Removed `[MethodImpl(MethodImplOptions.AggressiveInlining)]` from `ex…
MineCake147E Feb 6, 2023
d26f6d9
Coding convention compliance
MineCake147E Mar 31, 2023
90abc81
Revert "Coding convention compliance"
MineCake147E Mar 31, 2023
a99b0a9
Coding convention compliance #1 redo
MineCake147E Mar 31, 2023
5535e6f
Merge branch 'main' of https://github.com/dotnet/runtime into improve…
MineCake147E Mar 31, 2023
c01f2f7
Merge branch 'main' of https://github.com/dotnet/runtime into improve…
MineCake147E May 16, 2023
a5142f3
Merge branch 'main' of https://github.com/dotnet/runtime into improve…
MineCake147E May 16, 2023
210815f
* Names of variables and constants got slightly more descriptive
MineCake147E May 16, 2023
394f434
Hopefully fixed bugs
MineCake147E May 16, 2023
ddd6880
Added explanation of `explicit operator float`
MineCake147E May 19, 2023
dc30370
Removed error causing whitespaces at the end of lines
MineCake147E May 20, 2023
d874357
+ Added explanation of `explicit operator Half(float value)`
MineCake147E May 24, 2023
7e2fa18
Merge branch 'main' of https://github.com/dotnet/runtime into improve…
MineCake147E May 24, 2023
05f7100
Fixed misinformation in comments
MineCake147E May 26, 2023
858499c
Optimized `Single`->`Half` conversion with subnormal result
MineCake147E May 26, 2023
5dd0fec
Merge branch 'main' of https://github.com/dotnet/runtime into improve…
MineCake147E May 27, 2023
3e5be81
Update src/libraries/System.Private.CoreLib/src/System/Half.cs
MineCake147E Jul 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 48 additions & 48 deletions src/libraries/System.Private.CoreLib/src/System/Half.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System
{
Expand Down Expand Up @@ -607,30 +608,33 @@ public static explicit operator Half(double value)
/// <returns><paramref name="value" /> converted to its nearest representable half-precision floating-point value.</returns>
public static explicit operator Half(float value)
{
// NOTE(review): this span is a rendered diff with the +/- markers stripped. Judging by the hunk
// header (-607,30 +608,33), everything down to the `RoundPackToHalf` return is the removed
// branch-based implementation and the Vector128 code after it is the replacement - confirm against the PR.
const int SingleMaxExponent = 0xFF;

// Split the binary32 bit pattern into its sign, biased exponent and trailing significand fields.
uint floatInt = BitConverter.SingleToUInt32Bits(value);
bool sign = (floatInt & float.SignMask) >> float.SignShift != 0;
int exp = (int)(floatInt & float.BiasedExponentMask) >> float.BiasedExponentShift;
uint sig = floatInt & float.TrailingSignificandMask;

if (exp == SingleMaxExponent)
{
if (sig != 0) // All-ones exponent with a non-zero significand: NaN
{
return CreateHalfNaN(sign, (ulong)sig << 41); // Shift the 23 significand bits to the top of the 64-bit argument
}
return sign ? NegativeInfinity : PositiveInfinity;
}

uint sigHalf = sig >> 9 | ((sig & 0x1FFU) != 0 ? 1U : 0U); // RightShiftJam: shift right by 9 and OR a sticky 1 into bit 0 if any discarded bit was set

// Both the exponent field and the jammed significand are zero only when value is +0 or -0.
if ((exp | (int)sigHalf) == 0)
{
return new Half(sign, 0, 0);
}

return new Half(RoundPackToHalf(sign, (short)(exp - 0x71), (ushort)(sigHalf | 0x4000)));
// NOTE(review): replacement (branch-free) implementation appears to start here.
Vector128<uint> v0 = Vector128.CreateScalarUnsafe(0x3880_0000u); // Bit pattern of 2^-14 (biased exponent 113, zero fraction); lower clamp applied before the exponent is extracted
Vector128<uint> v1 = Vector128.CreateScalarUnsafe(0x3800_0000u); // Exponent displacement #1: 112 in the binary32 exponent field (112 << 23)
Vector128<uint> v3 = Vector128.CreateScalarUnsafe(0x7f80_0000u); // binary32 exponent mask
Vector128<uint> v4 = Vector128.CreateScalarUnsafe(0x0680_0000u); // Exponent displacement #2: 13 in the binary32 exponent field (13 << 23)
Vector128<float> v5 = Vector128.CreateScalarUnsafe(65520.0f); // Upper clamp for |value| (original note: "Maximum value that is not Infinity in Half"). NOTE(review): Half.MaxValue is 65504; 65520 looks like the boundary that rounds to Infinity - confirm wording
MineCake147E marked this conversation as resolved.
Show resolved Hide resolved
uint v = BitConverter.SingleToUInt32Bits(value);
Vector128<float> vval = Vector128.CreateScalarUnsafe(value);
vval = Vector128.Abs(vval); // Clear sign bit
uint s = v & 0x8000_0000u; // Extract sign bit (still at bit 31)
vval = Vector128.Min(v5, vval); // Clamp magnitudes above 65520 (these become Infinity in Half)
Vector128<uint> w = Vector128.Equals(vval, vval).AsUInt32(); // NaN detection: element 0 is all ones when vval is not NaN, zero when it is (NaN != NaN)
Vector128<uint> y = Vector128.Max(v0, vval.AsUInt32()); // Raise small bit patterns to at least 2^-14 so the extracted exponent has a lower bound
y &= v3; // Keep only the exponent bits
y += v4; // Increase the exponent by 13
Vector128<uint> z = y - v1; // y with its exponent reduced by 112
z &= w; // Zero z entirely if value is NaN
vval += y.AsSingle(); // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
danmoseley marked this conversation as resolved.
Show resolved Hide resolved
vval = (vval.AsUInt32() - v1).AsSingle(); // Reduce the exponent by 112
vval -= z.AsSingle(); // Clear the extra leading 1 introduced by the rounding addition
v = vval.AsUInt32().GetElement(0) >> 13; // Before this shift the bits are the absolute value as represented in Half, shifted 13 bits left, with some exceptions like NaN having strange exponents
s >>>= 16; // Move the sign bit from bit 31 down to bit 15, its position in Half
uint hc = ~w.GetElement(0) & 0x7C00u; // 0x7C00 (all Half exponent bits) if NaN, 0 otherwise
v &= 0x7fffu; // Keep only the 15 exponent/fraction bits
uint gc = hc | s; // Merge sign bit with the possible NaN exponent
v &= ~hc; // Clear the exponent bits if value is NaN
v |= gc; // Apply the sign bit and, for NaN, the all-ones exponent
return BitConverter.UInt16BitsToHalf((ushort)v); // The final result
}

/// <summary>Explicitly converts a <see cref="ushort" /> value to its nearest representable half-precision floating-point value.</summary>
Expand Down Expand Up @@ -874,32 +878,28 @@ public static explicit operator double(Half value)
/// <summary>Explicitly converts a half-precision floating-point value to its nearest representable <see cref="float" /> value.</summary>
/// <param name="value">The value to convert.</param>
/// <returns><paramref name="value" /> converted to its nearest representable <see cref="float" /> value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
MineCake147E marked this conversation as resolved.
Show resolved Hide resolved
public static explicit operator float(Half value)
{
// NOTE(review): this span is a rendered diff with the +/- markers stripped. Judging by the hunk
// header (-874,32 +878,28), everything down to the `CreateSingle` return is the removed
// branch-based implementation and the constant-driven code after it is the replacement - confirm against the PR.
bool sign = IsNegative(value);
int exp = value.BiasedExponent;
uint sig = value.TrailingSignificand;

if (exp == MaxBiasedExponent)
{
if (sig != 0) // Maximum exponent with a non-zero significand: NaN
{
return CreateSingleNaN(sign, (ulong)sig << 54); // Shift the 10 significand bits to the top of the 64-bit argument
}
return sign ? float.NegativeInfinity : float.PositiveInfinity;
}

if (exp == 0)
{
if (sig == 0)
{
return BitConverter.UInt32BitsToSingle(sign ? float.SignMask : 0); // Positive / Negative zero
}
// Non-zero significand with a zero exponent field: subnormal input, normalized by the helper.
(exp, sig) = NormSubnormalF16Sig(sig);
exp -= 1;
}

// 0x70 (112) is added to the exponent and the significand is shifted left by 13 bits before packing.
return CreateSingle(sign, (byte)(exp + 0x70), sig << 13);
// NOTE(review): replacement (branch-free) implementation appears to start here.
const uint ExponentLowerBound = 0x3880_0000u; // The smallest positive normal number in Half, converted to Single (2^-14)
const uint ExponentOffset = 0x3800_0000u; // BitConverter.SingleToUInt32Bits(1.0f) - ((uint)BitConverter.HalfToUInt16Bits((Half)1.0f) << 13), i.e. 112 << 23
const uint FloatSignMask = 0x8000_0000u; // Mask for sign bit in Single
short h = BitConverter.HalfToInt16Bits(value); // Extract the internal representation of value
uint v = (uint)(int)h; // Sign-extend so that the sign bit is copied into all upper bits
uint e = v & 0x7c00u; // Extract exponent bits of value
bool c = e == 0u; // true when the exponent field is zero, i.e. value is zero or subnormal
uint hc = (uint)-Unsafe.As<bool, byte>(ref c); // ~0u when c is true, 0 otherwise
bool b = e == 0x7c00u; // true when value is either Infinity or NaN
uint hb = (uint)-Unsafe.As<bool, byte>(ref b); // ~0u when b is true, 0 otherwise
uint n = hc & ExponentLowerBound; // n is 0x3880_0000u if c is true, 0 otherwise
uint j = ExponentOffset | n; // j is now 0x3880_0000u if value is zero or subnormal, 0x3800_0000u otherwise
v <<= 13; // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32 (Single)
j += j & hb; // Double j if value is either Infinity or NaN
uint s = v & FloatSignMask; // Extract sign bit of value (bit 31 holds a sign copy thanks to the sign extension above)
v &= 0x0FFF_E000; // Keep only the exponent bits and fraction bits of value
v += j; // Adjust exponent to match the range of exponent
uint k = BitConverter.SingleToUInt32Bits(BitConverter.UInt32BitsToSingle(v) - BitConverter.UInt32BitsToSingle(n)); // If the exponent field was zero, subtract 2^-14 to remove the unnecessary 1 on top of the fraction bits; otherwise n is 0 and this subtracts 0.0f
return BitConverter.UInt32BitsToSingle(k | s); // Merge sign bit with rest
}

// IEEE 754 specifies NaNs to be propagated
Expand Down