Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimized conversions between Half and Single. #81632

Merged
merged 19 commits into from
Jul 7, 2023
Merged
Changes from 13 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
0190f96
Optimized conversions between `Half` and `Single`.
MineCake147E Feb 4, 2023
f36ab2b
Updated `explicit operator Half(float value)`.
MineCake147E Feb 5, 2023
b3b8eee
Removed `[MethodImpl(MethodImplOptions.AggressiveInlining)]` from `ex…
MineCake147E Feb 6, 2023
d26f6d9
Coding convention compliance
MineCake147E Mar 31, 2023
90abc81
Revert "Coding convention compliance"
MineCake147E Mar 31, 2023
a99b0a9
Coding convention compliance #1 redo
MineCake147E Mar 31, 2023
5535e6f
Merge branch 'main' of /~https://github.com/dotnet/runtime into improve…
MineCake147E Mar 31, 2023
c01f2f7
Merge branch 'main' of /~https://github.com/dotnet/runtime into improve…
MineCake147E May 16, 2023
a5142f3
Merge branch 'main' of /~https://github.com/dotnet/runtime into improve…
MineCake147E May 16, 2023
210815f
* Names of variables and constants got slightly more descriptive
MineCake147E May 16, 2023
394f434
Hopefully fixed bugs
MineCake147E May 16, 2023
ddd6880
Added explanation of `explicit operator float`
MineCake147E May 19, 2023
dc30370
Removed error causing whitespaces at the end of lines
MineCake147E May 20, 2023
d874357
+ Added explanation of `explicit operator Half(float value)`
MineCake147E May 24, 2023
7e2fa18
Merge branch 'main' of /~https://github.com/dotnet/runtime into improve…
MineCake147E May 24, 2023
05f7100
Fixed misinformation in comments
MineCake147E May 26, 2023
858499c
Optimized `Single`->`Half` conversion with subnormal result
MineCake147E May 26, 2023
5dd0fec
Merge branch 'main' of /~https://github.com/dotnet/runtime into improve…
MineCake147E May 27, 2023
3e5be81
Update src/libraries/System.Private.CoreLib/src/System/Half.cs
MineCake147E Jul 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 140 additions & 48 deletions src/libraries/System.Private.CoreLib/src/System/Half.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System
{
Expand Down Expand Up @@ -614,30 +615,76 @@ public static explicit operator Half(double value)
/// <returns><paramref name="value" /> converted to its nearest representable half-precision floating-point value.</returns>
public static explicit operator Half(float value)
{
const int SingleMaxExponent = 0xFF;

uint floatInt = BitConverter.SingleToUInt32Bits(value);
bool sign = (floatInt & float.SignMask) >> float.SignShift != 0;
int exp = (int)(floatInt & float.BiasedExponentMask) >> float.BiasedExponentShift;
uint sig = floatInt & float.TrailingSignificandMask;

if (exp == SingleMaxExponent)
{
if (sig != 0) // NaN
{
return CreateHalfNaN(sign, (ulong)sig << 41); // Shift the significand bits to the left end
}
return sign ? NegativeInfinity : PositiveInfinity;
}

uint sigHalf = sig >> 9 | ((sig & 0x1FFU) != 0 ? 1U : 0U); // RightShiftJam: shift right by 9 and OR a 1 into the lowest bit if any of the 9 discarded bits was set

if ((exp | (int)sigHalf) == 0)
{
return new Half(sign, 0, 0);
}

return new Half(RoundPackToHalf(sign, (short)(exp - 0x71), (ushort)(sigHalf | 0x4000)));
// TODO: Detailed explanation of this branchless conversion algorithm here
MineCake147E marked this conversation as resolved.
Show resolved Hide resolved
#region Explanation of this algorithm
// This algorithm converts a single-precision floating-point number to a half-precision floating-point number by multiplying it as a floating-point number and rearranging the bit sequence.
// However, it introduces some tricks to implement rounding correctly, to avoid multiplying denormalized numbers and to deal with exceptions such as infinity and NaN without using branch instructions.
//
// The bit sequence of a half-precision floating-point number is as follows
// seee_eeff_ffff_ffff
// The bit sequence of a single-precision floating-point number is as follows
// seee_eeee_efff_ffff_ffff_ffff_ffff_ffff
// In both cases, "_" separates groups of four bits (one hexadecimal digit each), "s" is the sign, "e" is the exponent part, and "f" is the mantissa part.
// In half-precision, the exponent part is 5 bits and the mantissa part is 10 bits. In single precision, the exponent is 8 bits and the mantissa is 23 bits.
// Both formats use an offset binary representation for the exponent part: the exponent part for 1.0 is half of the maximum value for either precision, i.e., 127 for single-precision and 15 for half-precision.
// The mantissa part is normalized when the exponent part is nonzero, since in binary numbers, 1 appears as the most significant digit for any nonzero number.
//
//
#endregion
// Bit pattern of the smallest positive normal Half value expressed as a Single (exponent field 113, zero fraction).
// Inputs whose magnitude bits are below this have their exponent replaced by this one before rounding.
const uint MinExp = 0x3880_0000u;
// Exponent displacement #1: the value 112 placed in the Single exponent field (112 << 23); 112 is 127 - 15, the difference of the two exponent biases
const uint Exponent112 = 0x3800_0000u;
// Exponent mask
const uint SingleBiasedExponentMask = float.BiasedExponentMask;
// Exponent displacement #2: the value 13 placed in the Single exponent field (13 << 23); 13 is 23 - 10, the difference of the two mantissa widths
const uint Exponent13 = 0x0680_0000u;
// Upper clamp applied to the magnitude before conversion (see float.Min below).
// NOTE(review): 65520 is larger than the largest finite Half (65504); presumably it is the smallest magnitude that rounds to Infinity in Half - confirm.
const float MaxHalfValueBelowInfinity = 65520.0f;
uint bitValue = BitConverter.SingleToUInt32Bits(value);
// Extract sign bit
uint sign = bitValue & float.SignMask;
// Clear sign bit
value = float.Abs(value);
// Rectify values that are Infinity in Half. (float.Min now emits vminps instruction if one of two arguments is a constant)

value = float.Min(MaxHalfValueBelowInfinity, value);
bitValue = BitConverter.SingleToUInt32Bits(value);
// Detecting NaN: realMask is ~0u if value is not NaN, 0 if value is NaN
uint realMask = (uint)(Unsafe.BitCast<bool, sbyte>(float.IsNaN(value)) - 1);
// underflowMask is ~0u if the clamped magnitude bits are below MinExp, 0 otherwise
uint underflowMask = (uint)-Unsafe.BitCast<bool, byte>(MinExp > bitValue);
// Rectify lower exponent: use MinExp on underflow, the value's own bits otherwise
uint exponentOffset0 = (MinExp & underflowMask) | (~underflowMask & bitValue);
// Extract exponent
exponentOffset0 &= SingleBiasedExponentMask;
// Add exponent by 13
exponentOffset0 += Exponent13;
// exponentOffset1 is exponentOffset0 with 112 subtracted from its exponent field
uint exponentOffset1 = exponentOffset0 - Exponent112;
// Zero whole exponentOffset1 if value is NaN
exponentOffset1 &= realMask;
// Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
value += BitConverter.UInt32BitsToSingle(exponentOffset0);
// Subtract exponent by 112
value = BitConverter.UInt32BitsToSingle(BitConverter.SingleToUInt32Bits(value) - Exponent112);
// Clear Extra leading 1 set in rounding
value -= BitConverter.UInt32BitsToSingle(exponentOffset1);
// Now internal representation is the absolute value represented in Half, shifted 13 bits left, with some exceptions like NaN having strange exponents
bitValue = BitConverter.SingleToUInt32Bits(value) >> 13;
// Match the position of sign bit: move it from bit 31 (Single) down to bit 15 (Half)
sign >>>= 16;
// 0x7C00u (all Half exponent bits) if value is NaN, 0 otherwise
uint maskedHalfExponentForNaN = ~realMask & 0x7C00u;
// Clear the upper unnecessary bits
bitValue &= 0x7fffu;
// Merge sign bit with possible NaN exponent
uint signAndMaskedExponent = maskedHalfExponentForNaN | sign;
// Clear exponents if value is NaN
bitValue &= ~maskedHalfExponentForNaN;
// Merge sign bit and possible NaN exponent
bitValue |= signAndMaskedExponent;
// The final result
return BitConverter.UInt16BitsToHalf((ushort)bitValue);
}

/// <summary>Explicitly converts a <see cref="ushort" /> value to its nearest representable half-precision floating-point value.</summary>
Expand Down Expand Up @@ -881,32 +928,77 @@ public static explicit operator double(Half value)
/// <summary>Explicitly converts a half-precision floating-point value to its nearest representable <see cref="float" /> value.</summary>
/// <param name="value">The value to convert.</param>
/// <returns><paramref name="value" /> converted to its nearest representable <see cref="float" /> value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
MineCake147E marked this conversation as resolved.
Show resolved Hide resolved
public static explicit operator float(Half value)
{
bool sign = IsNegative(value);
int exp = value.BiasedExponent;
uint sig = value.TrailingSignificand;

if (exp == MaxBiasedExponent)
{
if (sig != 0)
{
return CreateSingleNaN(sign, (ulong)sig << 54);
}
return sign ? float.NegativeInfinity : float.PositiveInfinity;
}

if (exp == 0)
{
if (sig == 0)
{
return BitConverter.UInt32BitsToSingle(sign ? float.SignMask : 0); // Positive / Negative zero
}
(exp, sig) = NormSubnormalF16Sig(sig);
exp -= 1;
}

return CreateSingle(sign, (byte)(exp + 0x70), sig << 13);
#region Explanation of this algorithm
// This algorithm converts a half-precision floating-point number to a single-precision floating-point number by rearranging the bit sequence and multiplying it as a floating-point number.
// However, it introduces some tricks to avoid multiplying denormalized numbers and to deal with exceptions such as infinity and NaN without using branch instructions.
//
// The bit sequence of a half-precision floating-point number is as follows
// seee_eeff_ffff_ffff
// The bit sequence of a single-precision floating-point number is as follows
// seee_eeee_efff_ffff_ffff_ffff_ffff_ffff
// In both cases, "_" separates groups of four bits (one hexadecimal digit each), "s" is the sign, "e" is the exponent part, and "f" is the mantissa part.
// In half-precision, the exponent part is 5 bits and the mantissa part is 10 bits. In single precision, the exponent is 8 bits and the mantissa is 23 bits.
// Both formats use an offset binary representation for the exponent part: the exponent part for 1.0 is half of the maximum value for either precision, i.e., 127 for single-precision and 15 for half-precision.
// The mantissa part is normalized when the exponent part is nonzero, since in binary numbers, 1 appears as the most significant digit for any nonzero number.
//
// This conversion algorithm takes advantage of the similarity between the two formats.
// By isolating the sign part from the half-precision bitstring and shifting it 13 bits to the left, the boundary between the exponent and mantissa parts matches with that of single-precision.
// In other words,
// 0eeeeeffffffffff is rearranged to
// 0000eeeeeffffffffff0000000000000
// which matches the boundary between the exponent and mantissa parts of single-precision floating-point number:
// seeeeeeeefffffffffffffffffffffff
//
// After rearrangement, this bit sequence is multiplied by the constant 5.192297E+33f in the floating-point number multiplication unit.
// However, most hardware cannot efficiently handle the multiplication of denormalized numbers.
// Denormalized numbers are more common in half-precision than in single-precision, so they cannot be ignored.
//
// Instead of multiplying, a single constant is selected and added to the rearranged bit sequence in the integer addition unit.
// For a normalized number the constant is 0x3800_0000u, which is chosen to add 112 to the exponent part; 112 is 127 subtracted by 15.
// For Infinity or NaN, that constant is doubled to 0x7000_0000u, so that the exponent part becomes all ones.
// For a denormalized number (or zero), the constant is 0x3880_0000u instead, which makes the value behave as a normalized number with an extra leading 1.
// Then, if the value is a denormalized number, the constant 6.1035156E-05f (the bit pattern 0x3880_0000u read as a float) is subtracted in the floating-point number subtraction unit to remove that extra leading 1.
// The above operation produces the same result as if the rearranged bit sequence were multiplied by the constant 5.192297E+33f.
// Finally, merging the isolated sign bits completes the conversion.
#endregion

// The smallest positive normal number in Half, converted to Single
const uint ExponentLowerBound = 0x3880_0000u;
// BitConverter.SingleToUInt32Bits(1.0f) - ((uint)BitConverter.HalfToUInt16Bits((Half)1.0f) << 13)
const uint ExponentOffset = 0x3800_0000u;
// Mask for sign bit in Single
const uint FloatSignMask = float.SignMask;
// Extract the internal representation of value
short valueInInt16Bits = BitConverter.HalfToInt16Bits(value);
// Sign-extend to 32 bits so that the sign is still present in bit 31 after the 13-bit left shift below
uint bitValueInProcess = (uint)(int)valueInInt16Bits;
// Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
uint offsetExponent = bitValueInProcess & 0x7c00u;
// ~0u when the exponent bits are all zero (value is subnormal or zero), 0 otherwise
uint subnormalMask = (uint)-Unsafe.BitCast<bool, byte>(offsetExponent == 0u);
// 1 when value is either Infinity or NaN, 0 otherwise (despite the name, this is used as a shift count below, not as a bit mask)
int infinityOrNaNMask = Unsafe.BitCast<bool, byte>(offsetExponent == 0x7c00u);
// 0x3880_0000u if value is subnormal, 0 otherwise
uint maskedExponentLowerBound = subnormalMask & ExponentLowerBound;
MineCake147E marked this conversation as resolved.
Show resolved Hide resolved
// 0x3880_0000u if value is subnormal, 0x3800_0000u otherwise
uint offsetMaskedExponentLowerBound = ExponentOffset | maskedExponentLowerBound;
// Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
bitValueInProcess <<= 13;
// Double the offsetMaskedExponentLowerBound if value is either Infinity or NaN (shift left by 1), giving 0x7000_0000u
offsetMaskedExponentLowerBound <<= infinityOrNaNMask;
// Extract sign bit of value
uint sign = bitValueInProcess & FloatSignMask;
// Extract exponent bits and fraction bits of value
bitValueInProcess &= 0x0FFF_E000;
// Adjust exponent to match the range of exponent
bitValueInProcess += offsetMaskedExponentLowerBound;
// If value is subnormal, remove unnecessary 1 on top of fraction bits (otherwise this subtracts 0.0f).
uint absoluteValue = BitConverter.SingleToUInt32Bits(BitConverter.UInt32BitsToSingle(bitValueInProcess) - BitConverter.UInt32BitsToSingle(maskedExponentLowerBound));
// Merge sign bit with rest
return BitConverter.UInt32BitsToSingle(absoluteValue | sign);
}

// IEEE 754 specifies NaNs to be propagated
Expand Down