dotnet · adamsitnik · Jul 7, 2023 · Feb 4, 2023 · Feb 5, 2023 · Feb 6, 2023
diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs
@@ -8,6 +8,7 @@
 using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
 
 namespace System
 {
@@ -614,30 +615,76 @@ public static explicit operator Half(double value)
         /// <returns><paramref name="value" /> converted to its nearest representable half-precision floating-point value.</returns>
         public static explicit operator Half(float value)
         {
-            const int SingleMaxExponent = 0xFF;
-
-            uint floatInt = BitConverter.SingleToUInt32Bits(value);
-            bool sign = (floatInt & float.SignMask) >> float.SignShift != 0;
-            int exp = (int)(floatInt & float.BiasedExponentMask) >> float.BiasedExponentShift;
-            uint sig = floatInt & float.TrailingSignificandMask;
-
-            if (exp == SingleMaxExponent)
-            {
-                if (sig != 0) // NaN
-                {
-                    return CreateHalfNaN(sign, (ulong)sig << 41); // Shift the significand bits to the left end
-                }
-                return sign ? NegativeInfinity : PositiveInfinity;
-            }
-
-            uint sigHalf = sig >> 9 | ((sig & 0x1FFU) != 0 ? 1U : 0U); // RightShiftJam
-
-            if ((exp | (int)sigHalf) == 0)
-            {
-                return new Half(sign, 0, 0);
-            }
-
-            return new Half(RoundPackToHalf(sign, (short)(exp - 0x71), (ushort)(sigHalf | 0x4000)));
+            // TODO: Expand the outline at the end of the region below into a detailed, step-by-step explanation of this branchless conversion algorithm
+            #region Explanation of this algorithm
+            // This algorithm converts a single-precision floating-point number to a half-precision floating-point number by multiplying it as a floating-point number and rearranging the bit sequence.
+            // However, it introduces some tricks to implement rounding correctly, to avoid multiplying denormalized numbers and to deal with exceptions such as infinity and NaN without using branch instructions.
+            //
+            // The bit sequence of a half-precision floating-point number is as follows
+            // seee_eeff_ffff_ffff
+            // The bit sequence of a single-precision floating-point number is as follows
+            // seee_eeee_efff_ffff_ffff_ffff_ffff_ffff
+            // In both cases, "_" is the hexadecimal separator, "s" is the sign, "e" is the exponent part, and "f" is the mantissa part.
+            // In half-precision, the exponent part is 5 bits and the mantissa part is 10 bits. In single precision, the exponent is 8 bits and the mantissa is 23 bits.
+            // Both formats use an offset binary representation for the exponent part: the exponent part for 1.0 is half of the maximum value for either precision, i.e., 127 for single-precision and 15 for half-precision.
+            // The mantissa part is normalized when the exponent part is nonzero, since in binary numbers, 1 appears as the most significant digit for any nonzero number.
+            // The code below clamps the magnitude to 65520 (so that too-large inputs round to Infinity), then adds a power of two whose exponent is 13 above the input's, which makes the floating-point addition round away the 13 mantissa bits that Half cannot hold.
+            // It then lowers the exponent by 112 (127 minus 15), subtracts the added power of two again, shifts the bits right by 13, and finally merges in the sign bit and, for NaN, the all-ones exponent.
+            #endregion
+            // Bit pattern of 2^-14 (the smallest positive normal Half) as a Single; smaller inputs are rounded as if they had this exponent
+            const uint MinExp = 0x3880_0000u;
+            // 112 << 23: the difference between the exponent biases (127 - 15), positioned at Single's exponent field
+            const uint Exponent112 = 0x3800_0000u;
+            // Exponent mask
+            const uint SingleBiasedExponentMask = float.BiasedExponentMask;
+            // 13 << 23: the number of mantissa bits to round away (23 - 10), positioned at Single's exponent field
+            const uint Exponent13 = 0x0680_0000u;
+            // Upper clamp for the magnitude: 65520 lies halfway between 65504 (the largest finite Half) and 65536, so despite the constant's name it and everything above it round to Infinity in Half
+            const float MaxHalfValueBelowInfinity = 65520.0f;
+            uint bitValue = BitConverter.SingleToUInt32Bits(value);
+            // Extract sign bit
+            uint sign = bitValue & float.SignMask;
+            // Clear sign bit
+            value = float.Abs(value);
+            // Clamp values that become Infinity in Half. (float.Min now emits vminps instruction if one of two arguments is a constant)
+
+            value = float.Min(MaxHalfValueBelowInfinity, value);
+            bitValue = BitConverter.SingleToUInt32Bits(value);
+            // Detecting NaN (~0u if value is not NaN, 0 if value is NaN)
+            uint realMask = (uint)(Unsafe.BitCast<bool, sbyte>(float.IsNaN(value)) - 1);
+            uint underflowMask = (uint)-Unsafe.BitCast<bool, byte>(MinExp > bitValue); // ~0u if the magnitude is below MinExp (2^-14), 0 otherwise
+            // Substitute MinExp for inputs below it, so that results that are subnormal in Half are rounded at a fixed exponent
+            uint exponentOffset0 = (MinExp & underflowMask) | (~underflowMask & bitValue);
+            // Extract exponent
+            exponentOffset0 &= SingleBiasedExponentMask;
+            // Add 13 to the exponent
+            exponentOffset0 += Exponent13;
+            // exponentOffset1 is exponentOffset0 with its exponent lowered by 112
+            uint exponentOffset1 = exponentOffset0 - Exponent112;
+            // Zero whole exponentOffset1 if value is NaN
+            exponentOffset1 &= realMask;
+            // Round Single into Half's precision (NaN also gets modified here, just setting the MSB of fraction)
+            value += BitConverter.UInt32BitsToSingle(exponentOffset0);
+            // Subtract 112 from the exponent
+            value = BitConverter.UInt32BitsToSingle(BitConverter.SingleToUInt32Bits(value) - Exponent112);
+            // Remove the extra leading 1 that the rounding addition introduced
+            value -= BitConverter.UInt32BitsToSingle(exponentOffset1);
+            // Now internal representation is the absolute value represented in Half, shifted 13 bits left, with some exceptions like NaN having strange exponents
+            bitValue = BitConverter.SingleToUInt32Bits(value) >> 13;
+            // Move the sign bit from bit 31 down to bit 15
+            sign >>>= 16;
+            // 0x7C00 (all exponent bits of Half) if value is NaN, 0 otherwise
+            uint maskedHalfExponentForNaN = ~realMask & 0x7C00u;
+            // Clear the upper unnecessary bits
+            bitValue &= 0x7fffu;
+            // Merge sign bit with possible NaN exponent
+            uint signAndMaskedExponent = maskedHalfExponentForNaN | sign;
+            // Clear exponents if value is NaN
+            bitValue &= ~maskedHalfExponentForNaN;
+            // Merge sign bit and possible NaN exponent into the result
+            bitValue |= signAndMaskedExponent;
+            // The final result
+            return BitConverter.UInt16BitsToHalf((ushort)bitValue);
         }
 
         /// <summary>Explicitly converts a <see cref="ushort" /> value to its nearest representable half-precision floating-point value.</summary>
@@ -881,32 +928,77 @@ public static explicit operator double(Half value)
         /// <summary>Explicitly converts a half-precision floating-point value to its nearest representable <see cref="float" /> value.</summary>
         /// <param name="value">The value to convert.</param>
         /// <returns><paramref name="value" /> converted to its nearest representable <see cref="float" /> value.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static explicit operator float(Half value)
         {
-            bool sign = IsNegative(value);
-            int exp = value.BiasedExponent;
-            uint sig = value.TrailingSignificand;
-
-            if (exp == MaxBiasedExponent)
-            {
-                if (sig != 0)
-                {
-                    return CreateSingleNaN(sign, (ulong)sig << 54);
-                }
-                return sign ? float.NegativeInfinity : float.PositiveInfinity;
-            }
-
-            if (exp == 0)
-            {
-                if (sig == 0)
-                {
-                    return BitConverter.UInt32BitsToSingle(sign ? float.SignMask : 0); // Positive / Negative zero
-                }
-                (exp, sig) = NormSubnormalF16Sig(sig);
-                exp -= 1;
-            }
-
-            return CreateSingle(sign, (byte)(exp + 0x70), sig << 13);
+            #region Explanation of this algorithm
+            // This algorithm converts a half-precision floating-point number to a single-precision floating-point number by rearranging the bit sequence and multiplying it as a floating-point number.
+            // However, it introduces some tricks to avoid multiplying denormalized numbers and to deal with exceptions such as infinity and NaN without using branch instructions.
+            //
+            // The bit sequence of a half-precision floating-point number is as follows
+            // seee_eeff_ffff_ffff
+            // The bit sequence of a single-precision floating-point number is as follows
+            // seee_eeee_efff_ffff_ffff_ffff_ffff_ffff
+            // In both cases, "_" is the hexadecimal separator, "s" is the sign, "e" is the exponent part, and "f" is the mantissa part.
+            // In half-precision, the exponent part is 5 bits and the mantissa part is 10 bits. In single precision, the exponent is 8 bits and the mantissa is 23 bits.
+            // Both formats use an offset binary representation for the exponent part: the exponent part for 1.0 is half of the maximum value for either precision, i.e., 127 for single-precision and 15 for half-precision.
+            // The mantissa part is normalized when the exponent part is nonzero, since in binary numbers, 1 appears as the most significant digit for any nonzero number.
+            //
+            // This conversion algorithm takes advantage of the similarity between the two formats.
+            // By isolating the sign part from the half-precision bitstring and shifting it 13 bits to the left, the boundary between the exponent and mantissa parts matches with that of single-precision.
+            // In other words,
+            //    0eeeeeffffffffff              is rearranged to
+            // 0000eeeeeffffffffff0000000000000
+            // which matches the boundary between the exponent and mantissa parts of single-precision floating-point number:
+            // seeeeeeeefffffffffffffffffffffff
+            //
+            // Conceptually, after rearrangement this bit sequence would be multiplied by the constant 5.192297E+33f (2^112) in the floating-point number multiplication unit.
+            // However, most hardware cannot efficiently handle the multiplication of denormalized numbers.
+            // Denormalized numbers are more common in half-precision than in single-precision, so they cannot be ignored.
+            //
+            // Instead of multiplying, one of three constants is added to the rearranged bit sequence in the integer addition unit, selected without branch instructions:
+            // for normalized numbers, 0x3800_0000u, which adds 112 to the exponent part; 112 is 127 subtracted by 15.
+            // For Infinity or NaN, that constant doubled, 0x7000_0000u, which turns the all-ones half-precision exponent into the all-ones single-precision exponent.
+            // For zero or denormalized numbers, 0x3880_0000u, which makes the bit sequence behave as a normalized number with the exponent of the smallest normal half-precision number.
+            // Then, if the value is zero or a denormalized number, the constant 6.1035156E-05f (the same bits 0x3880_0000u read as a single-precision number) is subtracted in the floating-point number subtraction unit.
+            // The above operation produces the same result as if the rearranged bit sequence were multiplied by the constant 5.192297E+33f.
+            // Finally, merging the isolated sign bits completes the conversion.
+            #endregion
+
+            // The smallest positive normal number in Half, converted to Single
+            const uint ExponentLowerBound = 0x3880_0000u;
+            // BitConverter.SingleToUInt32Bits(1.0f) - ((uint)BitConverter.HalfToUInt16Bits((Half)1.0f) << 13)
+            const uint ExponentOffset = 0x3800_0000u;
+            // Mask for sign bit in Single
+            const uint FloatSignMask = float.SignMask;
+            // Extract the internal representation of value
+            short valueInInt16Bits = BitConverter.HalfToInt16Bits(value);
+            // Sign-extend to 32 bits, which copies the sign bit into the upper bits
+            uint bitValueInProcess = (uint)(int)valueInInt16Bits;
+            // Extract exponent bits of value (BiasedExponent is not for here as it performs unnecessary shift)
+            uint offsetExponent = bitValueInProcess & 0x7c00u;
+            // ~0u when value is subnormal or zero (all exponent bits clear), 0 otherwise
+            uint subnormalMask = (uint)-Unsafe.BitCast<bool, byte>(offsetExponent == 0u);
+            // 1 when value is either Infinity or NaN, 0 otherwise; despite its name this is not an all-ones mask, it is used below as a shift count
+            int infinityOrNaNMask = Unsafe.BitCast<bool, byte>(offsetExponent == 0x7c00u);
+            // 0x3880_0000u if value is subnormal or zero, 0 otherwise
+            uint maskedExponentLowerBound = subnormalMask & ExponentLowerBound;
+            // 0x3880_0000u if value is subnormal or zero, 0x3800_0000u otherwise
+            uint offsetMaskedExponentLowerBound = ExponentOffset | maskedExponentLowerBound;
+            // Match the position of the boundary of exponent bits and fraction bits with IEEE 754 Binary32(Single)
+            bitValueInProcess <<= 13;
+            // Double the offsetMaskedExponentLowerBound (to 0x7000_0000u) if value is either Infinity or NaN
+            offsetMaskedExponentLowerBound <<= infinityOrNaNMask;
+            // Extract sign bit of value
+            uint sign = bitValueInProcess & FloatSignMask;
+            // Extract exponent bits and fraction bits of value
+            bitValueInProcess &= 0x0FFF_E000;
+            // Rebias the exponent for Single by adding the selected offset
+            bitValueInProcess += offsetMaskedExponentLowerBound;
+            // If value is subnormal, remove unnecessary 1 on top of fraction bits.
+            uint absoluteValue = BitConverter.SingleToUInt32Bits(BitConverter.UInt32BitsToSingle(bitValueInProcess) - BitConverter.UInt32BitsToSingle(maskedExponentLowerBound));
+            // Merge sign bit with rest
+            return BitConverter.UInt32BitsToSingle(absoluteValue | sign);
         }
 
         // IEEE 754 specifies NaNs to be propagated