-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Format/Parse binary from/to BigInteger #85392
Changes from 10 commits
3c127ea
64a0ead
4216498
1bcf7f8
63923da
19c701d
56e701f
817c58c
24d88c7
5a90e15
3987f71
8b58eb7
6cea91a
3007b48
cd0a03d
f22d2e9
17434aa
bdfddf5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -440,11 +440,11 @@ private static unsafe bool ParseNumber(ref char* str, char* strEnd, NumberStyles | |
int digEnd = 0; | ||
while (true) | ||
{ | ||
if (char.IsAsciiDigit(ch) || (((options & NumberStyles.AllowHexSpecifier) != 0) && char.IsBetween((char)(ch | 0x20), 'a', 'f'))) | ||
if ((options & NumberStyles.AllowBinarySpecifier) == 0 ? char.IsAsciiDigit(ch) || ((options & NumberStyles.AllowHexSpecifier) != 0 && char.IsBetween((char)(ch | 0x20), 'a', 'f')) : char.IsBetween(ch, '0', '1')) | ||
{ | ||
state |= StateDigits; | ||
|
||
if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && ((options & NumberStyles.AllowHexSpecifier) != 0))) | ||
if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && (options & (NumberStyles.AllowHexSpecifier | NumberStyles.AllowBinarySpecifier)) != 0)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we hoist this Perhaps insert around line 440... const bool allowBinary = (options & NumberStyles.AllowBinarySpecifier) != 0 ;
const bool allowHex = (options & NumberStyles.AllowHexSpecifier) != 0 ;
const bool allowLeadingZero = allowBinary || allowHex; Then inside the loop, at line 443 looks like if (allowBinary
? (char == '0' || char == '1)
: char.IsAsciiDigit(ch) || (allowHex && char.IsBetween((char)(ch | 0x20), 'a', 'f'))) and line 447 becomes if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && allowLeadingZero)) The same thing could be done for options tested elsewhere inside the loop if performance testing makes that wise... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am also not sure what's going on with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. love all the new tests :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Got your point. As you found all the existing options tests code were inside whole loop, and during run time some option tests may happen only when data is met, so not sure whether we should hoist 'options' test which will turn to be called once but always happen. That is why I did not change this convention :)
I am also not sure, so that I just added Binary stuff behind it like Hex :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Splitting it apart for readability is generally helpful. The logic is getting quite verbose to have on a single line IMO. For the primitive types, we have it split apart into some early checks that go to The check for "IsValidChar" can then be specialized by use of the generics and made a lot cheaper. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tannergooding Fixed. After introducing Hex/Bin parser types, here simplifies not only prev line 447, but also prev line 443 which involved hex/binary checks previously. Please check, thanks. |
||
{ | ||
if (digCount < maxParseDigits) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -285,7 +285,8 @@ internal static class BigNumber | |
| NumberStyles.AllowLeadingSign | NumberStyles.AllowTrailingSign | ||
| NumberStyles.AllowParentheses | NumberStyles.AllowDecimalPoint | ||
| NumberStyles.AllowThousands | NumberStyles.AllowExponent | ||
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier); | ||
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier | ||
| NumberStyles.AllowBinarySpecifier); | ||
|
||
private static ReadOnlySpan<uint> UInt32PowersOfTen => new uint[] { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; | ||
|
||
|
@@ -371,10 +372,13 @@ internal static ParsingStatus TryParseBigInteger(ReadOnlySpan<char> value, Numbe | |
{ | ||
return HexNumberToBigInteger(ref bigNumber, out result); | ||
} | ||
else | ||
|
||
if ((style & NumberStyles.AllowBinarySpecifier) != 0) | ||
{ | ||
return NumberToBigInteger(ref bigNumber, out result); | ||
return BinNumberToBigInteger(ref bigNumber, out result); | ||
} | ||
|
||
return NumberToBigInteger(ref bigNumber, out result); | ||
} | ||
|
||
internal static BigInteger ParseBigInteger(string value, NumberStyles style, NumberFormatInfo info) | ||
|
@@ -511,6 +515,96 @@ private static ParsingStatus HexNumberToBigInteger(ref BigNumberBuffer number, o | |
} | ||
} | ||
|
||
private static ParsingStatus BinNumberToBigInteger(ref BigNumberBuffer number, out BigInteger result) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: can we expand "Bin" to "Binary" in the name? I realize this is likely an attempt to match the conciseness of "Hex", but "Bin" and "Big" are so close that this keeps making me do a double-take to know which it was. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Renamed method names, also for other 'Bin' related ones. |
||
{ | ||
if (number.digits is null || number.digits.Length < 2) | ||
{ | ||
result = default; | ||
return ParsingStatus.Failed; | ||
} | ||
|
||
const int DigitsPerBlock = 32; | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
int totalDigitCount = number.digits.Length - 1; // Ignore trailing '\0' | ||
|
||
int blockCount = Math.DivRem(totalDigitCount, DigitsPerBlock, out int remainingDigitsInBlock); | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (remainingDigitsInBlock == 0) | ||
{ | ||
remainingDigitsInBlock = DigitsPerBlock; | ||
} | ||
else | ||
{ | ||
blockCount++; | ||
} | ||
|
||
Debug.Assert(number.digits[0] is '0' or '1'); | ||
bool isNegative = number.digits[0] == '1'; | ||
|
||
uint[]? arrayFromPool = null; | ||
Span<uint> bufferSpan = blockCount <= BigIntegerCalculator.StackAllocThreshold ? | ||
stackalloc uint[blockCount] : (arrayFromPool = ArrayPool<uint>.Shared.Rent(blockCount)).AsSpan(0, blockCount); | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
try | ||
{ | ||
uint currentBlock = isNegative ? 0xFF_FF_FF_FF : 0x0; | ||
int bufferPos = blockCount; | ||
foreach (ReadOnlyMemory<char> chunkMem in number.digits.GetChunks()) | ||
tannergooding marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
ReadOnlySpan<char> chunk = chunkMem.Span; | ||
foreach (char c in chunk) | ||
{ | ||
if (c == '\0') | ||
{ | ||
break; | ||
} | ||
|
||
Debug.Assert(c is '0' or '1'); | ||
currentBlock = (currentBlock << 1) | (uint)(c - '0'); | ||
|
||
if (--remainingDigitsInBlock == 0) | ||
{ | ||
bufferSpan[--bufferPos] = currentBlock; | ||
remainingDigitsInBlock = DigitsPerBlock; | ||
|
||
// we do not need to reset currentBlock now, because it should always set all its bits by left shift in subsequent iterations | ||
} | ||
} | ||
|
||
Debug.Assert(bufferPos > 0 || remainingDigitsInBlock == DigitsPerBlock); | ||
} | ||
|
||
Debug.Assert(bufferPos == 0 && remainingDigitsInBlock == DigitsPerBlock); | ||
|
||
if (isNegative) | ||
{ | ||
NumericsHelpers.DangerousMakeTwosComplement(bufferSpan); | ||
} | ||
|
||
bufferSpan = bufferSpan.TrimEnd(0u); | ||
if (bufferSpan.IsEmpty) | ||
{ | ||
result = BigInteger.Zero; | ||
} | ||
else if (bufferSpan.Length == 1 && bufferSpan[0] <= int.MaxValue) | ||
{ | ||
result = new BigInteger((int)(isNegative ? -bufferSpan[0] : bufferSpan[0]), (uint[]?)null); | ||
} | ||
else | ||
{ | ||
result = new BigInteger(isNegative ? -1 : 1, bufferSpan.ToArray()); | ||
} | ||
|
||
return ParsingStatus.OK; | ||
} | ||
finally | ||
{ | ||
if (arrayFromPool is not null) | ||
{ | ||
ArrayPool<uint>.Shared.Return(arrayFromPool); | ||
} | ||
} | ||
} | ||
|
||
// | ||
// This threshold is for choosing the algorithm to use based on the number of digits. | ||
// | ||
|
@@ -1002,66 +1096,93 @@ internal static char ParseFormatSpecifier(ReadOnlySpan<char> format, out int dig | |
} | ||
} | ||
|
||
private static string? FormatBigIntegerToBin(BigInteger value, int digits) | ||
private static string? FormatBigIntegerToBin(bool targetSpan, BigInteger value, int digits, Span<char> destination, out int charsWritten, out bool spanSuccess) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unlike existing
Please give advice if not proper, thanks ! |
||
{ | ||
// Get the bytes that make up the BigInteger. | ||
byte[]? arrayToReturnToPool = null; | ||
Span<byte> bits = stackalloc byte[64]; // arbitrary threshold | ||
if (!value.TryWriteOrCountBytes(bits, out int bytesWrittenOrNeeded)) | ||
Span<byte> bytes = stackalloc byte[64]; // arbitrary threshold | ||
stephentoub marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (!value.TryWriteOrCountBytes(bytes, out int bytesWrittenOrNeeded)) | ||
{ | ||
bits = arrayToReturnToPool = ArrayPool<byte>.Shared.Rent(bytesWrittenOrNeeded); | ||
bool success = value.TryWriteBytes(bits, out bytesWrittenOrNeeded); | ||
bytes = arrayToReturnToPool = ArrayPool<byte>.Shared.Rent(bytesWrittenOrNeeded); | ||
bool success = value.TryWriteBytes(bytes, out _); | ||
Debug.Assert(success); | ||
stephentoub marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
bits = bits.Slice(0, bytesWrittenOrNeeded); | ||
bytes = bytes.Slice(0, bytesWrittenOrNeeded); | ||
|
||
Debug.Assert(!bits.IsEmpty); | ||
Debug.Assert(!bytes.IsEmpty); | ||
|
||
byte highByte = bits[^1]; | ||
byte highByte = bytes[^1]; | ||
|
||
long tmpCharCount = highByte == 0 ? 1 : 8 - byte.LeadingZeroCount(highByte); | ||
tmpCharCount += (long)(bits.Length - 1) << 3; | ||
int charsInHighByte = 9 - byte.LeadingZeroCount(value._sign >= 0 ? highByte : (byte)~highByte); | ||
long tmpCharCount = charsInHighByte + ((long)(bytes.Length - 1) << 3); | ||
|
||
if (tmpCharCount > Array.MaxLength) | ||
{ | ||
Debug.Assert(arrayToReturnToPool is not null); | ||
ArrayPool<byte>.Shared.Return(arrayToReturnToPool); | ||
|
||
throw GetException(ParsingStatus.Overflow); | ||
throw new FormatException(SR.Format_TooLarge); | ||
} | ||
|
||
int charsForBits = (int)tmpCharCount; | ||
|
||
Debug.Assert(digits < Array.MaxLength); | ||
int charsIncludeDigits = digits > charsForBits ? digits : charsForBits; | ||
|
||
// each byte is typically eight chars | ||
ValueStringBuilder sb = charsIncludeDigits > 512 ? new ValueStringBuilder(charsIncludeDigits) : new ValueStringBuilder(stackalloc char[charsIncludeDigits]); | ||
int charsIncludeDigits = Math.Max(digits, charsForBits); | ||
|
||
if (digits > charsForBits) | ||
try | ||
{ | ||
sb.Append(value._sign >= 0 ? '0' : '1', digits - charsForBits); | ||
} | ||
scoped ValueStringBuilder sb; | ||
if (targetSpan) | ||
{ | ||
if (charsIncludeDigits > destination.Length) | ||
{ | ||
charsWritten = 0; | ||
spanSuccess = false; | ||
return null; | ||
} | ||
|
||
if (highByte == 0) | ||
{ | ||
sb.Append('0'); | ||
} | ||
else | ||
{ | ||
AppendByte(ref sb, highByte, 7 - byte.LeadingZeroCount(highByte)); | ||
} | ||
// Because we have ensured destination can take actual char length, so now just use ValueStringBuilder as wrapper so that subsequent logic can be reused by 2 flows (targetSpan and non-targetSpan); | ||
// meanwhile there is no need to copy to destination again after format data for targetSpan flow. | ||
sb = new ValueStringBuilder(destination); | ||
} | ||
else | ||
{ | ||
// each byte is typically eight chars | ||
sb = charsIncludeDigits > 512 ? new ValueStringBuilder(charsIncludeDigits) : new ValueStringBuilder(stackalloc char[charsIncludeDigits]); | ||
} | ||
|
||
for (int i = bits.Length - 2; i >= 0; i--) | ||
{ | ||
AppendByte(ref sb, bits[i]); | ||
} | ||
if (digits > charsForBits) | ||
{ | ||
sb.Append(value._sign >= 0 ? '0' : '1', digits - charsForBits); | ||
} | ||
|
||
AppendByte(ref sb, highByte, charsInHighByte - 1); | ||
|
||
for (int i = bytes.Length - 2; i >= 0; i--) | ||
{ | ||
AppendByte(ref sb, bytes[i]); | ||
} | ||
|
||
Debug.Assert(sb.Length == charsIncludeDigits); | ||
|
||
if (targetSpan) | ||
{ | ||
charsWritten = charsIncludeDigits; | ||
spanSuccess = true; | ||
return null; | ||
} | ||
|
||
if (arrayToReturnToPool is not null) | ||
charsWritten = 0; | ||
spanSuccess = false; | ||
return sb.ToString(); | ||
} | ||
finally | ||
{ | ||
ArrayPool<byte>.Shared.Return(arrayToReturnToPool); | ||
if (arrayToReturnToPool is not null) | ||
{ | ||
ArrayPool<byte>.Shared.Return(arrayToReturnToPool); | ||
} | ||
} | ||
return sb.ToString(); | ||
|
||
static void AppendByte(ref ValueStringBuilder sb, byte b, int startHighBit = 7) | ||
{ | ||
|
@@ -1096,7 +1217,10 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo | |
{ | ||
return FormatBigIntegerToHex(targetSpan, value, fmt, digits, info, destination, out charsWritten, out spanSuccess); | ||
} | ||
|
||
if (fmt == 'b' || fmt == 'B') | ||
{ | ||
return FormatBigIntegerToBin(targetSpan, value, digits, destination, out charsWritten, out spanSuccess); | ||
} | ||
|
||
if (value._bits == null) | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
would that last bit of the ternary be better as
: (char == '0' || char == '1)
instead of thechar.IsBetween(ch, '0', '1')
? Seems we're really driving toward optimal performance :)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I also struggled on these 2, but selected to use char.IsBetween() based on its implementation because it can always just use one check but need 2 arithmetic operation..
Np, changed to
(char == '0' || char == '1)
now, thanks.