From e16000059e23ffc13bf17919cc054ec3e4a09764 Mon Sep 17 00:00:00 2001 From: slozier Date: Thu, 26 Dec 2024 22:21:49 -0500 Subject: [PATCH] Backport some changes from the 3.6 branch (#1852) * Backport some changes from the 3.6 branch * Fix failing re tests * Backport underscores in integer literals --- Src/IronPython.Modules/_ssl.cs | 38 +- Src/IronPython.Modules/mmap.cs | 13 +- Src/IronPython.Modules/re.cs | 142 ++++--- Src/IronPython/Compiler/Tokenizer.cs | 78 ++-- Src/IronPython/Runtime/FormattingHelper.cs | 62 ++- Src/IronPython/Runtime/LiteralParser.cs | 361 ++++++++---------- .../Runtime/Operations/BigIntegerOps.cs | 83 ++-- Src/IronPython/Runtime/Operations/FloatOps.cs | 7 +- Src/IronPython/Runtime/Operations/IntOps.cs | 120 +++--- Src/IronPython/Runtime/StringFormatSpec.cs | 15 +- Src/IronPython/Runtime/StringFormatter.cs | 10 +- Src/IronPythonTest/Stress/Engine.cs | 2 + Tests/test_strformat.py | 82 +++- 13 files changed, 571 insertions(+), 442 deletions(-) diff --git a/Src/IronPython.Modules/_ssl.cs b/Src/IronPython.Modules/_ssl.cs index 499dc3789..08ade85f6 100644 --- a/Src/IronPython.Modules/_ssl.cs +++ b/Src/IronPython.Modules/_ssl.cs @@ -120,7 +120,8 @@ public class _SSLContext { public _SSLContext(CodeContext context, int protocol) { if (protocol != PROTOCOL_SSLv2 && protocol != PROTOCOL_SSLv23 && protocol != PROTOCOL_SSLv3 && - protocol != PROTOCOL_TLSv1 && protocol != PROTOCOL_TLSv1_1 && protocol != PROTOCOL_TLSv1_2) { + protocol != PROTOCOL_TLSv1 && protocol != PROTOCOL_TLSv1_1 && protocol != PROTOCOL_TLSv1_2 && + protocol != PROTOCOL_TLS_CLIENT && protocol != PROTOCOL_TLS_SERVER) { throw PythonOps.ValueError("invalid protocol version"); } @@ -131,8 +132,8 @@ public _SSLContext(CodeContext context, int protocol) { if (protocol != PROTOCOL_SSLv3) options |= OP_NO_SSLv3; - verify_mode = SSL_VERIFY_NONE; - check_hostname = false; + verify_mode = protocol == PROTOCOL_TLS_CLIENT ? CERT_REQUIRED : SSL_VERIFY_NONE; + check_hostname = protocol == PROTOCOL_TLS_CLIENT; } public void set_ciphers(CodeContext context, string ciphers) { @@ -200,11 +201,7 @@ public void set_ecdh_curve(CodeContext context, [NotNone] Bytes curve) { public void load_cert_chain(CodeContext context, string certfile, string keyfile = null, object password = null) { if (keyfile is not null) throw new NotImplementedException(nameof(keyfile)); if (password is not null) throw new NotImplementedException(nameof(password)); -#if NET - _cert = X509Certificate2.CreateFromPemFile(certfile, keyfile); -#else _cert = ReadCertificate(context, certfile, readKey: true); -#endif } public PythonList get_ca_certs(CodeContext context, bool binary_form = false) { @@ -766,6 +763,17 @@ public void write_eof() { #nullable restore + [PythonType] + public class SSLSession { + public object has_ticket { get; } + public object id { get; } + public object ticket_lifetime_hint { get; } + public object time { get; } + public object timeout { get; } + + private SSLSession() { } + } + public static object txt2obj(CodeContext context, string txt, bool name = false) { Asn1Object obj = null; if (name) { @@ -995,7 +1003,11 @@ private static PythonTuple IssuerFieldToPython(CodeContext context, string p) { private static X509Certificate2 ReadCertificate(CodeContext context, string filename, bool readKey = false) { #if NET if (readKey) { - return X509Certificate2.CreateFromPemFile(filename); + try { + return X509Certificate2.CreateFromPemFile(filename); + } catch (Exception e) { + throw ErrorDecoding(context, filename, e); + } } #endif @@ -1239,16 +1251,19 @@ private static Exception ErrorDecoding(CodeContext context, params object[] args public const int PROTOCOL_TLSv1 = 3; public const int PROTOCOL_TLSv1_1 = 4; public const int PROTOCOL_TLSv1_2 = 5; + public const int PROTOCOL_TLS_CLIENT = 16; + public const int PROTOCOL_TLS_SERVER = 17; public const int OP_ALL = unchecked((int)0x800003FF); - public const int OP_CIPHER_SERVER_PREFERENCE = 0x400000; - public const int OP_SINGLE_DH_USE = 0x100000; - public const int OP_SINGLE_ECDH_USE = 0x80000; + public const int OP_CIPHER_SERVER_PREFERENCE = 0; // 0x400000; + public const int OP_SINGLE_DH_USE = 0; // 0x100000; + public const int OP_SINGLE_ECDH_USE = 0; // 0x80000; public const int OP_NO_SSLv2 = 0x01000000; public const int OP_NO_SSLv3 = 0x02000000; public const int OP_NO_TLSv1 = 0x04000000; public const int OP_NO_TLSv1_1 = 0x10000000; public const int OP_NO_TLSv1_2 = 0x08000000; + public const int OP_NO_TLSv1_3 = 0; // 0x20000000; internal const int OP_NO_COMPRESSION = 0x20000; internal const int OP_NO_ALL = OP_NO_SSLv2 | OP_NO_SSLv3 | OP_NO_TLSv1 | OP_NO_TLSv1_1 | OP_NO_TLSv1_2 | OP_NO_COMPRESSION; @@ -1274,6 +1289,7 @@ private static Exception ErrorDecoding(CodeContext context, params object[] args public const bool HAS_NPN = false; public const bool HAS_ALPN = false; public const bool HAS_TLS_UNIQUE = false; + public const bool HAS_TLSv1_3 = false; private const int SSL_VERIFY_NONE = 0x00; private const int SSL_VERIFY_PEER = 0x01; diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index d15f61157..c512385c8 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -727,20 +727,23 @@ public object tell() { } } - public void write([BytesLike] IList s) { + public int write([NotNone] IBufferProtocol s) { + using var buffer = s.GetBuffer(); using (new MmapLocker(this)) { EnsureWritable(); long pos = Position; - if (_view.Capacity - pos < s.Count) { + if (_view.Capacity - pos < buffer.AsReadOnlySpan().Length) { throw PythonOps.ValueError("data out of range"); } - byte[] data = s as byte[] ?? (s is Bytes b ? b.UnsafeByteArray : s.ToArray()); - _view.WriteArray(pos, data, 0, s.Count); + byte[] data = buffer.AsUnsafeArray() ?? buffer.ToArray(); + _view.WriteArray(pos, data, 0, data.Length); - Position = pos + s.Count; + Position = pos + data.Length; + + return data.Length; } } diff --git a/Src/IronPython.Modules/re.cs b/Src/IronPython.Modules/re.cs index 0046a66be..c3df0eb71 100644 --- a/Src/IronPython.Modules/re.cs +++ b/Src/IronPython.Modules/re.cs @@ -74,26 +74,21 @@ internal enum ReFlags : int { public const int A = (int)ReFlags.ASCII; // long forms - public const int TEMPLATE = (int)ReFlags.TEMPLATE; + public const int TEMPLATE = (int)ReFlags.TEMPLATE; public const int IGNORECASE = (int)ReFlags.IGNORECASE; - public const int LOCALE = (int)ReFlags.LOCALE; - public const int MULTILINE = (int)ReFlags.MULTILINE; - public const int DOTALL = (int)ReFlags.DOTALL; - public const int UNICODE = (int)ReFlags.UNICODE; - public const int VERBOSE = (int)ReFlags.VERBOSE; - public const int ASCII = (int)ReFlags.ASCII; + public const int LOCALE = (int)ReFlags.LOCALE; + public const int MULTILINE = (int)ReFlags.MULTILINE; + public const int DOTALL = (int)ReFlags.DOTALL; + public const int UNICODE = (int)ReFlags.UNICODE; + public const int VERBOSE = (int)ReFlags.VERBOSE; + public const int ASCII = (int)ReFlags.ASCII; #endregion #region Public API Surface - public static Pattern compile(CodeContext/*!*/ context, object? pattern, int flags = 0) { - try { - return GetPattern(context, pattern, flags, true); - } catch (ArgumentException e) { - throw PythonExceptions.CreateThrowable(error(context), e.Message); - } - } + public static Pattern compile(CodeContext/*!*/ context, object? pattern, int flags = 0) + => GetPattern(context, pattern, flags, true); public const string engine = "cli reg ex"; @@ -170,8 +165,11 @@ public class Pattern : IWeakReferenceable { private WeakRefTracker? _weakRefTracker; internal Pattern(CodeContext/*!*/ context, object pattern, ReFlags flags = 0, bool compiled = false) { - _prePattern = PreParseRegex(context, PatternAsString(pattern, ref flags), (flags & ReFlags.VERBOSE) != 0, out ReFlags options); + _prePattern = PreParseRegex(context, PatternAsString(pattern, ref flags), verbose: flags.HasFlag(ReFlags.VERBOSE), isBytes: !flags.HasFlag(ReFlags.UNICODE), out ReFlags options); flags |= options; + // TODO: re-enable in 3.6 + // if (flags.HasFlag(ReFlags.UNICODE | ReFlags.LOCALE)) throw PythonOps.ValueError("cannot use LOCALE flag with a str pattern"); + if (flags.HasFlag(ReFlags.ASCII | ReFlags.LOCALE)) throw PythonOps.ValueError("ASCII and LOCALE flags are incompatible"); _re = GenRegex(context, _prePattern, flags, compiled, false); this.pattern = pattern; this.flags = (int)flags; @@ -425,7 +423,7 @@ public object sub(CodeContext/*!*/ context, object? repl, object? @string, int c }; prevEnd = match.Index + match.Length; - if (replacement != null) return UnescapeGroups(match, replacement); + if (replacement != null) return UnescapeGroups(context, match, replacement); return ValidateString(PythonCalls.Call(context, repl, Match.Make(match, this, input))); }, count)); @@ -453,7 +451,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string, prevEnd = match.Index + match.Length; totalCount++; - if (replacement != null) return UnescapeGroups(match, replacement); + if (replacement != null) return UnescapeGroups(context, match, replacement); return ValidateString(PythonCalls.Call(context, repl, Match.Make(match, this, input))); }, @@ -464,7 +462,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string, public int flags { get; } - public PythonDictionary groupindex { + public MappingProxy groupindex { get { if (_groups == null) { PythonDictionary d = new PythonDictionary(); @@ -480,7 +478,7 @@ public PythonDictionary groupindex { } _groups = d; } - return _groups; + return new MappingProxy(_groups); } } @@ -489,7 +487,7 @@ public PythonDictionary groupindex { public object pattern { get; } public override bool Equals(object? obj) - => obj is Pattern other && other.pattern == pattern && other.flags == flags; + => obj is Pattern other && PythonOps.IsOrEqualsRetBool(other.pattern, pattern) && other.flags == flags; public override int GetHashCode() => pattern.GetHashCode() ^ flags; @@ -646,6 +644,8 @@ private Match(RegExpMatch m, Pattern pattern, string text, int pos, int endpos) #region Public API Surface + public object? this[object? index] => group(index); + public string __repr__(CodeContext context) => $""; @@ -856,7 +856,7 @@ private Group GetGroup(object? group) { int GetGroupIndex(object? group) { int grpIndex; - if (!Converter.TryConvertToInt32(group, out grpIndex)) { + if (!Converter.TryConvertToIndex(group, out grpIndex, throwOverflowError: false, throwTypeError: false)) { if (group is string s) { grpIndex = re._re.GroupNumberFromName(s); } else if (group is ExtensibleString es) { @@ -929,7 +929,7 @@ private static RegexOptions FlagsToOption(ReFlags flags) { /// Preparses a regular expression text returning a ParsedRegex class /// that can be used for further regular expressions. /// - private static string PreParseRegex(CodeContext/*!*/ context, string pattern, bool verbose, out ReFlags options) { + private static string PreParseRegex(CodeContext/*!*/ context, string pattern, bool verbose, bool isBytes, out ReFlags options) { var userPattern = pattern; options = default; if (verbose) options |= ReFlags.VERBOSE; @@ -1079,39 +1079,37 @@ static string ApplyVerbose(string pattern) { break; case 'a': - options |= ReFlags.ASCII; - RemoveOption(ref pattern, ref nameIndex); - break; case 'i': - options |= ReFlags.IGNORECASE; - RemoveOption(ref pattern, ref nameIndex); - break; case 'L': - options |= ReFlags.LOCALE; - RemoveOption(ref pattern, ref nameIndex); - break; case 'm': - options |= ReFlags.MULTILINE; - RemoveOption(ref pattern, ref nameIndex); - break; case 's': - options |= ReFlags.DOTALL; - RemoveOption(ref pattern, ref nameIndex); - break; case 'u': - options |= ReFlags.UNICODE; - RemoveOption(ref pattern, ref nameIndex); - break; case 'x': - if (!verbose) return PreParseRegex(context, userPattern, true, out options); - options |= ReFlags.VERBOSE; - RemoveOption(ref pattern, ref nameIndex); - break; + if (MaybeParseFlags(pattern.AsSpan().Slice(nameIndex), out int consumed, out ReFlags flags)) { + nameIndex -= 2; + if (nameIndex != 0) { + // error in 3.11 + if (userPattern.Length > 20) { + PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"Flags not at the start of the expression {(isBytes ? "b" : string.Empty)}{PythonOps.Repr(context, userPattern.Substring(0, 20))} (truncated)"); + } else { + PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"Flags not at the start of the expression {(isBytes ? "b" : string.Empty)}{PythonOps.Repr(context, userPattern)}"); + } + } + if (flags.HasFlag(ReFlags.VERBOSE) && !verbose) return PreParseRegex(context, userPattern, verbose: true, isBytes: isBytes, out options); + options |= flags; + pattern = pattern.Remove(nameIndex, consumed + 3); + break; + } + if (pattern[nameIndex + consumed] != ':') { + throw PythonExceptions.CreateThrowable(error(context), "Unrecognized flag " + pattern[nameIndex + consumed]); + } + break; // grouping construct case ':': break; // non-capturing case '=': break; // look ahead assertion case '<': break; // positive look behind assertion case '!': break; // negative look ahead assertion case '#': break; // inline comment + case '-': break; // grouping construct case '(': // conditional match alternation (?(id/name)yes-pattern|no-pattern) // move past ?( so we don't preparse the name. @@ -1187,9 +1185,7 @@ static string ApplyVerbose(string pattern) { case System.Globalization.UnicodeCategory.LetterNumber: case System.Globalization.UnicodeCategory.OtherNumber: case System.Globalization.UnicodeCategory.ConnectorPunctuation: - pattern = pattern.Remove(nameIndex - 1, 1); - cur--; - break; + throw PythonExceptions.CreateThrowable(error(context), "bad escape \\" + curChar); case System.Globalization.UnicodeCategory.DecimalDigitNumber: // actually don't want to unescape '\1', '\2' etc. which are references to groups break; @@ -1202,21 +1198,50 @@ static string ApplyVerbose(string pattern) { } return pattern; - } - private static void RemoveOption(ref string pattern, ref int nameIndex) { - if (pattern[nameIndex - 1] == '?' && nameIndex < (pattern.Length - 1) && pattern[nameIndex + 1] == ')') { - pattern = pattern.Remove(nameIndex - 2, 4); - nameIndex -= 2; - } else { - pattern = pattern.Remove(nameIndex, 1); - nameIndex -= 2; + bool MaybeParseFlags(ReadOnlySpan pattern, out int consumed, out ReFlags flags) { + consumed = default; + flags = default; + foreach (char c in pattern) { + switch (c) { + case 'a': + flags |= ReFlags.ASCII; + break; + case 'i': + flags |= ReFlags.IGNORECASE; + break; + case 'L': + flags |= ReFlags.LOCALE; + break; + case 'm': + flags |= ReFlags.MULTILINE; + break; + case 's': + flags |= ReFlags.DOTALL; + break; + case 'u': + flags |= ReFlags.UNICODE; + break; + case 'x': + flags |= ReFlags.VERBOSE; + break; + case ')': + return true; + case ':': + return false; + default: + return false; + } + consumed++; + } + consumed = 0; + return false; } } private static string GetRandomString() => r.Next(int.MaxValue / 2, int.MaxValue).ToString(); - private static string UnescapeGroups(RegExpMatch m, string text) { + private static string UnescapeGroups(CodeContext context, RegExpMatch m, string text) { for (int i = 0; i < text.Length; i++) { if (text[i] == '\\') { StringBuilder sb = new StringBuilder(text, 0, i, text.Length); @@ -1224,7 +1249,9 @@ private static string UnescapeGroups(RegExpMatch m, string text) { do { if (text[i] == '\\') { i++; - if (i == text.Length) { sb.Append('\\'); break; } + if (i == text.Length) { + throw PythonExceptions.CreateThrowable(error(context), $"bad escape (end of pattern) at position {i - 1}"); + } switch (text[i]) { case 'n': sb.Append('\n'); break; @@ -1285,6 +1312,7 @@ private static string UnescapeGroups(RegExpMatch m, string text) { sb.Append((char)val); } } else { + PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"bad escape \\{text[i]}"); // error in 3.7 sb.Append('\\'); sb.Append((char)text[i]); } diff --git a/Src/IronPython/Compiler/Tokenizer.cs b/Src/IronPython/Compiler/Tokenizer.cs index 07b0ed5e5..a5c1b5a05 100644 --- a/Src/IronPython/Compiler/Tokenizer.cs +++ b/Src/IronPython/Compiler/Tokenizer.cs @@ -735,11 +735,16 @@ private Token ReadNumber(int start) { } isPrefix0 = true; - while (NextChar('0')) { } // skip leading zeroes + // skip leading zeroes + while (true) { + NextChar('_'); + if (!NextChar('0')) break; + } } bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { @@ -755,7 +760,7 @@ private Token ReadNumber(int start) { MarkTokenEnd(); // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenString(), 10)); + return new ConstantValueToken(ParseInteger(GetTokenSpan(), 10)); case 'j': case 'J': @@ -784,7 +789,7 @@ private Token ReadNumber(int start) { } // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenString(), 10)); + return new ConstantValueToken(ParseInteger(GetTokenSpan(), 10)); } isFirstChar = false; } @@ -795,8 +800,9 @@ private Token ReadBinaryNumber() { int iVal = 0; bool useBigInt = false; BigInteger bigInt = BigInteger.Zero; - bool first = true; + bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { case '0': @@ -812,7 +818,7 @@ private Token ReadBinaryNumber() { bigInt = (BigInteger)iVal; } - if (bits >= 32) { + if (useBigInt) { bigInt = (bigInt << 1) | (ch - '0'); } else { iVal = iVal << 1 | (ch - '0'); @@ -822,22 +828,21 @@ private Token ReadBinaryNumber() { BufferBack(); MarkTokenEnd(); - if (first) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.InvalidToken, ErrorCodes.SyntaxError); + if (isFirstChar) { + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError); } return new ConstantValueToken(useBigInt ? bigInt : (object)iVal); } - first = false; + isFirstChar = false; } } private Token ReadOctalNumber() { - bool first = true; + bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { @@ -855,23 +860,24 @@ private Token ReadOctalNumber() { BufferBack(); MarkTokenEnd(); - if (first) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.InvalidToken, ErrorCodes.SyntaxError); + if (isFirstChar) { + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError); } // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenSubstring(2), 8)); + var span = GetTokenSpan().Slice(2); + if (!span.IsEmpty && span[0] == '_') span = span.Slice(1); + return new ConstantValueToken(ParseInteger(span, 8)); } - first = false; + isFirstChar = false; } } private Token ReadHexNumber() { - bool first = true; + bool isFirstChar = true; while (true) { + NextChar('_'); int ch = NextChar(); switch (ch) { @@ -903,17 +909,17 @@ private Token ReadHexNumber() { BufferBack(); MarkTokenEnd(); - if (first) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.InvalidToken, ErrorCodes.SyntaxError); + if (isFirstChar) { + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.InvalidToken, ErrorCodes.SyntaxError); } // TODO: parse in place - return new ConstantValueToken(ParseInteger(GetTokenSubstring(2), 16)); + var span = GetTokenSpan().Slice(2); + if (!span.IsEmpty && span[0] == '_') span = span.Slice(1); + return new ConstantValueToken(ParseInteger(span, 16)); } - first = false; + isFirstChar = false; } } @@ -1431,10 +1437,8 @@ private void SetIndent(int spaces, StringBuilder chars) { current = DoDedent(spaces, current); if (spaces != current) { - ReportSyntaxError( - new SourceSpan(new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1), - BufferTokenEnd), - Resources.IndentationMismatch, ErrorCodes.IndentationError); + var errorStart = new SourceLocation(_tokenEndIndex, IndexToLocation(_tokenEndIndex).Line, IndexToLocation(_tokenEndIndex).Column - 1); + ReportSyntaxError(new SourceSpan(errorStart, BufferTokenEnd), Resources.IndentationMismatch, ErrorCodes.IndentationError); } } } @@ -1448,12 +1452,11 @@ private int DoDedent(int spaces, int current) { return current; } - private object ParseInteger(string s, int radix) { - try { - return LiteralParser.ParseInteger(s, radix); - } catch (ArgumentException e) { - ReportSyntaxError(BufferTokenSpan, e.Message, ErrorCodes.SyntaxError); + private object ParseInteger(ReadOnlySpan s, int radix) { + if (LiteralParser.TryParseIntegerSign(s, radix, out object result)) { + return result; } + ReportSyntaxError(BufferTokenSpan, "invalid token", ErrorCodes.SyntaxError); return ScriptingRuntimeHelpers.Int32ToObject(0); } @@ -1673,6 +1676,9 @@ private string GetTokenSubstring(int offset, int length) { return new String(_buffer, _start + offset, length); } + private ReadOnlySpan GetTokenSpan() + => _buffer.AsSpan(_start, _tokenEnd - _start); + [Conditional("DEBUG")] [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1822:MarkMembersAsStatic")] private void CheckInvariants() { diff --git a/Src/IronPython/Runtime/FormattingHelper.cs b/Src/IronPython/Runtime/FormattingHelper.cs index 13909fd2b..21be97843 100644 --- a/Src/IronPython/Runtime/FormattingHelper.cs +++ b/Src/IronPython/Runtime/FormattingHelper.cs @@ -2,14 +2,17 @@ // The .NET Foundation licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information. +#nullable enable + using System; +using System.Diagnostics; using System.Globalization; using System.Text; using System.Threading; namespace IronPython.Runtime { internal static class FormattingHelper { - private static NumberFormatInfo _invariantUnderscoreSeperatorInfo; + private static NumberFormatInfo? _invariantUnderscoreSeperatorInfo; /// /// Helper NumberFormatInfo for use by int/BigInteger __format__ routines @@ -33,10 +36,10 @@ public static NumberFormatInfo InvariantUnderscoreNumberInfo { } } - public static string/*!*/ ToCultureString(T/*!*/ val, NumberFormatInfo/*!*/ nfi, StringFormatSpec spec, int? overrideWidth = null) { + public static string/*!*/ ToCultureString(T/*!*/ val, NumberFormatInfo/*!*/ nfi, StringFormatSpec spec, int? overrideWidth = null) where T : notnull { string separator = nfi.NumberGroupSeparator; int[] separatorLocations = nfi.NumberGroupSizes; - string digits = val.ToString(); + string digits = val.ToString()!; // If we're adding leading zeros, we need to know how // many we need. @@ -126,5 +129,58 @@ public static NumberFormatInfo InvariantUnderscoreNumberInfo { return digits; } + + public static string AddUnderscores(string digits, StringFormatSpec spec, bool isNegative) { + var length = digits.Length + (digits.Length - 1) / 4; // length including minimum number of underscores + + int idx; + var fillLength = 0; + if (spec.Fill == '0') { + if (spec.Width > length) { + var width = spec.Width.Value; + if (isNegative || spec.Sign != null && spec.Sign != '-') width--; + fillLength = width - length; + length = width; + } + + // index of first underscore + idx = length % 5; + if (idx == 0) { + idx = 1; + fillLength++; + length++; + } + } else { + // index of first underscore + idx = length % 5; + if (idx == 0) { + idx = 1; + length++; + } + } + + var sb = new StringBuilder(length); + + for (int i = 0; i < fillLength; i++, idx--) { + if (idx == 0) { + sb.Append('_'); + idx = 5; + } else { + sb.Append('0'); + } + } + int j = 0; + for (int i = fillLength; i < length; i++, idx--) { + if (idx == 0) { + sb.Append('_'); + idx = 5; + } else { + sb.Append(digits[j++]); + } + } + Debug.Assert(j == digits.Length); + + return sb.ToString(); + } } } diff --git a/Src/IronPython/Runtime/LiteralParser.cs b/Src/IronPython/Runtime/LiteralParser.cs index 25006489f..1ef1adb68 100644 --- a/Src/IronPython/Runtime/LiteralParser.cs +++ b/Src/IronPython/Runtime/LiteralParser.cs @@ -155,10 +155,10 @@ private static void HandleEscape(ReadOnlySpan data, ref int i, StringBuild case '6': case '7': { val = ch - '0'; - if (i < length && HexValue(data[i].ToChar(null), out int onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out int onechar)) { val = val * 8 + onechar; i++; - if (i < length && HexValue(data[i].ToChar(null), out onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out onechar)) { val = val * 8 + onechar; i++; } @@ -562,10 +562,10 @@ internal static List ParseBytes(ReadOnlySpan data, bool isRaw, bool case '6': case '7': { val = ch - '0'; - if (i < length && HexValue(data[i].ToChar(null), out int onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out int onechar)) { val = val * 8 + onechar; i++; - if (i < length && HexValue(data[i].ToChar(null), out onechar) && onechar < 8) { + if (i < length && TryConvertDigit(data[i].ToChar(null), 8, out onechar)) { val = val * 8 + onechar; i++; } @@ -598,7 +598,7 @@ internal static List ParseBytes(ReadOnlySpan data, bool isRaw, bool return buf; } - private static bool HexValue(char ch, out int value) { + private static bool TryConvertDigit(char ch, int b, out int value) { switch (ch) { case '0': case '\x660': value = 0; break; @@ -631,50 +631,13 @@ private static bool HexValue(char ch, out int value) { } break; } - return true; - } - - private static int HexValue(char ch) { - int value; - if (!HexValue(ch, out value)) { - throw new ValueErrorException("bad char for integer value: " + ch); - } - return value; - } - - private static int CharValue(char ch, int b) { - int val = HexValue(ch); - if (val >= b) { - throw new ValueErrorException(String.Format("bad char for the integer value: '{0}' (base {1})", ch, b)); - } - return val; - } - - private static bool ParseInt(string text, int b, out int ret) { - ret = 0; - long m = 1; - for (int i = text.Length - 1; i >= 0; i--) { - // avoid the exception here. Not only is throwing it expensive, - // but loading the resources for it is also expensive - long lret = ret + m * CharValue(text[i], b); - if (Int32.MinValue <= lret && lret <= Int32.MaxValue) { - ret = (int)lret; - } else { - return false; - } - - m *= b; - if (Int32.MinValue > m || m > Int32.MaxValue) { - return false; - } - } - return true; + return value < b; } private static bool TryParseInt(in ReadOnlySpan text, int start, int length, int b, out int value, out int consumed) where T : IConvertible { value = 0; for (int i = start, end = start + length; i < end; i++) { - if (i < text.Length && HexValue(text[i].ToChar(null), out int onechar) && onechar < b) { + if (i < text.Length && TryConvertDigit(text[i].ToChar(null), b, out int onechar)) { value = value * b + onechar; } else { consumed = i - start; @@ -686,218 +649,192 @@ private static bool TryParseInt(in ReadOnlySpan text, int start, int lengt } public static object ParseInteger(string text, int b) { - Debug.Assert(b != 0); - int iret; - if (!ParseInt(text, b, out iret)) { - BigInteger ret = ParseBigInteger(text, b); - if (!ret.AsInt32(out iret)) { - return ret; - } - } - return ScriptingRuntimeHelpers.Int32ToObject(iret); - } - - public static object ParseIntegerSign(string text, int b, int start = 0) { - if (TryParseIntegerSign(text, b, start, out object val)) + if (TryParseInteger(text.AsSpan(), b, false, out object val)) { return val; - - throw new ValueErrorException(string.Format("invalid literal for int() with base {0}: {1}", b, StringOps.__repr__(text))); + } + throw new ValueErrorException($"invalid literal with base {b}: {text}"); } - internal static bool TryParseIntegerSign(string text, int b, int start, out object val) { - int end = text.Length, saveb = b, savestart = start; - if (start < 0 || start > end) throw new ArgumentOutOfRangeException(nameof(start)); - short sign = 1; - + internal static bool TryParseIntegerSign(ReadOnlySpan text, int b, out object val) { if (b < 0 || b == 1 || b > 36) { throw new ValueErrorException("int() base must be >= 2 and <= 36, or 0"); } - ParseIntegerStart(text, ref b, ref start, end, ref sign); + text = text.Trim(); - if (start < end && char.IsWhiteSpace(text, start)) { + if (TryParseIntegerStart(text, ref b, out bool isNegative, out int consumed)) { + text = text.Slice(consumed); + } else { val = default; return false; } - int ret = 0; - try { - int saveStart = start; - for (; ; ) { - int digit; - if (start >= end) { - if (saveStart == start) { - val = default; - return false; - } - break; - } - if (!HexValue(text[start], out digit)) break; - if (!(digit < b)) { + Debug.Assert(!text.IsEmpty); + + return TryParseInteger(text, b, isNegative, out val); + } + + private static bool TryParseInteger(ReadOnlySpan text, int b, bool isNegative, out object val) { + long ret = 0; + int underscore = 1; + + for (int i = 0; i < text.Length; i++) { + var ch = text[i]; + + if (ch == '_') { + underscore++; + if (underscore > 1) { val = default; return false; } - - checked { - // include sign here so that System.Int32.MinValue won't overflow - ret = ret * b + sign * digit; - } - start++; + continue; + } else { + underscore = 0; } - } catch (OverflowException) { - if (TryParseBigIntegerSign(text, saveb, savestart, out var bi)) { - val = bi; - return true; + + if (!TryConvertDigit(ch, b, out int digit)) { + val = default; + return false; } - val = default; - return false; - } - ParseIntegerEnd(text, ref start, ref end); + ret = ret * b + digit; - if (start < end) { - val = default; - return false; - } + if (ret > int.MaxValue) { + BigInteger retBi = ret; - val = ScriptingRuntimeHelpers.Int32ToObject(ret); - return true; - } + // Repeated integer multiplication is expensive so use a grouping strategy. + // We pick group sizes that ensure our numbers stay in the Int32 range. + int groupMax = 5; // zzzzzz (base 36) = 2_176_782_335 > int.MaxValue + if (b <= 10) groupMax = 9; // 2_147_483_647 - private static void ParseIntegerStart(string text, ref int b, ref int start, int end, ref short sign) { - // Skip whitespace - while (start < end && Char.IsWhiteSpace(text, start)) start++; - // Sign? - if (start < end) { - switch (text[start]) { - case '-': - sign = -1; - goto case '+'; - case '+': - start++; - break; - } - } + int buffer = 0; + int cnt = 0; + int smallMult = 1; - // Determine base - if (b == 0) { - if (start < end && text[start] == '0') { - // Hex, oct, or bin - if (++start < end) { - switch (text[start]) { - case 'x': - case 'X': - start++; - b = 16; - break; - case 'o': - case 'O': - b = 8; - start++; - break; - case 'b': - case 'B': - start++; - b = 2; - break; + for (i++; i < text.Length; i++) { + ch = text[i]; + + if (ch == '_') { + underscore++; + if (underscore > 1) { + val = default; + return false; + } + continue; + } else { + underscore = 0; } - } - if (b == 0) { - // Keep the leading zero - start--; - b = 8; - } - } else { - b = 10; - } - } - } + if (!TryConvertDigit(ch, b, out digit)) { + val = default; + return false; + } - private static void ParseIntegerEnd(string text, ref int start, ref int end) { - // Skip whitespace - while (start < end && char.IsWhiteSpace(text, start)) start++; - } + buffer = buffer * b + digit; + cnt++; + smallMult *= b; - internal static BigInteger ParseBigInteger(string text, int b) { - Debug.Assert(b != 0); - BigInteger ret = BigInteger.Zero; - BigInteger m = BigInteger.One; + Debug.Assert(smallMult > 0); // no overflows! - int i = text.Length - 1; + if (cnt >= groupMax) { + retBi = retBi * smallMult + buffer; + // reset buffer + buffer = 0; + cnt = 0; + smallMult = 1; + } + } - int groupMax = 7; - if (b <= 10) groupMax = 9;// 2 147 483 647 + if (cnt > 0) { + retBi = retBi * smallMult + buffer; + } - while (i >= 0) { - // extract digits in a batch - int smallMultiplier = 1; - uint uval = 0; + if (isNegative) { + if (retBi == (BigInteger)int.MaxValue + 1) { + val = ScriptingRuntimeHelpers.Int32ToObject(int.MinValue); + return true; + } + val = -retBi; + return true; + } - for (int j = 0; j < groupMax && i >= 0; j++) { - uval = (uint)(CharValue(text[i--], b) * smallMultiplier + uval); - smallMultiplier *= b; + if (underscore != 0) { + val = default; + return false; + } + + val = retBi; + return true; } + } - // this is more generous than needed - ret += m * (BigInteger)uval; - if (i >= 0) m = m * (smallMultiplier); + if (underscore != 0) { + val = default; + return false; } - return ret; + int res = unchecked((int)ret); + res = isNegative ? -res : res; + val = ScriptingRuntimeHelpers.Int32ToObject(res); + return true; } - internal static BigInteger ParseBigIntegerSign(string text, int b, int start = 0) { - if (TryParseBigIntegerSign(text, b, start, out var val)) - return val; - - throw new ValueErrorException(string.Format("invalid literal for int() with base {0}: {1}", b, StringOps.__repr__(text))); - } + private static bool TryParseIntegerStart(ReadOnlySpan text, ref int b, out bool isNegative, out int consumed) { + // set defaults + isNegative = false; + consumed = 0; - private static bool TryParseBigIntegerSign(string text, int b, int start, out BigInteger val) { - int end = text.Length; - if (start < 0 || start > end) throw new ArgumentOutOfRangeException(nameof(start)); - short sign = 1; + if (text.IsEmpty) return false; - if (b < 0 || b == 1 || b > 36) { - throw new ValueErrorException("int() base must be >= 2 and <= 36, or 0"); - } + var start = 0; + var end = text.Length; - ParseIntegerStart(text, ref b, ref start, end, ref sign); + // assumes a Trim has already been preformed + Debug.Assert(!char.IsWhiteSpace(text[start])); - if (start < end && char.IsWhiteSpace(text, start)) { - val = default; - return false; + // sign? + switch (text[start]) { + case '-': + isNegative = true; + if (++start >= end) return false; + break; + case '+': + if (++start >= end) return false; + break; } - BigInteger ret = BigInteger.Zero; - int saveStart = start; - for (; ; ) { - int digit; - if (start >= end) { - if (start == saveStart) { - val = default; - return false; + // determine base + if (b == 0) { + if (start + 1 < end && text[start] == '0') { + char ch = text[++start]; + switch (ch) { + case 'x': + case 'X': + b = 16; + break; + case 'o': + case 'O': + b = 8; + break; + case 'b': + case 'B': + b = 2; + break; + default: + b = 1; + consumed = start - 1; + return true; } - break; - } - if (!HexValue(text[start], out digit)) break; - if (!(digit < b)) { - val = default; - return false; + if (++start >= end) return false; + if (text[start] == '_') { + if (++start >= end) return false; + } + } else { + b = 10; } - ret = ret * b + digit; - start++; - } - - ParseIntegerEnd(text, ref start, ref end); - - if (start < end) { - val = default; - return false; } - val = sign < 0 ? -ret : ret; + consumed = start; return true; } @@ -1028,7 +965,7 @@ public static Complex ParseComplex(string s) { imag += "1"; // convert +/- to +1/-1 } - return new Complex(String.IsNullOrEmpty(real) ? 0 : ParseFloat(real), ParseFloat(imag)); + return new Complex(string.IsNullOrEmpty(real) ? 0 : ParseFloat(real), ParseFloat(imag)); } else { throw ExnMalformed(); } @@ -1044,7 +981,7 @@ public static Complex ParseImaginary(string text) { System.Globalization.CultureInfo.InvariantCulture.NumberFormat )); } catch (OverflowException) { - return new Complex(0, Double.PositiveInfinity); + return new Complex(0, double.PositiveInfinity); } } } diff --git a/Src/IronPython/Runtime/Operations/BigIntegerOps.cs b/Src/IronPython/Runtime/Operations/BigIntegerOps.cs index 6ed328bf2..dc7e1e203 100644 --- a/Src/IronPython/Runtime/Operations/BigIntegerOps.cs +++ b/Src/IronPython/Runtime/Operations/BigIntegerOps.cs @@ -75,9 +75,17 @@ private static object FastNew(CodeContext/*!*/ context, object? o, int @base = 1 throw new InvalidOperationException(); // unreachable } case string s: - return LiteralParser.ParseIntegerSign(s, @base, FindStart(s, @base)); + if (LiteralParser.TryParseIntegerSign(s.AsSpan(FindStart(s, @base)), @base, out result)) { + return result; + } else { + throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {PythonOps.Repr(context, s)}"); + } case Extensible es: - return TryInvokeInt(context, o, out result) ? result : LiteralParser.ParseIntegerSign(es.Value, @base, FindStart(es.Value, @base)); + if (TryInvokeInt(context, o, out result) || LiteralParser.TryParseIntegerSign(es.Value.AsSpan(FindStart(es.Value, @base)), @base, out result)) { + return result; + } else { + throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {PythonOps.Repr(context, es)}"); + } default: break; } @@ -168,7 +176,7 @@ public static object __new__(CodeContext/*!*/ context, [NotNone] PythonType cls, ?? throw PythonOps.TypeErrorForBadInstance("int() argument must be a string, a bytes-like object or a number, not '{0}'", x); var text = buf.AsReadOnlySpan().MakeString(); - if (!LiteralParser.TryParseIntegerSign(text, @base, FindStart(text, @base), out value)) + if (!LiteralParser.TryParseIntegerSign(text.AsSpan(FindStart(text, @base)), @base, out value)) throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {new Bytes(x).__repr__(context)}"); } @@ -203,6 +211,8 @@ private static object ReturnObject(CodeContext context, PythonType cls, object v => cls == TypeCache.BigInteger ? value : cls.CreateInstance(context, value); private static int FindStart(string s, int radix) { + if (radix == 10) return 0; + int i = 0; // skip whitespace @@ -757,8 +767,7 @@ public static BigInteger ToBigInteger(BigInteger self) { if (spec.Fill == '0' && spec.Width > 1) { digits = FormattingHelper.ToCultureString(val, culture.NumberFormat, spec, (spec.Sign != null && spec.Sign != '-' || self < 0) ? spec.Width - 1 : null); - } - else { + } else { digits = FormattingHelper.ToCultureString(val, culture.NumberFormat, spec); } break; @@ -795,25 +804,37 @@ public static BigInteger ToBigInteger(BigInteger self) { if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = AbsToHex(val, lowercase: false); + digits = ToHexDigits(val, lowercase: false); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'x': if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = AbsToHex(val, lowercase: true); + digits = ToHexDigits(val, lowercase: true); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'o': // octal if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToOctal(val, lowercase: true); + digits = ToOctalDigits(val); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'b': // binary if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToBinary(val, includeType: false, lowercase: true); + digits = ToBinaryDigits(val); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'c': // single char if (spec.Precision != null) { @@ -825,7 +846,7 @@ public static BigInteger ToBigInteger(BigInteger self) { int iVal; if (!self.AsInt32(out iVal)) { throw PythonOps.OverflowError("Python int too large to convert to System.Int32"); - } else if(iVal < 0 || iVal > 0x10ffff) { + } else if (iVal < 0 || iVal > 0x10ffff) { throw PythonOps.OverflowError("%c arg not in range(0x110000)"); } @@ -906,10 +927,9 @@ public static object from_bytes(CodeContext context, PythonType type, object? by var val = new BigInteger(bytesArr); #endif - // prevents a TypeError: int.__new__(bool) is not safe - if (type == TypeCache.Boolean) return val == 0 ? ScriptingRuntimeHelpers.False : ScriptingRuntimeHelpers.True; + if (type == TypeCache.BigInteger) return val; - return __new__(context, type, val); + return PythonTypeOps.CallParams(context, type, val); } #endregion @@ -1044,8 +1064,10 @@ public static TypeCode GetTypeCode(BigInteger self) { #region Helpers + /// + /// Unlike ConvertToDouble, this method produces a Python-specific overflow error messge. + /// internal static double ToDouble(BigInteger self) { - // Unlike ConvertToDouble, this method produces a Python-specific overflow error messge. if (MathUtils.TryToFloat64(self, out double res)) { return res; } @@ -1056,27 +1078,24 @@ internal static string AbsToHex(BigInteger val, bool lowercase) { return ToDigits(val, 16, lowercase); } - private static string ToOctal(BigInteger val, bool lowercase) { - return ToDigits(val, 8, lowercase); + private static string ToHexDigits(BigInteger val, bool lowercase) { + Debug.Assert(val >= 0); + return ToDigits(val, 16, lower: lowercase); } - internal static string ToBinary(BigInteger val) { - string res = ToBinary(BigInteger.Abs(val), true, true); - if (val.IsNegative()) { - res = "-" + res; - } - return res; + private static string ToOctalDigits(BigInteger val) { + Debug.Assert(val >= 0); + return ToDigits(val, 8, lower: false); } - private static string ToBinary(BigInteger val, bool includeType, bool lowercase) { - Debug.Assert(!val.IsNegative()); - - string digits = ToDigits(val, 2, lowercase); + private static string ToBinaryDigits(BigInteger val) { + Debug.Assert(val >= 0); + return ToDigits(val, 2, lower: false); + } - if (includeType) { - digits = (lowercase ? "0b" : "0B") + digits; - } - return digits; + internal static string ToBinary(BigInteger val) { + var digits = ToBinaryDigits(BigInteger.Abs(val)); + return ((val < 0) ? "-0b" : "0b") + digits; } private static string/*!*/ ToDigits(BigInteger/*!*/ val, int radix, bool lower) { @@ -1085,12 +1104,12 @@ private static string ToBinary(BigInteger val, bool includeType, bool lowercase) } StringBuilder str = new StringBuilder(); + char a = lower ? 'a' : 'A'; while (val != 0) { int digit = (int)(val % radix); if (digit < 10) str.Append((char)((digit) + '0')); - else if (lower) str.Append((char)((digit - 10) + 'a')); - else str.Append((char)((digit - 10) + 'A')); + else str.Append((char)((digit - 10) + a)); val /= radix; } diff --git a/Src/IronPython/Runtime/Operations/FloatOps.cs b/Src/IronPython/Runtime/Operations/FloatOps.cs index fa02bc04e..0ca673710 100644 --- a/Src/IronPython/Runtime/Operations/FloatOps.cs +++ b/Src/IronPython/Runtime/Operations/FloatOps.cs @@ -115,9 +115,9 @@ static void Warn(CodeContext context, object result) { public static PythonTuple as_integer_ratio(double self) { if (Double.IsInfinity(self)) { - throw PythonOps.OverflowError("Cannot pass infinity to float.as_integer_ratio."); + throw PythonOps.OverflowError("cannot convert Infinity to integer ratio"); } else if (Double.IsNaN(self)) { - throw PythonOps.ValueError("Cannot pass nan to float.as_integer_ratio."); + throw PythonOps.ValueError("cannot convert NaN to integer ratio"); } BigInteger dem = 1; @@ -162,7 +162,8 @@ public static object fromhex(CodeContext/*!*/ context, PythonType/*!*/ cls, stri BigInteger intVal; if (integer.Success) { - intVal = LiteralParser.ParseBigInteger(integer.Value, 16); + // prefix with 0 to get positive number + intVal = BigInteger.Parse("0" + integer.Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture); } else { intVal = BigInteger.Zero; } diff --git a/Src/IronPython/Runtime/Operations/IntOps.cs b/Src/IronPython/Runtime/Operations/IntOps.cs index 075e0bd41..a4698a642 100644 --- a/Src/IronPython/Runtime/Operations/IntOps.cs +++ b/Src/IronPython/Runtime/Operations/IntOps.cs @@ -267,25 +267,37 @@ public static string __format__(CodeContext/*!*/ context, int self, [NotNone] st if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToHex(val, lowercase: false); + digits = ToHexDigits(val, lowercase: false); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self < 0); + } break; case 'x': if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToHex(val, lowercase: true); + digits = ToHexDigits(val, lowercase: true); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self < 0); + } break; case 'o': // octal if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToOctal(val, lowercase: true); + digits = ToOctalDigits(val); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self < 0); + } break; case 'b': // binary if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToBinary(val, includeType: false); + digits = ToBinaryDigits(val) ; + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self < 0); + } break; case 'c': // single char if (spec.Precision != null) { @@ -294,6 +306,11 @@ public static string __format__(CodeContext/*!*/ context, int self, [NotNone] st if (spec.Sign != null) { throw PythonOps.ValueError("Sign not allowed with integer format specifier 'c'"); } + + if (spec.AlternateForm) { + throw PythonOps.ValueError("Alternate form(#) not allowed with integer format specifier 'c'"); + } + if (self < 0 || self > 0x10ffff) { throw PythonOps.OverflowError("%c arg not in range(0x110000)"); } @@ -318,86 +335,47 @@ public static object from_bytes(CodeContext context, PythonType type, object byt #region Helpers - private static string ToHex(int self, bool lowercase) { - string digits; - if (self != Int32.MinValue) { - int val = self; - if (self < 0) { - val = -self; - } - digits = val.ToString(lowercase ? "x" : "X", CultureInfo.InvariantCulture); - } else { - digits = "80000000"; - } - - return digits; + private static string ToHexDigits(int val, bool lowercase) { + Debug.Assert(val >= 0); + return val.ToString(lowercase ? "x" : "X", CultureInfo.InvariantCulture); } - private static string ToOctal(int self, bool lowercase) { - string digits; - if (self == 0) { - digits = "0"; - } else if (self != Int32.MinValue) { - int val = self; - if (self < 0) { - val = -self; - } + private static string ToOctalDigits(int val) { + Debug.Assert(val >= 0); + if (val == 0) return "0"; - StringBuilder sbo = new StringBuilder(); - for (int i = 30; i >= 0; i -= 3) { - char value = (char)('0' + (val >> i & 0x07)); - if (value != '0' || sbo.Length > 0) { - sbo.Append(value); - } + StringBuilder sb = new StringBuilder(); + for (int i = 30; i >= 0; i -= 3) { + char value = (char)('0' + (val >> i & 0x07)); + if (value != '0' || sb.Length > 0) { + sb.Append(value); } - digits = sbo.ToString(); - } else { - digits = "20000000000"; } - - return digits; + return sb.ToString(); } - internal static string ToBinary(int self) { - if (self == Int32.MinValue) { - return "-0b10000000000000000000000000000000"; - } + private static string ToBinaryDigits(int val) { + Debug.Assert(val >= 0); + if (val == 0) return "0"; - string res = ToBinary(self, true); - if (self < 0) { - res = "-" + res; + StringBuilder sb = new StringBuilder(); + for (int i = 31; i >= 0; i--) { + if ((val & (1 << i)) != 0) { + sb.Append('1'); + } else if (sb.Length != 0) { + sb.Append('0'); + } } - return res; + return sb.ToString(); } - private static string ToBinary(int self, bool includeType) { - string digits; - if (self == 0) { - digits = "0"; - } else if (self != Int32.MinValue) { - StringBuilder sbb = new StringBuilder(); - - int val = self; - if (self < 0) { - val = -self; - } - - for (int i = 31; i >= 0; i--) { - if ((val & (1 << i)) != 0) { - sbb.Append('1'); - } else if (sbb.Length != 0) { - sbb.Append('0'); - } - } - digits = sbb.ToString(); - } else { - digits = "10000000000000000000000000000000"; + internal static string ToBinary(int val) { + if (val == int.MinValue) { + return "-0b10000000000000000000000000000000"; } - if (includeType) { - digits = "0b" + digits; - } - return digits; + var digits = ToBinaryDigits(Math.Abs(val)); + return ((val < 0) ? "-0b" : "0b") + digits; } #endregion diff --git a/Src/IronPython/Runtime/StringFormatSpec.cs b/Src/IronPython/Runtime/StringFormatSpec.cs index 054ac58c9..1a9b5b01c 100644 --- a/Src/IronPython/Runtime/StringFormatSpec.cs +++ b/Src/IronPython/Runtime/StringFormatSpec.cs @@ -131,7 +131,15 @@ private StringFormatSpec(char? fill, char? alignment, char? sign, int? width, bo curOffset++; } - // TODO: read optional underscore (new in 3.6) + // read optional underscore + if (curOffset != formatSpec.Length && + formatSpec[curOffset] == '_') { + thousandsUnderscore = true; + curOffset++; + if (thousandsComma || curOffset != formatSpec.Length && formatSpec[curOffset] == ',') { + throw PythonOps.ValueError("Cannot specify both ',' and '_'"); + } + } // read precision if (curOffset != formatSpec.Length && @@ -191,11 +199,14 @@ private StringFormatSpec(char? fill, char? alignment, char? sign, int? width, bo break; default: throw PythonOps.ValueError("Cannot specify '_' with '{0}'", type); - } } } + if (curOffset != formatSpec.Length) { + throw PythonOps.ValueError("Invalid format specifier '{0}'", formatSpec); + } + return new StringFormatSpec( fill, align, diff --git a/Src/IronPython/Runtime/StringFormatter.cs b/Src/IronPython/Runtime/StringFormatter.cs index d55dce007..bd62147f4 100644 --- a/Src/IronPython/Runtime/StringFormatter.cs +++ b/Src/IronPython/Runtime/StringFormatter.cs @@ -464,7 +464,7 @@ private void CheckDataUsed() { if (!PythonOps.IsMappingType(DefaultContext.Default, _data)) { if ((!(_data is PythonTuple) && _dataIndex != 1) || (_data is PythonTuple && _dataIndex != ((PythonTuple)_data).__len__())) { - throw PythonOps.TypeError("not all arguments converted during string formatting"); + throw PythonOps.TypeError("not all arguments converted during {0} formatting", _asBytes ? "bytes" : "string"); } } } @@ -870,10 +870,12 @@ private void AppendBytes() { Debug.Assert(_asBytes); if (_opts.Value is Bytes bytes || Bytes.TryInvokeBytesOperator(_context, _opts.Value, out bytes!)) { AppendString(StringOps.Latin1Encoding.GetString(bytes.UnsafeByteArray)); - } else if (_opts.Value is ByteArray byteArray) { - AppendString(StringOps.Latin1Encoding.GetString(byteArray.UnsafeByteList.AsByteSpan())); + } else if (_opts.Value is IBufferProtocol bufferProtocol) { + using var buffer = bufferProtocol.GetBuffer(BufferFlags.FullRO); + var span = buffer.IsCContiguous() ? buffer.AsReadOnlySpan() : buffer.ToArray(); + AppendString(StringOps.Latin1Encoding.GetString(span)); } else { - throw PythonOps.TypeError($"%b requires bytes, or an object that implements __bytes__, not '{PythonOps.GetPythonTypeName(_opts.Value)}'"); + throw PythonOps.TypeError($"%b requires a bytes-like object, or an object that implements __bytes__, not '{PythonOps.GetPythonTypeName(_opts.Value)}'"); } } diff --git a/Src/IronPythonTest/Stress/Engine.cs b/Src/IronPythonTest/Stress/Engine.cs index f44c3a842..3fe565880 100644 --- a/Src/IronPythonTest/Stress/Engine.cs +++ b/Src/IronPythonTest/Stress/Engine.cs @@ -44,6 +44,8 @@ private static long GetTotalMemory() { #if FEATURE_REFEMIT [Test] public void ScenarioXGC() { + Assert.AreEqual(1, 1); // prevents nunit from counting towards memory usage + long initialMemory = GetTotalMemory(); // Create multiple scopes: diff --git a/Tests/test_strformat.py b/Tests/test_strformat.py index 4b497e434..1a7a4c5bd 100644 --- a/Tests/test_strformat.py +++ b/Tests/test_strformat.py @@ -226,13 +226,27 @@ class bad2(object): def __format__(self, *args): return 42 - self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType" if is_cli else "__format__ method did not return string", '{0}'.format, bad()) - self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int" if is_cli else "__format__ method did not return string", '{0}'.format, bad2()) + if is_cli: + self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType", '{0}'.format, bad()) + self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int", '{0}'.format, bad2()) + elif sys.version_info >= (3, 5): + self.assertRaisesMessage(TypeError, "__format__ must return a str, not NoneType", '{0}'.format, bad()) + self.assertRaisesMessage(TypeError, "__format__ must return a str, not int", '{0}'.format, bad2()) + else: + self.assertRaisesMessage(TypeError, "__format__ method did not return string", '{0}'.format, bad()) + self.assertRaisesMessage(TypeError, "__format__ method did not return string", '{0}'.format, bad2()) self.assertRaisesMessage(ValueError, "Unknown conversion specifier x", '{0!x}'.format, 'abc') - self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType" if is_cli else "__format__ method did not return string", format, bad()) - self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int" if is_cli else "__format__ method did not return string", format, bad2()) + if is_cli: + self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType", format, bad()) + self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int", format, bad2()) + elif sys.version_info >= (3, 5): + self.assertRaisesMessage(TypeError, "__format__ must return a str, not NoneType", format, bad()) + self.assertRaisesMessage(TypeError, "__format__ must return a str, not int", format, bad2()) + else: + self.assertRaisesMessage(TypeError, "__format__ method did not return string", format, bad()) + self.assertRaisesMessage(TypeError, "__format__ method did not return string", format, bad2()) def test_object__format__(self): self.assertEqual(object.__format__("aaa", ""), "aaa") @@ -278,8 +292,13 @@ def test_str___format___errors(self): for char in allChars: if char != 's' and (char < '0' or char > '9'): x = ord(char) - if char==',': + if char == ',': errors.append(('10' + char, "Cannot specify ',' with 's'.")) + elif char == '_': + if is_cli or sys.version_info >= (3, 6): + errors.append(('10' + char, "Cannot specify '_' with 's'.")) + else: + errors.append(('10' + char, "Unknown format code '_' for object of type 'str'")) elif 0x20 < x < 0x80: errors.append(('10' + char, "Unknown format code '%s' for object of type 'str'" % char)) else: @@ -569,6 +588,9 @@ def test_float___format___errors(self): errors = [] okChars = set(['\0', '%', 'E', 'F', 'G', 'e', 'f', 'g', 'n', ','] + [chr(x) for x in range(ord('0'), ord('9') + 1)]) + if is_cli or sys.version_info >= (3, 6): + okChars.add('_') + # verify the okChars are actually ok for char in okChars: 2.0.__format__('10' + char) @@ -868,12 +890,58 @@ def test_int___format__(self): (1, '.0F', '1'), (1, '.0g', '1'), (1, '.0G', '1'), - ] + ] for value, spec, result in tests: self.assertEqual(value.__format__(spec), result) self.assertEqual(big(value).__format__(spec), result) + tests_thousands = [ + # thousands separator + (1000, ',', '1,000'), + (1000, '+,', '+1,000'), + (1000, '-,', '1,000'), + (1000, ' ,', ' 1,000'), + (-1000, ',', '-1,000'), + (-1000, '+,', '-1,000'), + (-1000, '-,', '-1,000'), + (-1000, ' ,', '-1,000'), + (1000, '_', '1_000'), + (1000, '+_', '+1_000'), + (1000, '-_', '1_000'), + (1000, ' _', ' 1_000'), + (-1000, '_', '-1_000'), + (-1000, '+_', '-1_000'), + (-1000, '-_', '-1_000'), + (-1000, ' _', '-1_000'), + + # thousands separator (zero padded) + (1000, '08,', '0,001,000'), + (1000, '+08,', '+001,000'), + (1000, '-08,', '0,001,000'), + (1000, ' 08,', ' 001,000'), + (-1000, '08,', '-001,000'), + (-1000, '+08,', '-001,000'), + (-1000, '-08,', '-001,000'), + (-1000, ' 08,', '-001,000'), + (-1000, '08_', '-001_000'), + (-1000, '+08_', '-001_000'), + (-1000, '-08_', '-001_000'), + (-1000, ' 08_', '-001_000'), + ] + + for value, spec, result in tests_thousands: + if not is_cli and sys.version_info < (3, 6) and "_" in spec: + continue + + assert "d" not in spec + self.assertEqual(value.__format__(spec), result) + self.assertEqual(big(value).__format__(spec), result) + + # also test with an explicit "d" + self.assertEqual(value.__format__(spec + "d"), result) + self.assertEqual(big(value).__format__(spec + "d"), result) + locale_tests = [ (1000, 'n', '1000', '1,000'), (-1000, 'n', '-1000', '-1,000'), @@ -921,6 +989,8 @@ def test_int___format___errors(self): errors.append((OverflowError, sys.maxsize + 1, 'c', "Python int too large to convert to C long")) okChars = set(['%', 'E', 'F', 'G', 'X', 'x', 'b', 'c', 'd', 'o', 'e', 'f', 'g', 'n', ','] + [chr(x) for x in range(ord('0'), ord('9') + 1)]) + if is_cli or sys.version_info >= (3, 6): + okChars.add('_') # verify the okChars are actually ok for char in okChars: