From 064c3866b02a6478da06f2147639c8d5607742d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Lozier?= Date: Tue, 24 Dec 2024 16:01:44 -0500 Subject: [PATCH] Backport some changes from the 3.6 branch --- Src/IronPython.Modules/_ssl.cs | 38 +++-- Src/IronPython.Modules/mmap.cs | 13 +- Src/IronPython.Modules/re.cs | 141 +++++++++++------- Src/IronPython/Runtime/FormattingHelper.cs | 62 +++++++- .../Runtime/Operations/BigIntegerOps.cs | 81 ++++++---- Src/IronPython/Runtime/StringFormatSpec.cs | 15 +- Src/IronPython/Runtime/StringFormatter.cs | 10 +- Src/IronPythonTest/Stress/Engine.cs | 2 + Tests/test_strformat.py | 82 +++++++++- 9 files changed, 325 insertions(+), 119 deletions(-) diff --git a/Src/IronPython.Modules/_ssl.cs b/Src/IronPython.Modules/_ssl.cs index 499dc3789..08ade85f6 100644 --- a/Src/IronPython.Modules/_ssl.cs +++ b/Src/IronPython.Modules/_ssl.cs @@ -120,7 +120,8 @@ public class _SSLContext { public _SSLContext(CodeContext context, int protocol) { if (protocol != PROTOCOL_SSLv2 && protocol != PROTOCOL_SSLv23 && protocol != PROTOCOL_SSLv3 && - protocol != PROTOCOL_TLSv1 && protocol != PROTOCOL_TLSv1_1 && protocol != PROTOCOL_TLSv1_2) { + protocol != PROTOCOL_TLSv1 && protocol != PROTOCOL_TLSv1_1 && protocol != PROTOCOL_TLSv1_2 && + protocol != PROTOCOL_TLS_CLIENT && protocol != PROTOCOL_TLS_SERVER) { throw PythonOps.ValueError("invalid protocol version"); } @@ -131,8 +132,8 @@ public _SSLContext(CodeContext context, int protocol) { if (protocol != PROTOCOL_SSLv3) options |= OP_NO_SSLv3; - verify_mode = SSL_VERIFY_NONE; - check_hostname = false; + verify_mode = protocol == PROTOCOL_TLS_CLIENT ? CERT_REQUIRED : SSL_VERIFY_NONE; + check_hostname = protocol == PROTOCOL_TLS_CLIENT; } public void set_ciphers(CodeContext context, string ciphers) { @@ -200,11 +201,7 @@ public void set_ecdh_curve(CodeContext context, [NotNone] Bytes curve) { public void load_cert_chain(CodeContext context, string certfile, string keyfile = null, object password = null) { if (keyfile is not null) throw new NotImplementedException(nameof(keyfile)); if (password is not null) throw new NotImplementedException(nameof(password)); -#if NET - _cert = X509Certificate2.CreateFromPemFile(certfile, keyfile); -#else _cert = ReadCertificate(context, certfile, readKey: true); -#endif } public PythonList get_ca_certs(CodeContext context, bool binary_form = false) { @@ -766,6 +763,17 @@ public void write_eof() { #nullable restore + [PythonType] + public class SSLSession { + public object has_ticket { get; } + public object id { get; } + public object ticket_lifetime_hint { get; } + public object time { get; } + public object timeout { get; } + + private SSLSession() { } + } + public static object txt2obj(CodeContext context, string txt, bool name = false) { Asn1Object obj = null; if (name) { @@ -995,7 +1003,11 @@ private static PythonTuple IssuerFieldToPython(CodeContext context, string p) { private static X509Certificate2 ReadCertificate(CodeContext context, string filename, bool readKey = false) { #if NET if (readKey) { - return X509Certificate2.CreateFromPemFile(filename); + try { + return X509Certificate2.CreateFromPemFile(filename); + } catch (Exception e) { + throw ErrorDecoding(context, filename, e); + } } #endif @@ -1239,16 +1251,19 @@ private static Exception ErrorDecoding(CodeContext context, params object[] args public const int PROTOCOL_TLSv1 = 3; public const int PROTOCOL_TLSv1_1 = 4; public const int PROTOCOL_TLSv1_2 = 5; + public const int PROTOCOL_TLS_CLIENT = 16; + public const int PROTOCOL_TLS_SERVER = 17; public const int OP_ALL = unchecked((int)0x800003FF); - public const int OP_CIPHER_SERVER_PREFERENCE = 0x400000; - public const int OP_SINGLE_DH_USE = 0x100000; - public const int OP_SINGLE_ECDH_USE = 0x80000; + public const int OP_CIPHER_SERVER_PREFERENCE = 0; // 0x400000; + public const int OP_SINGLE_DH_USE = 0; // 0x100000; + public const int OP_SINGLE_ECDH_USE = 0; // 0x80000; public const int OP_NO_SSLv2 = 0x01000000; public const int OP_NO_SSLv3 = 0x02000000; public const int OP_NO_TLSv1 = 0x04000000; public const int OP_NO_TLSv1_1 = 0x10000000; public const int OP_NO_TLSv1_2 = 0x08000000; + public const int OP_NO_TLSv1_3 = 0; // 0x20000000; internal const int OP_NO_COMPRESSION = 0x20000; internal const int OP_NO_ALL = OP_NO_SSLv2 | OP_NO_SSLv3 | OP_NO_TLSv1 | OP_NO_TLSv1_1 | OP_NO_TLSv1_2 | OP_NO_COMPRESSION; @@ -1274,6 +1289,7 @@ private static Exception ErrorDecoding(CodeContext context, params object[] args public const bool HAS_NPN = false; public const bool HAS_ALPN = false; public const bool HAS_TLS_UNIQUE = false; + public const bool HAS_TLSv1_3 = false; private const int SSL_VERIFY_NONE = 0x00; private const int SSL_VERIFY_PEER = 0x01; diff --git a/Src/IronPython.Modules/mmap.cs b/Src/IronPython.Modules/mmap.cs index d15f61157..c512385c8 100644 --- a/Src/IronPython.Modules/mmap.cs +++ b/Src/IronPython.Modules/mmap.cs @@ -727,20 +727,23 @@ public object tell() { } } - public void write([BytesLike] IList s) { + public int write([NotNone] IBufferProtocol s) { + using var buffer = s.GetBuffer(); using (new MmapLocker(this)) { EnsureWritable(); long pos = Position; - if (_view.Capacity - pos < s.Count) { + if (_view.Capacity - pos < buffer.AsReadOnlySpan().Length) { throw PythonOps.ValueError("data out of range"); } - byte[] data = s as byte[] ?? (s is Bytes b ? b.UnsafeByteArray : s.ToArray()); - _view.WriteArray(pos, data, 0, s.Count); + byte[] data = buffer.AsUnsafeArray() ?? buffer.ToArray(); + _view.WriteArray(pos, data, 0, data.Length); - Position = pos + s.Count; + Position = pos + data.Length; + + return data.Length; } } diff --git a/Src/IronPython.Modules/re.cs b/Src/IronPython.Modules/re.cs index 0046a66be..9d1405332 100644 --- a/Src/IronPython.Modules/re.cs +++ b/Src/IronPython.Modules/re.cs @@ -74,26 +74,21 @@ internal enum ReFlags : int { public const int A = (int)ReFlags.ASCII; // long forms - public const int TEMPLATE = (int)ReFlags.TEMPLATE; + public const int TEMPLATE = (int)ReFlags.TEMPLATE; public const int IGNORECASE = (int)ReFlags.IGNORECASE; - public const int LOCALE = (int)ReFlags.LOCALE; - public const int MULTILINE = (int)ReFlags.MULTILINE; - public const int DOTALL = (int)ReFlags.DOTALL; - public const int UNICODE = (int)ReFlags.UNICODE; - public const int VERBOSE = (int)ReFlags.VERBOSE; - public const int ASCII = (int)ReFlags.ASCII; + public const int LOCALE = (int)ReFlags.LOCALE; + public const int MULTILINE = (int)ReFlags.MULTILINE; + public const int DOTALL = (int)ReFlags.DOTALL; + public const int UNICODE = (int)ReFlags.UNICODE; + public const int VERBOSE = (int)ReFlags.VERBOSE; + public const int ASCII = (int)ReFlags.ASCII; #endregion #region Public API Surface - public static Pattern compile(CodeContext/*!*/ context, object? pattern, int flags = 0) { - try { - return GetPattern(context, pattern, flags, true); - } catch (ArgumentException e) { - throw PythonExceptions.CreateThrowable(error(context), e.Message); - } - } + public static Pattern compile(CodeContext/*!*/ context, object? pattern, int flags = 0) + => GetPattern(context, pattern, flags, true); public const string engine = "cli reg ex"; @@ -170,8 +165,10 @@ public class Pattern : IWeakReferenceable { private WeakRefTracker? _weakRefTracker; internal Pattern(CodeContext/*!*/ context, object pattern, ReFlags flags = 0, bool compiled = false) { - _prePattern = PreParseRegex(context, PatternAsString(pattern, ref flags), (flags & ReFlags.VERBOSE) != 0, out ReFlags options); + _prePattern = PreParseRegex(context, PatternAsString(pattern, ref flags), verbose: flags.HasFlag(ReFlags.VERBOSE), isBytes: !flags.HasFlag(ReFlags.UNICODE), out ReFlags options); flags |= options; + if (flags.HasFlag(ReFlags.UNICODE | ReFlags.LOCALE)) throw PythonOps.ValueError("cannot use LOCALE flag with a str pattern"); + if (flags.HasFlag(ReFlags.ASCII | ReFlags.LOCALE)) throw PythonOps.ValueError("ASCII and LOCALE flags are incompatible"); _re = GenRegex(context, _prePattern, flags, compiled, false); this.pattern = pattern; this.flags = (int)flags; @@ -425,7 +422,7 @@ public object sub(CodeContext/*!*/ context, object? repl, object? @string, int c }; prevEnd = match.Index + match.Length; - if (replacement != null) return UnescapeGroups(match, replacement); + if (replacement != null) return UnescapeGroups(context, match, replacement); return ValidateString(PythonCalls.Call(context, repl, Match.Make(match, this, input))); }, count)); @@ -453,7 +450,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string, prevEnd = match.Index + match.Length; totalCount++; - if (replacement != null) return UnescapeGroups(match, replacement); + if (replacement != null) return UnescapeGroups(context, match, replacement); return ValidateString(PythonCalls.Call(context, repl, Match.Make(match, this, input))); }, @@ -464,7 +461,7 @@ public PythonTuple subn(CodeContext/*!*/ context, object? repl, object? @string, public int flags { get; } - public PythonDictionary groupindex { + public MappingProxy groupindex { get { if (_groups == null) { PythonDictionary d = new PythonDictionary(); @@ -480,7 +477,7 @@ public PythonDictionary groupindex { } _groups = d; } - return _groups; + return new MappingProxy(_groups); } } @@ -489,7 +486,7 @@ public PythonDictionary groupindex { public object pattern { get; } public override bool Equals(object? obj) - => obj is Pattern other && other.pattern == pattern && other.flags == flags; + => obj is Pattern other && PythonOps.IsOrEqualsRetBool(other.pattern, pattern) && other.flags == flags; public override int GetHashCode() => pattern.GetHashCode() ^ flags; @@ -646,6 +643,8 @@ private Match(RegExpMatch m, Pattern pattern, string text, int pos, int endpos) #region Public API Surface + public object? this[object? index] => group(index); + public string __repr__(CodeContext context) => $""; @@ -856,7 +855,7 @@ private Group GetGroup(object? group) { int GetGroupIndex(object? group) { int grpIndex; - if (!Converter.TryConvertToInt32(group, out grpIndex)) { + if (!Converter.TryConvertToIndex(group, out grpIndex, throwOverflowError: false, throwTypeError: false)) { if (group is string s) { grpIndex = re._re.GroupNumberFromName(s); } else if (group is ExtensibleString es) { @@ -929,7 +928,7 @@ private static RegexOptions FlagsToOption(ReFlags flags) { /// Preparses a regular expression text returning a ParsedRegex class /// that can be used for further regular expressions. /// - private static string PreParseRegex(CodeContext/*!*/ context, string pattern, bool verbose, out ReFlags options) { + private static string PreParseRegex(CodeContext/*!*/ context, string pattern, bool verbose, bool isBytes, out ReFlags options) { var userPattern = pattern; options = default; if (verbose) options |= ReFlags.VERBOSE; @@ -1079,39 +1078,37 @@ static string ApplyVerbose(string pattern) { break; case 'a': - options |= ReFlags.ASCII; - RemoveOption(ref pattern, ref nameIndex); - break; case 'i': - options |= ReFlags.IGNORECASE; - RemoveOption(ref pattern, ref nameIndex); - break; case 'L': - options |= ReFlags.LOCALE; - RemoveOption(ref pattern, ref nameIndex); - break; case 'm': - options |= ReFlags.MULTILINE; - RemoveOption(ref pattern, ref nameIndex); - break; case 's': - options |= ReFlags.DOTALL; - RemoveOption(ref pattern, ref nameIndex); - break; case 'u': - options |= ReFlags.UNICODE; - RemoveOption(ref pattern, ref nameIndex); - break; case 'x': - if (!verbose) return PreParseRegex(context, userPattern, true, out options); - options |= ReFlags.VERBOSE; - RemoveOption(ref pattern, ref nameIndex); - break; + if (MaybeParseFlags(pattern.AsSpan().Slice(nameIndex), out int consumed, out ReFlags flags)) { + nameIndex -= 2; + if (nameIndex != 0) { + // error in 3.11 + if (userPattern.Length > 20) { + PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"Flags not at the start of the expression {(isBytes ? "b" : string.Empty)}{PythonOps.Repr(context, userPattern.Substring(0, 20))} (truncated)"); + } else { + PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"Flags not at the start of the expression {(isBytes ? "b" : string.Empty)}{PythonOps.Repr(context, userPattern)}"); + } + } + if (flags.HasFlag(ReFlags.VERBOSE) && !verbose) return PreParseRegex(context, userPattern, verbose: true, isBytes: isBytes, out options); + options |= flags; + pattern = pattern.Remove(nameIndex, consumed + 3); + break; + } + if (pattern[nameIndex + consumed] != ':') { + throw PythonExceptions.CreateThrowable(error(context), "Unrecognized flag " + pattern[nameIndex + consumed]); + } + break; // grouping construct case ':': break; // non-capturing case '=': break; // look ahead assertion case '<': break; // positive look behind assertion case '!': break; // negative look ahead assertion case '#': break; // inline comment + case '-': break; // grouping construct case '(': // conditional match alternation (?(id/name)yes-pattern|no-pattern) // move past ?( so we don't preparse the name. @@ -1187,9 +1184,7 @@ static string ApplyVerbose(string pattern) { case System.Globalization.UnicodeCategory.LetterNumber: case System.Globalization.UnicodeCategory.OtherNumber: case System.Globalization.UnicodeCategory.ConnectorPunctuation: - pattern = pattern.Remove(nameIndex - 1, 1); - cur--; - break; + throw PythonExceptions.CreateThrowable(error(context), "bad escape \\" + curChar); case System.Globalization.UnicodeCategory.DecimalDigitNumber: // actually don't want to unescape '\1', '\2' etc. which are references to groups break; @@ -1202,21 +1197,50 @@ static string ApplyVerbose(string pattern) { } return pattern; - } - private static void RemoveOption(ref string pattern, ref int nameIndex) { - if (pattern[nameIndex - 1] == '?' && nameIndex < (pattern.Length - 1) && pattern[nameIndex + 1] == ')') { - pattern = pattern.Remove(nameIndex - 2, 4); - nameIndex -= 2; - } else { - pattern = pattern.Remove(nameIndex, 1); - nameIndex -= 2; + bool MaybeParseFlags(ReadOnlySpan pattern, out int consumed, out ReFlags flags) { + consumed = default; + flags = default; + foreach (char c in pattern) { + switch (c) { + case 'a': + flags |= ReFlags.ASCII; + break; + case 'i': + flags |= ReFlags.IGNORECASE; + break; + case 'L': + flags |= ReFlags.LOCALE; + break; + case 'm': + flags |= ReFlags.MULTILINE; + break; + case 's': + flags |= ReFlags.DOTALL; + break; + case 'u': + flags |= ReFlags.UNICODE; + break; + case 'x': + flags |= ReFlags.VERBOSE; + break; + case ')': + return true; + case ':': + return false; + default: + return false; + } + consumed++; + } + consumed = 0; + return false; } } private static string GetRandomString() => r.Next(int.MaxValue / 2, int.MaxValue).ToString(); - private static string UnescapeGroups(RegExpMatch m, string text) { + private static string UnescapeGroups(CodeContext context, RegExpMatch m, string text) { for (int i = 0; i < text.Length; i++) { if (text[i] == '\\') { StringBuilder sb = new StringBuilder(text, 0, i, text.Length); @@ -1224,7 +1248,9 @@ private static string UnescapeGroups(RegExpMatch m, string text) { do { if (text[i] == '\\') { i++; - if (i == text.Length) { sb.Append('\\'); break; } + if (i == text.Length) { + throw PythonExceptions.CreateThrowable(error(context), $"bad escape (end of pattern) at position {i - 1}"); + } switch (text[i]) { case 'n': sb.Append('\n'); break; @@ -1285,6 +1311,7 @@ private static string UnescapeGroups(RegExpMatch m, string text) { sb.Append((char)val); } } else { + PythonOps.Warn(context, PythonExceptions.DeprecationWarning, $"bad escape \\{text[i]}"); // error in 3.7 sb.Append('\\'); sb.Append((char)text[i]); } diff --git a/Src/IronPython/Runtime/FormattingHelper.cs b/Src/IronPython/Runtime/FormattingHelper.cs index 13909fd2b..21be97843 100644 --- a/Src/IronPython/Runtime/FormattingHelper.cs +++ b/Src/IronPython/Runtime/FormattingHelper.cs @@ -2,14 +2,17 @@ // The .NET Foundation licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information. +#nullable enable + using System; +using System.Diagnostics; using System.Globalization; using System.Text; using System.Threading; namespace IronPython.Runtime { internal static class FormattingHelper { - private static NumberFormatInfo _invariantUnderscoreSeperatorInfo; + private static NumberFormatInfo? _invariantUnderscoreSeperatorInfo; /// /// Helper NumberFormatInfo for use by int/BigInteger __format__ routines @@ -33,10 +36,10 @@ public static NumberFormatInfo InvariantUnderscoreNumberInfo { } } - public static string/*!*/ ToCultureString(T/*!*/ val, NumberFormatInfo/*!*/ nfi, StringFormatSpec spec, int? overrideWidth = null) { + public static string/*!*/ ToCultureString(T/*!*/ val, NumberFormatInfo/*!*/ nfi, StringFormatSpec spec, int? overrideWidth = null) where T : notnull { string separator = nfi.NumberGroupSeparator; int[] separatorLocations = nfi.NumberGroupSizes; - string digits = val.ToString(); + string digits = val.ToString()!; // If we're adding leading zeros, we need to know how // many we need. @@ -126,5 +129,58 @@ public static NumberFormatInfo InvariantUnderscoreNumberInfo { return digits; } + + public static string AddUnderscores(string digits, StringFormatSpec spec, bool isNegative) { + var length = digits.Length + (digits.Length - 1) / 4; // length including minimum number of underscores + + int idx; + var fillLength = 0; + if (spec.Fill == '0') { + if (spec.Width > length) { + var width = spec.Width.Value; + if (isNegative || spec.Sign != null && spec.Sign != '-') width--; + fillLength = width - length; + length = width; + } + + // index of first underscore + idx = length % 5; + if (idx == 0) { + idx = 1; + fillLength++; + length++; + } + } else { + // index of first underscore + idx = length % 5; + if (idx == 0) { + idx = 1; + length++; + } + } + + var sb = new StringBuilder(length); + + for (int i = 0; i < fillLength; i++, idx--) { + if (idx == 0) { + sb.Append('_'); + idx = 5; + } else { + sb.Append('0'); + } + } + int j = 0; + for (int i = fillLength; i < length; i++, idx--) { + if (idx == 0) { + sb.Append('_'); + idx = 5; + } else { + sb.Append(digits[j++]); + } + } + Debug.Assert(j == digits.Length); + + return sb.ToString(); + } } } diff --git a/Src/IronPython/Runtime/Operations/BigIntegerOps.cs b/Src/IronPython/Runtime/Operations/BigIntegerOps.cs index 6ed328bf2..94dcb9cb1 100644 --- a/Src/IronPython/Runtime/Operations/BigIntegerOps.cs +++ b/Src/IronPython/Runtime/Operations/BigIntegerOps.cs @@ -75,9 +75,17 @@ private static object FastNew(CodeContext/*!*/ context, object? o, int @base = 1 throw new InvalidOperationException(); // unreachable } case string s: - return LiteralParser.ParseIntegerSign(s, @base, FindStart(s, @base)); + if (LiteralParser.TryParseIntegerSign(s, @base, FindStart(s, @base), out result)) { + return result; + } else { + throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {PythonOps.Repr(context, s)}"); + } case Extensible es: - return TryInvokeInt(context, o, out result) ? result : LiteralParser.ParseIntegerSign(es.Value, @base, FindStart(es.Value, @base)); + if (TryInvokeInt(context, o, out result) || LiteralParser.TryParseIntegerSign(es.Value, @base, FindStart(es.Value, @base), out result)) { + return result; + } else { + throw PythonOps.ValueError($"invalid literal for int() with base {@base}: {PythonOps.Repr(context, es)}"); + } default: break; } @@ -203,6 +211,8 @@ private static object ReturnObject(CodeContext context, PythonType cls, object v => cls == TypeCache.BigInteger ? value : cls.CreateInstance(context, value); private static int FindStart(string s, int radix) { + if (radix == 10) return 0; + int i = 0; // skip whitespace @@ -757,8 +767,7 @@ public static BigInteger ToBigInteger(BigInteger self) { if (spec.Fill == '0' && spec.Width > 1) { digits = FormattingHelper.ToCultureString(val, culture.NumberFormat, spec, (spec.Sign != null && spec.Sign != '-' || self < 0) ? spec.Width - 1 : null); - } - else { + } else { digits = FormattingHelper.ToCultureString(val, culture.NumberFormat, spec); } break; @@ -795,25 +804,37 @@ public static BigInteger ToBigInteger(BigInteger self) { if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = AbsToHex(val, lowercase: false); + digits = ToHexDigits(val, lowercase: false); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'x': if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = AbsToHex(val, lowercase: true); + digits = ToHexDigits(val, lowercase: true); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'o': // octal if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToOctal(val, lowercase: true); + digits = ToOctalDigits(val); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'b': // binary if (spec.Precision != null) { throw PythonOps.ValueError("Precision not allowed in integer format specifier"); } - digits = ToBinary(val, includeType: false, lowercase: true); + digits = ToBinaryDigits(val); + if (spec.ThousandsUnderscore) { + digits = FormattingHelper.AddUnderscores(digits, spec, self.IsNegative()); + } break; case 'c': // single char if (spec.Precision != null) { @@ -825,7 +846,7 @@ public static BigInteger ToBigInteger(BigInteger self) { int iVal; if (!self.AsInt32(out iVal)) { throw PythonOps.OverflowError("Python int too large to convert to System.Int32"); - } else if(iVal < 0 || iVal > 0x10ffff) { + } else if (iVal < 0 || iVal > 0x10ffff) { throw PythonOps.OverflowError("%c arg not in range(0x110000)"); } @@ -906,10 +927,9 @@ public static object from_bytes(CodeContext context, PythonType type, object? by var val = new BigInteger(bytesArr); #endif - // prevents a TypeError: int.__new__(bool) is not safe - if (type == TypeCache.Boolean) return val == 0 ? ScriptingRuntimeHelpers.False : ScriptingRuntimeHelpers.True; + if (type == TypeCache.BigInteger) return val; - return __new__(context, type, val); + return PythonTypeOps.CallParams(context, type, val); } #endregion @@ -1044,8 +1064,10 @@ public static TypeCode GetTypeCode(BigInteger self) { #region Helpers + /// + /// Unlike ConvertToDouble, this method produces a Python-specific overflow error messge. + /// internal static double ToDouble(BigInteger self) { - // Unlike ConvertToDouble, this method produces a Python-specific overflow error messge. if (MathUtils.TryToFloat64(self, out double res)) { return res; } @@ -1056,27 +1078,24 @@ internal static string AbsToHex(BigInteger val, bool lowercase) { return ToDigits(val, 16, lowercase); } - private static string ToOctal(BigInteger val, bool lowercase) { - return ToDigits(val, 8, lowercase); + private static string ToHexDigits(BigInteger val, bool lowercase) { + Debug.Assert(val >= 0); + return ToDigits(val, 16, lower: lowercase); } - internal static string ToBinary(BigInteger val) { - string res = ToBinary(BigInteger.Abs(val), true, true); - if (val.IsNegative()) { - res = "-" + res; - } - return res; + private static string ToOctalDigits(BigInteger val) { + Debug.Assert(val >= 0); + return ToDigits(val, 8, lower: false); } - private static string ToBinary(BigInteger val, bool includeType, bool lowercase) { - Debug.Assert(!val.IsNegative()); - - string digits = ToDigits(val, 2, lowercase); + private static string ToBinaryDigits(BigInteger val) { + Debug.Assert(val >= 0); + return ToDigits(val, 2, lower: false); + } - if (includeType) { - digits = (lowercase ? "0b" : "0B") + digits; - } - return digits; + internal static string ToBinary(BigInteger val) { + var digits = ToBinaryDigits(BigInteger.Abs(val)); + return ((val < 0) ? "-0b" : "0b") + digits; } private static string/*!*/ ToDigits(BigInteger/*!*/ val, int radix, bool lower) { @@ -1085,12 +1104,12 @@ private static string ToBinary(BigInteger val, bool includeType, bool lowercase) } StringBuilder str = new StringBuilder(); + char a = lower ? 'a' : 'A'; while (val != 0) { int digit = (int)(val % radix); if (digit < 10) str.Append((char)((digit) + '0')); - else if (lower) str.Append((char)((digit - 10) + 'a')); - else str.Append((char)((digit - 10) + 'A')); + else str.Append((char)((digit - 10) + a)); val /= radix; } diff --git a/Src/IronPython/Runtime/StringFormatSpec.cs b/Src/IronPython/Runtime/StringFormatSpec.cs index 054ac58c9..1a9b5b01c 100644 --- a/Src/IronPython/Runtime/StringFormatSpec.cs +++ b/Src/IronPython/Runtime/StringFormatSpec.cs @@ -131,7 +131,15 @@ private StringFormatSpec(char? fill, char? alignment, char? sign, int? width, bo curOffset++; } - // TODO: read optional underscore (new in 3.6) + // read optional underscore + if (curOffset != formatSpec.Length && + formatSpec[curOffset] == '_') { + thousandsUnderscore = true; + curOffset++; + if (thousandsComma || curOffset != formatSpec.Length && formatSpec[curOffset] == ',') { + throw PythonOps.ValueError("Cannot specify both ',' and '_'"); + } + } // read precision if (curOffset != formatSpec.Length && @@ -191,11 +199,14 @@ private StringFormatSpec(char? fill, char? alignment, char? sign, int? width, bo break; default: throw PythonOps.ValueError("Cannot specify '_' with '{0}'", type); - } } } + if (curOffset != formatSpec.Length) { + throw PythonOps.ValueError("Invalid format specifier '{0}'", formatSpec); + } + return new StringFormatSpec( fill, align, diff --git a/Src/IronPython/Runtime/StringFormatter.cs b/Src/IronPython/Runtime/StringFormatter.cs index d55dce007..bd62147f4 100644 --- a/Src/IronPython/Runtime/StringFormatter.cs +++ b/Src/IronPython/Runtime/StringFormatter.cs @@ -464,7 +464,7 @@ private void CheckDataUsed() { if (!PythonOps.IsMappingType(DefaultContext.Default, _data)) { if ((!(_data is PythonTuple) && _dataIndex != 1) || (_data is PythonTuple && _dataIndex != ((PythonTuple)_data).__len__())) { - throw PythonOps.TypeError("not all arguments converted during string formatting"); + throw PythonOps.TypeError("not all arguments converted during {0} formatting", _asBytes ? "bytes" : "string"); } } } @@ -870,10 +870,12 @@ private void AppendBytes() { Debug.Assert(_asBytes); if (_opts.Value is Bytes bytes || Bytes.TryInvokeBytesOperator(_context, _opts.Value, out bytes!)) { AppendString(StringOps.Latin1Encoding.GetString(bytes.UnsafeByteArray)); - } else if (_opts.Value is ByteArray byteArray) { - AppendString(StringOps.Latin1Encoding.GetString(byteArray.UnsafeByteList.AsByteSpan())); + } else if (_opts.Value is IBufferProtocol bufferProtocol) { + using var buffer = bufferProtocol.GetBuffer(BufferFlags.FullRO); + var span = buffer.IsCContiguous() ? buffer.AsReadOnlySpan() : buffer.ToArray(); + AppendString(StringOps.Latin1Encoding.GetString(span)); } else { - throw PythonOps.TypeError($"%b requires bytes, or an object that implements __bytes__, not '{PythonOps.GetPythonTypeName(_opts.Value)}'"); + throw PythonOps.TypeError($"%b requires a bytes-like object, or an object that implements __bytes__, not '{PythonOps.GetPythonTypeName(_opts.Value)}'"); } } diff --git a/Src/IronPythonTest/Stress/Engine.cs b/Src/IronPythonTest/Stress/Engine.cs index f44c3a842..3fe565880 100644 --- a/Src/IronPythonTest/Stress/Engine.cs +++ b/Src/IronPythonTest/Stress/Engine.cs @@ -44,6 +44,8 @@ private static long GetTotalMemory() { #if FEATURE_REFEMIT [Test] public void ScenarioXGC() { + Assert.AreEqual(1, 1); // prevents nunit from counting towards memory usage + long initialMemory = GetTotalMemory(); // Create multiple scopes: diff --git a/Tests/test_strformat.py b/Tests/test_strformat.py index 4b497e434..1a7a4c5bd 100644 --- a/Tests/test_strformat.py +++ b/Tests/test_strformat.py @@ -226,13 +226,27 @@ class bad2(object): def __format__(self, *args): return 42 - self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType" if is_cli else "__format__ method did not return string", '{0}'.format, bad()) - self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int" if is_cli else "__format__ method did not return string", '{0}'.format, bad2()) + if is_cli: + self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType", '{0}'.format, bad()) + self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int", '{0}'.format, bad2()) + elif sys.version_info >= (3, 5): + self.assertRaisesMessage(TypeError, "__format__ must return a str, not NoneType", '{0}'.format, bad()) + self.assertRaisesMessage(TypeError, "__format__ must return a str, not int", '{0}'.format, bad2()) + else: + self.assertRaisesMessage(TypeError, "__format__ method did not return string", '{0}'.format, bad()) + self.assertRaisesMessage(TypeError, "__format__ method did not return string", '{0}'.format, bad2()) self.assertRaisesMessage(ValueError, "Unknown conversion specifier x", '{0!x}'.format, 'abc') - self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType" if is_cli else "__format__ method did not return string", format, bad()) - self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int" if is_cli else "__format__ method did not return string", format, bad2()) + if is_cli: + self.assertRaisesMessage(TypeError, "bad.__format__ must return a str, not NoneType", format, bad()) + self.assertRaisesMessage(TypeError, "bad2.__format__ must return a str, not int", format, bad2()) + elif sys.version_info >= (3, 5): + self.assertRaisesMessage(TypeError, "__format__ must return a str, not NoneType", format, bad()) + self.assertRaisesMessage(TypeError, "__format__ must return a str, not int", format, bad2()) + else: + self.assertRaisesMessage(TypeError, "__format__ method did not return string", format, bad()) + self.assertRaisesMessage(TypeError, "__format__ method did not return string", format, bad2()) def test_object__format__(self): self.assertEqual(object.__format__("aaa", ""), "aaa") @@ -278,8 +292,13 @@ def test_str___format___errors(self): for char in allChars: if char != 's' and (char < '0' or char > '9'): x = ord(char) - if char==',': + if char == ',': errors.append(('10' + char, "Cannot specify ',' with 's'.")) + elif char == '_': + if is_cli or sys.version_info >= (3, 6): + errors.append(('10' + char, "Cannot specify '_' with 's'.")) + else: + errors.append(('10' + char, "Unknown format code '_' for object of type 'str'")) elif 0x20 < x < 0x80: errors.append(('10' + char, "Unknown format code '%s' for object of type 'str'" % char)) else: @@ -569,6 +588,9 @@ def test_float___format___errors(self): errors = [] okChars = set(['\0', '%', 'E', 'F', 'G', 'e', 'f', 'g', 'n', ','] + [chr(x) for x in range(ord('0'), ord('9') + 1)]) + if is_cli or sys.version_info >= (3, 6): + okChars.add('_') + # verify the okChars are actually ok for char in okChars: 2.0.__format__('10' + char) @@ -868,12 +890,58 @@ def test_int___format__(self): (1, '.0F', '1'), (1, '.0g', '1'), (1, '.0G', '1'), - ] + ] for value, spec, result in tests: self.assertEqual(value.__format__(spec), result) self.assertEqual(big(value).__format__(spec), result) + tests_thousands = [ + # thousands separator + (1000, ',', '1,000'), + (1000, '+,', '+1,000'), + (1000, '-,', '1,000'), + (1000, ' ,', ' 1,000'), + (-1000, ',', '-1,000'), + (-1000, '+,', '-1,000'), + (-1000, '-,', '-1,000'), + (-1000, ' ,', '-1,000'), + (1000, '_', '1_000'), + (1000, '+_', '+1_000'), + (1000, '-_', '1_000'), + (1000, ' _', ' 1_000'), + (-1000, '_', '-1_000'), + (-1000, '+_', '-1_000'), + (-1000, '-_', '-1_000'), + (-1000, ' _', '-1_000'), + + # thousands separator (zero padded) + (1000, '08,', '0,001,000'), + (1000, '+08,', '+001,000'), + (1000, '-08,', '0,001,000'), + (1000, ' 08,', ' 001,000'), + (-1000, '08,', '-001,000'), + (-1000, '+08,', '-001,000'), + (-1000, '-08,', '-001,000'), + (-1000, ' 08,', '-001,000'), + (-1000, '08_', '-001_000'), + (-1000, '+08_', '-001_000'), + (-1000, '-08_', '-001_000'), + (-1000, ' 08_', '-001_000'), + ] + + for value, spec, result in tests_thousands: + if not is_cli and sys.version_info < (3, 6) and "_" in spec: + continue + + assert "d" not in spec + self.assertEqual(value.__format__(spec), result) + self.assertEqual(big(value).__format__(spec), result) + + # also test with an explicit "d" + self.assertEqual(value.__format__(spec + "d"), result) + self.assertEqual(big(value).__format__(spec + "d"), result) + locale_tests = [ (1000, 'n', '1000', '1,000'), (-1000, 'n', '-1000', '-1,000'), @@ -921,6 +989,8 @@ def test_int___format___errors(self): errors.append((OverflowError, sys.maxsize + 1, 'c', "Python int too large to convert to C long")) okChars = set(['%', 'E', 'F', 'G', 'X', 'x', 'b', 'c', 'd', 'o', 'e', 'f', 'g', 'n', ','] + [chr(x) for x in range(ord('0'), ord('9') + 1)]) + if is_cli or sys.version_info >= (3, 6): + okChars.add('_') # verify the okChars are actually ok for char in okChars: