diff --git a/Signum.Utilities/Csv.cs b/Signum.Utilities/Csv.cs index 8a6c4a2ab8..d8e6431495 100644 --- a/Signum.Utilities/Csv.cs +++ b/Signum.Utilities/Csv.cs @@ -6,6 +6,8 @@ using System.Collections.Concurrent; using System.Collections; using System.IO.Pipes; +using System; +using System.ComponentModel.Design.Serialization; namespace Signum.Utilities; @@ -181,8 +183,7 @@ public static IEnumerable ReadStream(Stream stream, Encoding? encoding = n var members = CsvMemberCache.Members; var parsers = members.Select(m => GetParser(defCulture, m, defOptions.ParserFactory)).ToList(); - - Regex regex = GetRegex(defCulture, defOptions.RegexTimeout, defOptions.ListSeparator); + Regex valueRegex = GetRegex(isLine: false, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator); if (defOptions.AsumeSingleLine) { @@ -199,66 +200,68 @@ public static IEnumerable ReadStream(Stream stream, Encoding? encoding = n if (csvLine == null) yield break; - Match? m = null; - T? t = null; - try + if (csvLine.Length > 0) { - m = regex.Match(csvLine); - if (m.Length > 0) + T? t = null; + try { - t = ReadObject(m, members, parsers); + var m = valueRegex.EnumerateMatches(csvLine); + + t = ReadObject(m, csvLine.AsSpan(), members, parsers); } - } - catch (Exception e) - { - e.Data["row"] = line; + catch (Exception e) + { + e.Data["row"] = line; - if (defOptions.SkipError == null || !defOptions.SkipError(e, m)) - throw new ParseCsvException(e); - } + if (defOptions.SkipError == null || !defOptions.SkipError(e, csvLine)) + throw new ParseCsvException(e); + } - if (t != null) - yield return t; + if (t != null) + yield return t; + } line++; } } } else { + Regex lineRegex = GetRegex(isLine: true, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator); + using (StreamReader sr = new StreamReader(stream, encoding)) { string str = sr.ReadToEnd(); - var matches = regex.Matches(str).Cast(); - - if (skipLines > 0) - matches = matches.Skip(skipLines); - - int line = skipLines; - foreach (var m in matches) + int i = 0; + foreach (Match m in lineRegex.Matches(str)) { + if (i < skipLines) + continue; + if (m.Length > 0) { T? t = null; try { + var line = m.Value; + if (options?.Constructor != null) - t = options.Constructor(m); + t = options.Constructor(line); else - t = ReadObject(m, members, parsers); + t = ReadObject(valueRegex.EnumerateMatches(line), line, members, parsers); } catch (Exception e) { - e.Data["row"] = line; + e.Data["row"] = i; - if (defOptions.SkipError == null || !defOptions.SkipError(e, m)) + if (defOptions.SkipError == null || !defOptions.SkipError(e, str.Substring(m.Index, m.Length))) throw new ParseCsvException(e); } if (t != null) yield return t; } - line++; + i++; } } } @@ -271,18 +274,20 @@ public static T ReadLine(string csvLine, CultureInfo? culture = null, CsvRead var defCulture = GetDefaultCulture(culture); - Regex regex = GetRegex(defCulture, defOptions.RegexTimeout); + Regex regex = GetRegex(isLine: false, defCulture, defOptions.RegexTimeout); - Match m = regex.Match(csvLine); + var vme = regex.EnumerateMatches(csvLine); var members = CsvMemberCache.Members; - return ReadObject(m, + return ReadObject(vme, + csvLine.AsSpan(), members, members.Select(c => GetParser(defCulture, c, defOptions.ParserFactory)).ToList()); } - private static Func GetParser(CultureInfo culture, CsvMemberInfo column, Func, CultureInfo, Func?>? parserFactory) + + private static ValueParser GetParser(CultureInfo culture, CsvMemberInfo column, Func, CultureInfo, ValueParser?>? parserFactory) { if (parserFactory != null) { @@ -294,43 +299,51 @@ public static T ReadLine(string csvLine, CultureInfo? culture = null, CsvRead var type = column.IsCollection ? column.MemberInfo.ReturningType().ElementType()! : column.MemberInfo.ReturningType(); - return str => ConvertTo(str, type, culture, column.Format); + return GetBasicParser(type.UnNullify(), culture, column.Format); } - static T ReadObject(Match m, List> members, List> parsers) - { - var vals = m.Groups["val"].Captures; - - if (vals.Count < members.Count) - throw new FormatException("Only {0} columns found (instead of {1}) in line: {2}".FormatWith(vals.Count, members.Count, m.Value)); + public delegate object? ValueParser(ReadOnlySpan str); + static T ReadObject(Regex.ValueMatchEnumerator vme, ReadOnlySpan line, List> members, List parsers) + { T t = Activator.CreateInstance(); - for (int i = 0; i < members.Count; i++) + bool endsInCollection = false; + int i = 0; + foreach (var v in vme) { + if (members.Count <= i) + continue; + + var value = line.Slice(v.Index, v.Length); var member = members[i]; var parser = parsers[i]; - string? str = null; try { if (!member.IsCollection) { - str = DecodeCsv(vals[i].Value); + value = DecodeCsv(value); - object? val = parser(str); + object? val = parser(value); member.MemberEntry.Setter!(t, val); } else { + if (i != members.Count - 1) + throw new InvalidOperationException($"Collection {member.MemberInfo} should be the last member"); + endsInCollection = true; var list = (IList)Activator.CreateInstance(member.MemberInfo.ReturningType())!; - for (int j = i; j < vals.Count; j++) - { - str = DecodeCsv(vals[j].Value); - - object? val = parser(str); + value = DecodeCsv(value); + object? val = parser(value); + list.Add(val); + foreach (var v2 in vme) + { + value = line.Slice(v2.Index, v2.Length); + value = DecodeCsv(value); + val = parser(value); list.Add(val); } @@ -339,11 +352,17 @@ static T ReadObject(Match m, List> members, List ReadUntypedStream(Stream stream, Encoding? e var defCulture = GetDefaultCulture(culture); var defOptions = options ?? new CsvReadOptions(); - Regex regex = GetRegex(defCulture, defOptions.RegexTimeout, defOptions.ListSeparator); + Regex valueRegex = GetRegex(false, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator); if (defOptions.AsumeSingleLine) { using (StreamReader sr = new StreamReader(stream, encoding)) @@ -386,7 +405,7 @@ public static IEnumerable ReadUntypedStream(Stream stream, Encoding? e string[]? t = null; try { - m = regex.Match(csvLine); + m = valueRegex.Match(csvLine); if (m.Length > 0) { t = m.Groups["val"].Captures.Select(c => c.Value).ToArray(); @@ -396,7 +415,7 @@ public static IEnumerable ReadUntypedStream(Stream stream, Encoding? e { e.Data["row"] = line; - if (defOptions.SkipError == null || !defOptions.SkipError(e, m)) + if (defOptions.SkipError == null || !defOptions.SkipError(e, csvLine)) throw new ParseCsvException(e); } @@ -413,7 +432,7 @@ public static IEnumerable ReadUntypedStream(Stream stream, Encoding? e { string str = sr.ReadToEnd(); - var matches = regex.Matches(str).Cast(); + var matches = valueRegex.Matches(str).Cast(); int line = 0; foreach (var m in matches) @@ -429,7 +448,7 @@ public static IEnumerable ReadUntypedStream(Stream stream, Encoding? e { e.Data["row"] = line; - if (defOptions.SkipError == null || !defOptions.SkipError(e, m)) + if (defOptions.SkipError == null || !defOptions.SkipError(e, m.Value)) throw new ParseCsvException(e); } if (t != null) @@ -530,16 +549,17 @@ public class MyFileCSV """; } - - static ConcurrentDictionary regexCache = new ConcurrentDictionary(); - const string BaseRegex = @"^((?'(?:[^']+|'')*'|[^;\r\n]*))?((?!($|\r\n));(?'(?:[^']+|'')*'|[^;\r\n]*))*($|\r\n)"; - static Regex GetRegex(CultureInfo culture, TimeSpan timeout, char? listSeparator = null) + static ConcurrentDictionary<(bool multiLine, char separator, TimeSpan timeout), Regex> regexCache = new(); + readonly static string ValueRegex = "'(?:[^']+|'')*'|[^;\r\n]*".Replace('\'', '"'); + readonly static string LineRegex = $@"^({ValueRegex})?((?!($|\r\n));({ValueRegex}))*($|\r\n)"; + static Regex GetRegex(bool isLine, CultureInfo culture, TimeSpan timeout, char? listSeparator = null) { char separator = listSeparator ?? GetListSeparator(culture); - return regexCache.GetOrAdd(separator, s => - new Regex(BaseRegex.Replace('\'', '"').Replace(';', s), RegexOptions.Multiline | RegexOptions.ExplicitCapture, timeout)); + return regexCache.GetOrAdd((isLine, separator, timeout), a => + new Regex((isLine ? LineRegex : ValueRegex).Replace(';', a.separator), RegexOptions.Multiline | RegexOptions.ExplicitCapture, a.timeout)); } + private static char GetListSeparator(CultureInfo culture) { @@ -570,11 +590,13 @@ static CsvMemberCache() public static List> Members; } - static string DecodeCsv(string s) + + + static ReadOnlySpan DecodeCsv(ReadOnlySpan s) { if (s.StartsWith("\"") && s.EndsWith("\"")) { - string str = s[1..^1].Replace("\"\"", "\""); + string str = new string(s[1..^1]).Replace("\"\"", "\""); return Regex.Replace(str, "(? str => str.Length == 0 ? null : str.ToString(), + _ when type == typeof(byte) => str => str.Length == 0 ? null : byte.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(sbyte) => str => str.Length == 0 ? null : sbyte.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(short) => str => str.Length == 0 ? null : short.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(ushort) => str => str.Length == 0 ? null : ushort.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(int) => str => str.Length == 0 ? null : int.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(uint) => str => str.Length == 0 ? null : uint.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(long) => str => str.Length == 0 ? null : long.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(ulong) => str => str.Length == 0 ? null : ulong.Parse(str, NumberStyles.Integer, culture), + _ when type == typeof(float) => str => str.Length == 0 ? null : float.Parse(str, NumberStyles.Float, culture), + _ when type == typeof(double) => str => str.Length == 0 ? null : double.Parse(str, NumberStyles.Float, culture), + _ when type == typeof(decimal) => str => str.Length == 0 ? null : decimal.Parse(str, NumberStyles.Number, culture), + _ when type == typeof(DateTime) => str => str.Length == 0 ? null : DateTime.ParseExact(str, format, culture), + _ when type == typeof(DateTimeOffset) => str => str.Length == 0 ? null : DateTimeOffset.ParseExact(str, format, culture), + _ when type == typeof(DateOnly) => str => str.Length == 0 ? null : DateOnly.ParseExact(str, format, culture), + _ when type == typeof(TimeOnly) => str => str.Length == 0 ? null : TimeOnly.ParseExact(str, format, culture), + _ when type == typeof(Guid) => str => str.Length == 0 ? null : Guid.Parse(str.ToString()), + _ when type.IsEnum => str => str.Length == 0 ? null : Enum.Parse(type, str), + _ => str => Convert.ChangeType(new string(str), type, culture) + }; } } public class CsvReadOptions : CsvReadOptions where T : class { - public Func, CultureInfo, Func?>? ParserFactory; - public Func? Constructor; + public Func, CultureInfo, Csv.ValueParser?>? ParserFactory; + public CsvConstructor? Constructor; } +public delegate T CsvConstructor(ReadOnlySpan line); + public class CsvReadOptions { - public bool AsumeSingleLine = false; - public Func? SkipError; + public bool AsumeSingleLine = true; //Breaking change! + public Func? SkipError; public TimeSpan RegexTimeout = Regex.InfiniteMatchTimeout; public char? ListSeparator; }