From 28e5897b534b52dddaf06727af011dc884e72340 Mon Sep 17 00:00:00 2001 From: Norwin Roosen Date: Wed, 14 Aug 2019 21:19:23 +0200 Subject: [PATCH 1/6] detect csv delimiter in csv rendering fixes #7868 --- modules/markup/csv/csv.go | 56 ++++++++++++++++++++++++++++++++-- modules/markup/csv/csv_test.go | 10 ++++-- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 077947e774425..eecac8c1e77ef 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -9,6 +9,8 @@ import ( "encoding/csv" "html" "io" + "math" + "strings" "code.gitea.io/gitea/modules/markup" ) @@ -28,12 +30,13 @@ func (Parser) Name() string { // Extensions implements markup.Parser func (Parser) Extensions() []string { - return []string{".csv"} + return []string{".csv", ".tsv"} } // Render implements markup.Parser -func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { +func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { rd := csv.NewReader(bytes.NewReader(rawBytes)) + rd.Comma = p.bestDelimiter(rawBytes) var tmpBlock bytes.Buffer tmpBlock.WriteString(``) for { @@ -56,3 +59,52 @@ func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, return tmpBlock.Bytes() } + +func (p Parser) bestDelimiter(data []byte) rune { + // Scores the input data against delimiters, and returns the best matching. + // Reads at most 10k bytes & 10 lines. + maxLines := 10 + maxBytes := int(math.Min(float64(len(data)), 1e4)) + text := string(data[:maxBytes]) + lines := strings.SplitN(text, "\n", maxLines+1)[:maxLines] + + delimiters := []rune{',', ';', '\t', '|'} + bestDelim := delimiters[0] + bestScore := 0.0 + + for _, delim := range delimiters { + score := p.scoreDelimiter(lines, delim) + if score > bestScore { + bestScore = score + bestDelim = delim + } + } + + return bestDelim +} + +func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) { + // Scores a delimiter against input csv data with a count and regularity metric. + + countTotal := 0.0 + countLineMax := 0.0 + linesNotEqual := 0.0 + + for _, line := range lines { + if len(line) == 0 { + continue + } + + countLine := float64(strings.Count(line, string(delim))) + countTotal += countLine + + if countLine != countLineMax { + if countLineMax != 0 { + linesNotEqual += 1 + } + countLineMax = math.Max(countLine, countLineMax) + } + } + + return countTotal * (1 - linesNotEqual/float64(len(lines))) +} diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index f050296cee1bb..7fb4ad0007e0c 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -13,9 +13,13 @@ import ( func TestRenderCSV(t *testing.T) { var parser Parser var kases = map[string]string{ - "a": "
a
", - "1,2": "
12
", - "
": "
<br/>
", + "a": "
a
", + "1,2": "
12
", + "1;2": "
12
", + "1\t2": "
12
", + "1|2": "
12
", + "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", + "
": "
<br/>
", } for k, v := range kases { From 4a182238671de79fff9cf5e772528e2143f45a3e Mon Sep 17 00:00:00 2001 From: Norwin Roosen Date: Thu, 15 Aug 2019 03:49:50 +0200 Subject: [PATCH 2/6] make linter happy --- modules/markup/csv/csv.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index eecac8c1e77ef..c92aa603757a1 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -100,7 +100,7 @@ func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) { if countLine != countLineMax { if countLineMax != 0 { - linesNotEqual += 1 + linesNotEqual++ } countLineMax = math.Max(countLine, countLineMax) } From 7596c4ba843fb6ae6fe9d7fa89b3b5f934160c08 Mon Sep 17 00:00:00 2001 From: Norwin Roosen Date: Thu, 15 Aug 2019 12:32:56 +0200 Subject: [PATCH 3/6] fix failing testcase & use ints where possible --- modules/markup/csv/csv.go | 28 +++++++++++++--------------- modules/markup/csv/csv_test.go | 2 +- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index c92aa603757a1..2a75e770b68eb 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -9,10 +9,10 @@ import ( "encoding/csv" "html" "io" - "math" "strings" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/util" ) func init() { @@ -60,18 +60,18 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin return tmpBlock.Bytes() } +// bestDelimiter scores the input CSV data against delimiters, and returns the best match. +// Reads at most 10k bytes & 10 lines. func (p Parser) bestDelimiter(data []byte) rune { - // Scores the input data against delimiters, and returns the best matching. - // Reads at most 10k bytes & 10 lines. maxLines := 10 - maxBytes := int(math.Min(float64(len(data)), 1e4)) + maxBytes := util.Min(len(data), 1e4) text := string(data[:maxBytes]) - lines := strings.SplitN(text, "\n", maxLines+1)[:maxLines] + lines := strings.SplitN(text, "\n", maxLines+1) + lines = lines[:util.Min(maxLines, len(lines))] delimiters := []rune{',', ';', '\t', '|'} bestDelim := delimiters[0] bestScore := 0.0 - for _, delim := range delimiters { score := p.scoreDelimiter(lines, delim) if score > bestScore { @@ -83,28 +83,26 @@ func (p Parser) bestDelimiter(data []byte) rune { return bestDelim } +// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) { - // Scores a delimiter against input csv data with a count and regularity metric. - - countTotal := 0.0 - countLineMax := 0.0 - linesNotEqual := 0.0 + countTotal := 0 + countLineMax := 0 + linesNotEqual := 0 for _, line := range lines { if len(line) == 0 { continue } - countLine := float64(strings.Count(line, string(delim))) + countLine := strings.Count(line, string(delim)) countTotal += countLine - if countLine != countLineMax { if countLineMax != 0 { linesNotEqual++ } - countLineMax = math.Max(countLine, countLineMax) + countLineMax = util.Max(countLine, countLineMax) } } - return countTotal * (1 - linesNotEqual/float64(len(lines))) + return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) } diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index 7fb4ad0007e0c..a3f5bbdd215c8 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -18,7 +18,7 @@ func TestRenderCSV(t *testing.T) { "1;2": "
12
", "1\t2": "
12
", "1|2": "
12
", - "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", + "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", "
": "
<br/>
", } From 8f12e60264b791484ca2b3836a050f5904c6685e Mon Sep 17 00:00:00 2001 From: Norwin Roosen Date: Thu, 15 Aug 2019 15:00:33 +0200 Subject: [PATCH 4/6] expose markup type to template previously all markup had the .markdown class, which is incorrect, as it applies markdown CSS & JS logic to CSV rendering --- public/css/index.css | 3 ++- public/less/_repository.less | 4 ++++ routers/repo/view.go | 6 ++++-- templates/repo/view_file.tmpl | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/public/css/index.css b/public/css/index.css index b19b85ad36af5..bb37cee68924f 100644 --- a/public/css/index.css +++ b/public/css/index.css @@ -489,6 +489,7 @@ footer .ui.left,footer .ui.right{line-height:40px} .repository.file.list .non-diff-file-content .view-raw img{padding:5px 5px 0 5px} .repository.file.list .non-diff-file-content .plain-text{padding:1em 2em 1em 2em} .repository.file.list .non-diff-file-content .plain-text pre{word-break:break-word;white-space:pre-wrap} +.repository.file.list .non-diff-file-content .csv{overflow-x:auto} .repository.file.list .non-diff-file-content pre{overflow:auto} .repository.file.list .sidebar{padding-left:0} .repository.file.list .sidebar .octicon{width:16px} @@ -1011,4 +1012,4 @@ tbody.commit-list{vertical-align:baseline} .comment-code-cloud .footer:after{clear:both;content:"";display:block} .comment-code-cloud button.comment-form-reply{margin:.5em .5em .5em 4.5em} .comment-code-cloud form.comment-form-reply{margin:0 0 0 4em} -.file-comment{font:12px 'SF Mono',Consolas,Menlo,'Liberation Mono',Monaco,'Lucida Console',monospace;color:rgba(0,0,0,.87)} \ No newline at end of file +.file-comment{font:12px 'SF Mono',Consolas,Menlo,'Liberation Mono',Monaco,'Lucida Console',monospace;color:rgba(0,0,0,.87)} diff --git a/public/less/_repository.less b/public/less/_repository.less index eb183c1626d28..ef05beb6fa399 100644 --- a/public/less/_repository.less +++ b/public/less/_repository.less @@ -400,6 +400,10 @@ } } + .csv { + overflow-x: auto; + } + pre { overflow: auto; } diff --git a/routers/repo/view.go b/routers/repo/view.go index b534ae0e16405..9d24506b47ce9 100644 --- a/routers/repo/view.go +++ b/routers/repo/view.go @@ -162,8 +162,9 @@ func renderDirectory(ctx *context.Context, treeLink string) { d, _ := ioutil.ReadAll(dataRc) buf = charset.ToUTF8WithFallback(append(buf, d...)) - if markup.Type(readmeFile.Name()) != "" { + if markupType := markup.Type(readmeFile.Name()); markupType != "" { ctx.Data["IsMarkup"] = true + ctx.Data["MarkupType"] = string(markupType) ctx.Data["FileContent"] = string(markup.Render(readmeFile.Name(), buf, treeLink, ctx.Repo.Repository.ComposeMetas())) } else { ctx.Data["IsRenderedHTML"] = true @@ -282,8 +283,9 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st readmeExist := markup.IsReadmeFile(blob.Name()) ctx.Data["ReadmeExist"] = readmeExist - if markup.Type(blob.Name()) != "" { + if markupType := markup.Type(blob.Name()); markupType != "" { ctx.Data["IsMarkup"] = true + ctx.Data["MarkupType"] = markupType ctx.Data["FileContent"] = string(markup.Render(blob.Name(), buf, path.Dir(treeLink), ctx.Repo.Repository.ComposeMetas())) } else if readmeExist { ctx.Data["IsRenderedHTML"] = true diff --git a/templates/repo/view_file.tmpl b/templates/repo/view_file.tmpl index 72b1ae7a84e50..895a72aaee5b0 100644 --- a/templates/repo/view_file.tmpl +++ b/templates/repo/view_file.tmpl @@ -45,7 +45,7 @@
-
+
{{if .IsMarkup}} {{if .FileContent}}{{.FileContent | Safe}}{{end}} {{else if .IsRenderedHTML}} From 8d61e12093743b403b69766a1b93157601976996 Mon Sep 17 00:00:00 2001 From: Norwin Roosen Date: Thu, 15 Aug 2019 17:05:11 +0200 Subject: [PATCH 5/6] fix build (missing `make css`) --- public/css/index.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/css/index.css b/public/css/index.css index bb37cee68924f..6b906cc4a8137 100644 --- a/public/css/index.css +++ b/public/css/index.css @@ -1012,4 +1012,4 @@ tbody.commit-list{vertical-align:baseline} .comment-code-cloud .footer:after{clear:both;content:"";display:block} .comment-code-cloud button.comment-form-reply{margin:.5em .5em .5em 4.5em} .comment-code-cloud form.comment-form-reply{margin:0 0 0 4em} -.file-comment{font:12px 'SF Mono',Consolas,Menlo,'Liberation Mono',Monaco,'Lucida Console',monospace;color:rgba(0,0,0,.87)} +.file-comment{font:12px 'SF Mono',Consolas,Menlo,'Liberation Mono',Monaco,'Lucida Console',monospace;color:rgba(0,0,0,.87)} \ No newline at end of file From 0e3e9da35ec4c661c671de6c3ca1a684086dc275 Mon Sep 17 00:00:00 2001 From: Norwin Roosen Date: Thu, 15 Aug 2019 22:06:36 +0200 Subject: [PATCH 6/6] ignore quoted csv content for delimiter scoring also fix html generation --- modules/markup/csv/csv.go | 7 ++++++- modules/markup/csv/csv_test.go | 15 ++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 2a75e770b68eb..1e3acc9b47e2f 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -9,14 +9,18 @@ import ( "encoding/csv" "html" "io" + "regexp" "strings" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/util" ) +var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) + func init() { markup.RegisterParser(Parser{}) + } // Parser implements markup.Parser for orgmode @@ -53,7 +57,7 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin tmpBlock.WriteString(html.EscapeString(field)) tmpBlock.WriteString("") } - tmpBlock.WriteString("") + tmpBlock.WriteString("") } tmpBlock.WriteString("") @@ -66,6 +70,7 @@ func (p Parser) bestDelimiter(data []byte) rune { maxLines := 10 maxBytes := util.Min(len(data), 1e4) text := string(data[:maxBytes]) + text = quoteRegexp.ReplaceAllLiteralString(text, "") lines := strings.SplitN(text, "\n", maxLines+1) lines = lines[:util.Min(maxLines, len(lines))] diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index a3f5bbdd215c8..4d4e0871e94d4 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -13,13 +13,14 @@ import ( func TestRenderCSV(t *testing.T) { var parser Parser var kases = map[string]string{ - "a": "
a
", - "1,2": "
12
", - "1;2": "
12
", - "1\t2": "
12
", - "1|2": "
12
", - "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", - "
": "
<br/>
", + "a": "
a
", + "1,2": "
12
", + "1;2": "
12
", + "1\t2": "
12
", + "1|2": "
12
", + "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", + "\"1,2,3,4\";\"a\nb\"\nc;d": "
1,2,3,4a\nb
cd
", + "
": "
<br/>
", } for k, v := range kases {