Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Full-Text Fixes Pt. 3 #1942

Merged
merged 1 commit into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 199 additions & 5 deletions enginetest/queries/fulltext_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package queries

import (
"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/plan"
"github.com/dolthub/go-mysql-server/sql/types"
)

Expand Down Expand Up @@ -245,6 +246,106 @@ var FulltextTests = []ScriptTest{
},
},
},
// Checks that Full-Text search results follow row modifications: each UPDATE
// or DELETE below is followed by MATCH ... AGAINST queries whose expected rows
// reflect the new contents of v1 and v2.
{
Name: "Basic UPDATE and DELETE checks",
SetUpScript: []string{
"CREATE TABLE test (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), v2 VARCHAR(200), FULLTEXT idx (v1, v2));",
"INSERT INTO test VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');",
},
Assertions: []ScriptTestAssertion{
{
// Baseline: only row 2 contains 'ghi'.
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('ghi');",
Expected: []sql.Row{{uint64(2), "ghi", "jkl"}},
},
{
// Replace 'ghi' with 'rgb' in row 2.
Query: "UPDATE test SET v1 = 'rgb' WHERE pk = 2;",
Expected: []sql.Row{{types.OkResult{RowsAffected: 1, Info: plan.UpdateInfo{Matched: 1, Updated: 1}}}},
},
{
// The old word must no longer match any row.
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('ghi');",
Expected: []sql.Row{},
},
{
// The new word must match the updated row.
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('rgb');",
Expected: []sql.Row{{uint64(2), "rgb", "jkl"}},
},
{
// Give row 2 a word ('mno') that rows 3 and 5 already contain.
Query: "UPDATE test SET v2 = 'mno' WHERE pk = 2;",
Expected: []sql.Row{{types.OkResult{RowsAffected: 1, Info: plan.UpdateInfo{Matched: 1, Updated: 1}}}},
},
{
// Row 2 now joins rows 3 and 5 in the result.
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('mno');",
Expected: []sql.Row{{uint64(2), "rgb", "mno"}, {uint64(3), "mno", "mno"}, {uint64(5), "ghs", "mno shg"}},
},
{
Query: "DELETE FROM test WHERE pk = 3;",
Expected: []sql.Row{{types.NewOkResult(1)}},
},
{
// The deleted row 3 must drop out of the result.
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('mno');",
Expected: []sql.Row{{uint64(2), "rgb", "mno"}, {uint64(5), "ghs", "mno shg"}},
},
},
},
// Checks that Full-Text matching uses the collation of the indexed columns.
// test1 and test2 hold identical rows; test1's columns use utf8mb4_0900_bin and
// test2's use utf8mb4_0900_ai_ci. Lowercase search terms are expected to give
// the same rows on both tables, while uppercase terms are expected to match
// nothing on test1 but still match on test2.
{
Name: "Collation handling",
SetUpScript: []string{
"CREATE TABLE test1 (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200) COLLATE utf8mb4_0900_bin, v2 VARCHAR(200) COLLATE utf8mb4_0900_bin, FULLTEXT idx (v1, v2));",
"CREATE TABLE test2 (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200) COLLATE utf8mb4_0900_ai_ci, v2 VARCHAR(200) COLLATE utf8mb4_0900_ai_ci, FULLTEXT idx (v1, v2));",
"INSERT INTO test1 VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');",
"INSERT INTO test2 VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');",
},
Assertions: []ScriptTestAssertion{
// test1 (utf8mb4_0900_bin), lowercase terms: matches as stored.
{
Query: "SELECT * FROM test1 WHERE MATCH(v1, v2) AGAINST ('ghi');",
Expected: []sql.Row{{uint64(2), "ghi", "jkl"}},
},
{
// "= 0" selects the rows that do NOT match; row 2 contains 'jkl' and is excluded.
Query: "SELECT * FROM test1 WHERE MATCH(v2, v1) AGAINST ('jkl') = 0;",
Expected: []sql.Row{{uint64(1), "abc", "def pqr"}, {uint64(3), "mno", "mno"}, {uint64(4), "stu vwx", "xyz zyx yzx"}, {uint64(5), "ghs", "mno shg"}},
},
{
Query: "SELECT * FROM test1 WHERE MATCH(v2, v1) AGAINST ('jkl mno') AND pk = 3;",
Expected: []sql.Row{{uint64(3), "mno", "mno"}},
},
// test1 (utf8mb4_0900_bin), uppercase terms: nothing is expected to match.
{
Query: "SELECT * FROM test1 WHERE MATCH(v1, v2) AGAINST ('GHI');",
Expected: []sql.Row{},
},
{
// No row matches 'JKL', so every row satisfies "= 0".
Query: "SELECT * FROM test1 WHERE MATCH(v2, v1) AGAINST ('JKL') = 0;",
Expected: []sql.Row{{uint64(1), "abc", "def pqr"}, {uint64(2), "ghi", "jkl"}, {uint64(3), "mno", "mno"}, {uint64(4), "stu vwx", "xyz zyx yzx"}, {uint64(5), "ghs", "mno shg"}},
},
{
Query: "SELECT * FROM test1 WHERE MATCH(v2, v1) AGAINST ('JKL MNO') AND pk = 3;",
Expected: []sql.Row{},
},
// test2 (utf8mb4_0900_ai_ci), lowercase terms: same results as test1.
{
Query: "SELECT * FROM test2 WHERE MATCH(v1, v2) AGAINST ('ghi');",
Expected: []sql.Row{{uint64(2), "ghi", "jkl"}},
},
{
Query: "SELECT * FROM test2 WHERE MATCH(v2, v1) AGAINST ('jkl') = 0;",
Expected: []sql.Row{{uint64(1), "abc", "def pqr"}, {uint64(3), "mno", "mno"}, {uint64(4), "stu vwx", "xyz zyx yzx"}, {uint64(5), "ghs", "mno shg"}},
},
{
Query: "SELECT * FROM test2 WHERE MATCH(v2, v1) AGAINST ('jkl mno') AND pk = 3;",
Expected: []sql.Row{{uint64(3), "mno", "mno"}},
},
// test2 (utf8mb4_0900_ai_ci), uppercase terms: expected to match exactly like the lowercase ones.
{
Query: "SELECT * FROM test2 WHERE MATCH(v1, v2) AGAINST ('GHI');",
Expected: []sql.Row{{uint64(2), "ghi", "jkl"}},
},
{
Query: "SELECT * FROM test2 WHERE MATCH(v2, v1) AGAINST ('JKL') = 0;",
Expected: []sql.Row{{uint64(1), "abc", "def pqr"}, {uint64(3), "mno", "mno"}, {uint64(4), "stu vwx", "xyz zyx yzx"}, {uint64(5), "ghs", "mno shg"}},
},
{
Query: "SELECT * FROM test2 WHERE MATCH(v2, v1) AGAINST ('JKL MNO') AND pk = 3;",
Expected: []sql.Row{{uint64(3), "mno", "mno"}},
},
},
},
{
Name: "CREATE INDEX before insertions",
SetUpScript: []string{
Expand Down Expand Up @@ -444,13 +545,69 @@ var FulltextTests = []ScriptTest{
{
Name: "ALTER TABLE DROP COLUMN used by index",
SetUpScript: []string{
"CREATE TABLE test (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), v2 VARCHAR(200), FULLTEXT idx (v1, v2));",
"INSERT INTO test VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');",
"CREATE TABLE test (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), v2 VARCHAR(200), v3 VARCHAR(200), FULLTEXT idx1 (v1, v2), FULLTEXT idx2 (v2), FULLTEXT idx3 (v2, v3));",
"INSERT INTO test VALUES (1, 'abc', 'def', 'ghi');",
},
Assertions: []ScriptTestAssertion{
{
Query: "ALTER TABLE test DROP COLUMN v2;",
ExpectedErr: sql.ErrFullTextMissingColumn,
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('abc');",
Expected: []sql.Row{{uint64(1), "abc", "def", "ghi"}},
},
{
Query: "SELECT * FROM test WHERE MATCH(v2) AGAINST ('def');",
Expected: []sql.Row{{uint64(1), "abc", "def", "ghi"}},
},
{
Query: "SELECT * FROM test WHERE MATCH(v2, v3) AGAINST ('ghi');",
Expected: []sql.Row{{uint64(1), "abc", "def", "ghi"}},
},
{
Query: "SHOW CREATE TABLE test;",
Expected: []sql.Row{{"test", "CREATE TABLE `test` (\n `pk` bigint unsigned NOT NULL,\n `v1` varchar(200),\n `v2` varchar(200),\n `v3` varchar(200),\n PRIMARY KEY (`pk`),\n FULLTEXT KEY `idx1` (`v1`,`v2`),\n FULLTEXT KEY `idx2` (`v2`),\n FULLTEXT KEY `idx3` (`v2`,`v3`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_bin"}},
},
{
Query: "ALTER TABLE test DROP COLUMN v2;",
Expected: []sql.Row{{types.NewOkResult(0)}},
},
{
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('abc');",
ExpectedErr: sql.ErrColumnNotFound,
},
{
Query: "SELECT * FROM test WHERE MATCH(v2) AGAINST ('def');",
ExpectedErr: sql.ErrColumnNotFound,
},
{
Query: "SELECT * FROM test WHERE MATCH(v2, v3) AGAINST ('ghi');",
ExpectedErr: sql.ErrColumnNotFound,
},
{
Query: "SELECT * FROM test WHERE MATCH(v1) AGAINST ('abc');",
Expected: []sql.Row{{uint64(1), "abc", "ghi"}},
},
{
Query: "SELECT * FROM test WHERE MATCH(v3) AGAINST ('ghi');",
Expected: []sql.Row{{uint64(1), "abc", "ghi"}},
},
{
Query: "SHOW CREATE TABLE test;",
Expected: []sql.Row{{"test", "CREATE TABLE `test` (\n `pk` bigint unsigned NOT NULL,\n `v1` varchar(200),\n `v3` varchar(200),\n PRIMARY KEY (`pk`),\n FULLTEXT KEY `idx1` (`v1`),\n FULLTEXT KEY `idx3` (`v3`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_bin"}},
},
{
Query: "ALTER TABLE test DROP COLUMN v3;",
Expected: []sql.Row{{types.NewOkResult(0)}},
},
{
Query: "SELECT * FROM test WHERE MATCH(v1) AGAINST ('abc');",
Expected: []sql.Row{{uint64(1), "abc"}},
},
{
Query: "SELECT * FROM test WHERE MATCH(v3) AGAINST ('ghi');",
ExpectedErr: sql.ErrColumnNotFound,
},
{
Query: "SHOW CREATE TABLE test;",
Expected: []sql.Row{{"test", "CREATE TABLE `test` (\n `pk` bigint unsigned NOT NULL,\n `v1` varchar(200),\n PRIMARY KEY (`pk`),\n FULLTEXT KEY `idx1` (`v1`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_bin"}},
},
},
},
Expand Down Expand Up @@ -489,7 +646,7 @@ var FulltextTests = []ScriptTest{
},
},
{
Name: "ALTER TABLE DROP PRIMARY KEY",
Name: "ALTER TABLE DROP TABLE",
SetUpScript: []string{
"CREATE TABLE test (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), v2 VARCHAR(200), FULLTEXT idx (v1, v2));",
"INSERT INTO test VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');",
Expand All @@ -501,6 +658,27 @@ var FulltextTests = []ScriptTest{
},
},
},
// Checks that TRUNCATE TABLE also empties the Full-Text search results: a term
// that matched before the truncate must match nothing afterwards.
{
Name: "TRUNCATE TABLE",
SetUpScript: []string{
"CREATE TABLE test (pk BIGINT UNSIGNED PRIMARY KEY, v1 VARCHAR(200), v2 VARCHAR(200), FULLTEXT idx (v1, v2));",
"INSERT INTO test VALUES (1, 'abc', 'def pqr'), (2, 'ghi', 'jkl'), (3, 'mno', 'mno'), (4, 'stu vwx', 'xyz zyx yzx'), (5, 'ghs', 'mno shg');",
},
Assertions: []ScriptTestAssertion{
{
// Baseline: the term matches row 2 before truncation.
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('ghi');",
Expected: []sql.Row{{uint64(2), "ghi", "jkl"}},
},
{
// The expected affected-row count equals the five rows inserted in setup.
Query: "TRUNCATE TABLE test;",
Expected: []sql.Row{{types.NewOkResult(5)}},
},
{
// The same search must now return no rows.
Query: "SELECT * FROM test WHERE MATCH(v1, v2) AGAINST ('ghi');",
Expected: []sql.Row{},
},
},
},
{
Name: "No prefix needed for TEXT columns",
Assertions: []ScriptTestAssertion{
Expand Down Expand Up @@ -648,4 +826,20 @@ var FulltextTests = []ScriptTest{
},
},
},
// Checks that a Full-Text index is not accepted as the index backing a foreign
// key. The parent table's only index on v1 is a FULLTEXT one (it declares no
// primary key), and both child definitions are expected to fail with
// sql.ErrForeignKeyMissingReferenceIndex.
{
Name: "Foreign keys ignore Full-Text indexes",
SetUpScript: []string{
"CREATE TABLE parent (pk BIGINT, v1 VARCHAR(200), FULLTEXT idx (v1));",
},
Assertions: []ScriptTestAssertion{
{
// The child has its own FULLTEXT index on the referencing column.
Query: "CREATE TABLE child1 (pk BIGINT, v1 VARCHAR(200), FULLTEXT idx (v1), CONSTRAINT fk FOREIGN KEY (v1) REFERENCES parent(v1));",
ExpectedErr: sql.ErrForeignKeyMissingReferenceIndex,
},
{
// The child has a regular INDEX, yet the same error is still expected,
// since the parent side only offers a FULLTEXT index.
Query: "CREATE TABLE child2 (pk BIGINT, v1 VARCHAR(200), INDEX idx (v1), CONSTRAINT fk FOREIGN KEY (v1) REFERENCES parent(v1));",
ExpectedErr: sql.ErrForeignKeyMissingReferenceIndex,
},
},
},
}
4 changes: 2 additions & 2 deletions sql/analyzer/apply_foreign_keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ func getForeignKeyReferences(ctx *sql.Context, a *Analyzer, tbl sql.ForeignKeyTa
}
}

parentIndex, ok, err := plan.FindIndexWithPrefix(ctx, parentTbl, fk.ParentColumns, true)
parentIndex, ok, err := plan.FindFKIndexWithPrefix(ctx, parentTbl, fk.ParentColumns, true)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -363,7 +363,7 @@ func getForeignKeyRefActions(ctx *sql.Context, a *Analyzer, tbl sql.ForeignKeyTa
}
}

childIndex, ok, err := plan.FindIndexWithPrefix(ctx, childTbl, fk.Columns, false)
childIndex, ok, err := plan.FindFKIndexWithPrefix(ctx, childTbl, fk.Columns, false)
if err != nil {
return nil, err
}
Expand Down
3 changes: 3 additions & 0 deletions sql/analyzer/match_against.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ func processMatchAgainst(ctx *sql.Context, matchAgainstExpr *expression.MatchAga
if !ok {
return nil, transform.NewTree, fmt.Errorf("cannot use MATCH ... AGAINST ... on a table that does not declare indexes")
}
if _, ok = indexedTbl.(sql.StatisticsTable); !ok {
return nil, transform.NewTree, fmt.Errorf("cannot use MATCH ... AGAINST ... on a table that does not implement sql.StatisticsTable")
}

// Verify the indexes that have been set
ftIndex := matchAgainstExpr.GetIndex()
Expand Down
33 changes: 25 additions & 8 deletions sql/expression/matchagainst.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package expression

import (
"fmt"
"math"
"strings"
"sync"

Expand Down Expand Up @@ -49,6 +50,7 @@ type MatchAgainst struct {
docCountIndex sql.Index
globalCountIndex sql.Index
rowCountIndex sql.Index
parentRowCount uint64
}

var _ sql.Expression = (*MatchAgainst)(nil)
Expand Down Expand Up @@ -267,6 +269,12 @@ func (expr *MatchAgainst) inNaturalLanguageMode(ctx *sql.Context, row sql.Row) (
err = nErr
return
}
// Load the number of rows from the parent table, since it's used in the relevancy calculation
expr.parentRowCount, nErr = expr.ParentTable.(sql.StatisticsTable).RowCount(ctx)
if nErr != nil {
err = nErr
return
}
})
if err != nil {
return 0, err
Expand Down Expand Up @@ -310,13 +318,18 @@ func (expr *MatchAgainst) inNaturalLanguageMode(ctx *sql.Context, row sql.Row) (
if err != nil {
return 0, err
}
// This did not match, so we continue
if len(docCountRows) == 0 {
// This did not match, so we continue
continue
} else if len(docCountRows) > 1 {
return 0, fmt.Errorf("somehow there are duplicate entries within the Full-Text doc count table")
}
docCountRow := docCountRows[0]
docCount := float64(docCountRow[len(docCountRow)-1].(uint64))
if docCount == 0 {
// The document count is zero, so the word does not match (its entry should already have been deleted)
continue
}

// Otherwise, we've found a match, so we'll grab the global count as well
lookup = sql.IndexLookup{Ranges: []sql.Range{
Expand Down Expand Up @@ -363,17 +376,21 @@ func (expr *MatchAgainst) inNaturalLanguageMode(ctx *sql.Context, row sql.Row) (
rowCountRow := rowCountRows[0]

// Calculate the relevancy (partially based on an old MySQL implementation)
//TODO: use an actual algorithm with a good distribution, however we're focusing on correctly returned results for now
docCount := float32(docCountRow[len(docCountRow)-1].(uint64))
globalCount := float32(globalCountRow[len(globalCountRow)-1].(uint64))
uniqueWords := float32(rowCountRow[2].(uint64))
fp := docCount / globalCount
sp := 1 + 1/(1+0.115*uniqueWords)
accumulatedRelevancy += fp * sp
// https://web.archive.org/web/20220122170304/http://dev.mysql.com/doc/internals/en/full-text-search.html
globalCount := float64(globalCountRow[len(globalCountRow)-1].(uint64))
uniqueWords := float64(rowCountRow[2].(uint64))
base := math.Log(docCount) + 1
normFactor := uniqueWords / (1 + 0.115*uniqueWords)
globalMult := math.Log(float64(expr.parentRowCount)/globalCount) + 1
accumulatedRelevancy += float32(base * normFactor * globalMult)
Hydrocharged marked this conversation as resolved.
Show resolved Hide resolved
}
if err != nil {
return 0, err
}
// Due to how we handle float-to-bool conversion, we need to add 0.5 if the result is positive
if accumulatedRelevancy > 0 {
accumulatedRelevancy += 0.5
}
// Return the accumulated relevancy from all of the parsed words
return accumulatedRelevancy, nil
}
Expand Down
Loading