Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Idx histogram manipulation #2142

Merged
merged 14 commits into from
Nov 13, 2023
7 changes: 7 additions & 0 deletions enginetest/enginetests.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ func TestStatistics(t *testing.T, harness Harness) {
}
}

// TestStatisticIndexFilters exercises index histogram costing by running
// every script in queries.StatsIndexTests against the supplied harness.
func TestStatisticIndexFilters(t *testing.T, harness Harness) {
	scripts := queries.StatsIndexTests
	for i := range scripts {
		TestScript(t, harness, scripts[i])
	}
}

// TestStatisticsPrepared tests the statistics from ANALYZE TABLE
func TestStatisticsPrepared(t *testing.T, harness Harness) {
for _, script := range queries.StatisticsQueries {
Expand Down
27 changes: 23 additions & 4 deletions enginetest/evaluation.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ func TestScriptWithEngine(t *testing.T, e QueryEngine, harness Harness, script q
assertion.ExpectedWarningMessageSubstring, assertion.SkipResultsCheck)
} else if assertion.SkipResultsCheck {
RunQuery(t, e, harness, assertion.Query)
} else if assertion.CheckIndexedAccess {
TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
} else if assertion.CheckIndexedAccess || assertion.IndexName != "" {
TestQueryWithIndexCheck(t, ctx, e, harness, assertion.Query, assertion.IndexName, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
} else {
TestQueryWithContext(t, ctx, e, harness, assertion.Query, assertion.Expected, assertion.ExpectedColumns, assertion.Bindings)
}
Expand Down Expand Up @@ -346,7 +346,21 @@ func TestQueryWithContext(t *testing.T, ctx *sql.Context, e QueryEngine, harness
validateEngine(t, ctx, harness, e)
}

func TestQueryWithIndexCheck(t *testing.T, ctx *sql.Context, e QueryEngine, harness Harness, q string, expected []sql.Row, expectedCols []*sql.Column, bindings map[string]*querypb.BindVariable) {
// GetFilterIndex inspects the plan rooted at n with transform.InspectUp and
// returns the sql.IndexLookup obtained from plan.GetIndexLookup for a
// *plan.IndexedTableAccess node visited during that walk. If no such node is
// visited, the zero-value sql.IndexLookup is returned.
func GetFilterIndex(n sql.Node) sql.IndexLookup {
	var found sql.IndexLookup
	transform.InspectUp(n, func(visited sql.Node) bool {
		ita, ok := visited.(*plan.IndexedTableAccess)
		if !ok {
			return false
		}
		found = plan.GetIndexLookup(ita)
		// NOTE(review): returning true presumably tells InspectUp to stop
		// walking, so the first indexed access encountered wins — confirm
		// against the transform package.
		return true
	})
	return found
}

func TestQueryWithIndexCheck(t *testing.T, ctx *sql.Context, e QueryEngine, harness Harness, q string, expIndex string, expected []sql.Row, expectedCols []*sql.Column, bindings map[string]*querypb.BindVariable) {
ctx = ctx.WithQuery(q)
require := require.New(t)
if len(bindings) > 0 {
Expand All @@ -357,7 +371,12 @@ func TestQueryWithIndexCheck(t *testing.T, ctx *sql.Context, e QueryEngine, harn
if !IsServerEngine(e) {
node, err := e.AnalyzeQuery(ctx, q)
require.NoError(err, "Unexpected error for query %s: %s", q, err)
require.True(CheckIndexedAccess(node), "expected plan to have index, but found: %s", sql.DebugString(node))
if expIndex != "" {
lookup := GetFilterIndex(node)
require.Equal(strings.ToLower(expIndex), strings.ToLower(lookup.Index.ID()))
} else {
require.True(CheckIndexedAccess(node), "expected plan to have index, but found: %s", sql.DebugString(node))
}
}

sch, iter, err := e.QueryWithBindings(ctx, q, nil, bindings)
Expand Down
4 changes: 4 additions & 0 deletions enginetest/memory_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,10 @@ func TestStatistics(t *testing.T) {
enginetest.TestStatistics(t, enginetest.NewDefaultMemoryHarness())
}

// TestStatisticIndexFilters runs the shared enginetest index-filter
// statistics suite with a default memory harness.
func TestStatisticIndexFilters(t *testing.T) {
	harness := enginetest.NewDefaultMemoryHarness()
	enginetest.TestStatisticIndexFilters(t, harness)
}

// TestSpatialInsertInto runs the shared enginetest spatial INSERT INTO suite
// with a default memory harness.
func TestSpatialInsertInto(t *testing.T) {
	harness := enginetest.NewDefaultMemoryHarness()
	enginetest.TestSpatialInsertInto(t, harness)
}
Expand Down
145 changes: 0 additions & 145 deletions enginetest/queries/information_schema_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,9 @@
package queries

import (
"time"

"github.com/dolthub/vitess/go/sqltypes"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/stats"
"github.com/dolthub/go-mysql-server/sql/types"
)

Expand Down Expand Up @@ -1631,145 +1628,3 @@ var SkippedInfoSchemaScripts = []ScriptTest{
},
},
}

// StatisticsQueries is a list of script tests that run ANALYZE TABLE
// statements (or explicit histogram updates) and then check what
// information_schema.column_statistics returns.
var StatisticsQueries = []ScriptTest{
{
// Three rows in a single-column primary key are expected to yield three
// buckets, one per value.
Name: "analyze single int column",
SetUpScript: []string{
"CREATE TABLE t (i bigint primary key)",
"INSERT INTO t VALUES (1), (2), (3)",
"ANALYZE TABLE t",
},
Assertions: []ScriptTestAssertion{
{
Query: "SELECT * FROM information_schema.column_statistics",
Expected: []sql.Row{
{"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 24, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []*stats.Bucket{
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(1)}, nil, nil),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(2)}, nil, nil),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(3)}, nil, nil),
}, sql.IndexClassDefault),
},
},
},
},
},
{
// No ANALYZE TABLE in the setup here: the histogram is installed by the
// explicit UPDATE HISTOGRAM statement and later removed by DROP HISTOGRAM.
Name: "analyze update/drop",
SetUpScript: []string{
"CREATE TABLE t (i bigint primary key, j bigint, key(j))",
"INSERT INTO t VALUES (1, 4), (2, 5), (3, 6)",
},
Assertions: []ScriptTestAssertion{
{
Query: "analyze table t update histogram on (i) using data '{\"row_count\": 40, \"distinct_count\": 40, \"null_count\": 1, \"buckets\": [{\"row_count\": 20, \"distinct_count\": 20, \"upper_bound\": [50], \"bound_count\": 1}, {\"row_count\": 20, \"distinct_count\": 20, \"upper_bound\": [80], \"bound_count\": 1}]}'",
Expected: []sql.Row{{"t", "histogram", "status", "OK"}},
},
{
Query: "SELECT * FROM information_schema.column_statistics",
Expected: []sql.Row{
// NOTE(review): the upper bounds are expected as float64 although the
// column is bigint, presumably because they come from the JSON literal
// in the statement above — confirm.
{"mydb", "t", "i", stats.NewStatistic(40, 40, 1, 0, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []*stats.Bucket{
stats.NewHistogramBucket(20, 20, 0, 1, sql.Row{float64(50)}, nil, nil),
stats.NewHistogramBucket(20, 20, 0, 1, sql.Row{float64(80)}, nil, nil),
}, sql.IndexClassDefault),
},
},
},
{
Query: "analyze table t drop histogram on (i)",
Expected: []sql.Row{{"t", "histogram", "status", "OK"}},
},
{
// After the drop, no statistics rows are expected.
Query: "SELECT * FROM information_schema.column_statistics",
Expected: []sql.Row{},
},
},
},
{
// One statistic is expected per index: the primary key on i and the
// secondary key on j.
Name: "analyze two int columns",
SetUpScript: []string{
"CREATE TABLE t (i bigint primary key, j bigint, key(j))",
"INSERT INTO t VALUES (1, 4), (2, 5), (3, 6)",
"ANALYZE TABLE t",
},
Assertions: []ScriptTestAssertion{
{
Query: "SELECT * FROM information_schema.column_statistics",
Expected: []sql.Row{
{"mydb", "t", "i", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Int64}, []*stats.Bucket{
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(1)}, nil, []sql.Row{}),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(2)}, nil, []sql.Row{}),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(3)}, nil, []sql.Row{}),
}, sql.IndexClassDefault),
},
{"mydb", "t", "j", stats.NewStatistic(3, 3, 0, 48, time.Now(), sql.NewStatQualifier("mydb", "t", "j"), []string{"j"}, []sql.Type{types.Int64}, []*stats.Bucket{
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(4)}, nil, []sql.Row{}),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(5)}, nil, []sql.Row{}),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{int64(6)}, nil, []sql.Row{}),
}, sql.IndexClassDefault),
},
},
},
},
},
{
// The expected buckets are listed in ascending value order, which differs
// from the order in which the rows are inserted.
Name: "analyze float columns",
SetUpScript: []string{
"CREATE TABLE t (i double primary key)",
"INSERT INTO t VALUES (1.25), (45.25), (7.5), (10.5)",
"ANALYZE TABLE t",
},
Assertions: []ScriptTestAssertion{
{
Query: "SELECT * FROM information_schema.column_statistics",
Expected: []sql.Row{
{"mydb", "t", "i", stats.NewStatistic(4, 4, 0, 32, time.Now(), sql.NewStatQualifier("mydb", "t", "primary"), []string{"i"}, []sql.Type{types.Float64}, []*stats.Bucket{
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(1.25)}, nil, []sql.Row{}),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(7.5)}, nil, []sql.Row{}),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(10.5)}, nil, []sql.Row{}),
stats.NewHistogramBucket(1, 1, 0, 1, sql.Row{float64(45.25)}, nil, []sql.Row{}),
}, sql.IndexClassDefault),
},
},
},
},
},
{
// NOTE(review): despite the name, the assertion expects no statistics rows
// at all for the empty table — confirm the name is still accurate.
Name: "analyze empty table creates stats with 0s",
SetUpScript: []string{
"CREATE TABLE t (i float)",
"ANALYZE TABLE t",
},
Assertions: []ScriptTestAssertion{
{
Query: "SELECT * FROM information_schema.column_statistics",
Expected: []sql.Row{},
},
},
},
{
// NOTE(review): despite the name, no error is asserted here; the only
// assertion checks that column_statistics is empty.
Name: "analyze columns that can't be converted to float throws error",
SetUpScript: []string{
"CREATE TABLE t (t longtext)",
"INSERT INTO t VALUES ('not a number')",
"ANALYZE TABLE t",
},
Assertions: []ScriptTestAssertion{
{
Query: "SELECT * FROM information_schema.column_statistics",
Expected: []sql.Row{},
},
},
},
{
// This entry sets Query and Expected directly on the ScriptTest instead of
// using Name/SetUpScript/Assertions like the entries above.
// NOTE(review): it filters on 'mytable', which no script in this list
// creates — confirm where that table is expected to come from.
Query: `
SELECT
COLUMN_NAME,
JSON_EXTRACT(HISTOGRAM, '$."number-of-buckets-specified"')
FROM information_schema.COLUMN_STATISTICS
WHERE SCHEMA_NAME = 'mydb'
AND TABLE_NAME = 'mytable'
`,
Expected: nil,
},
}
1 change: 1 addition & 0 deletions enginetest/queries/script_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ type ScriptTestAssertion struct {

// CheckIndexedAccess indicates whether we should verify the query plan uses an index
CheckIndexedAccess bool
IndexName string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't dig into the details, but noticed we have an ExpectedIndexes []string field already. Seems like they're both trying to model the same thing. Any chance we can condense to just one?

}

// ScriptTests are a set of test scripts to run.
Expand Down
Loading