From 02109a8c86a4e6f3319a7ba91bd49c3aba932fd9 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 14 Jul 2023 15:13:28 -0700 Subject: [PATCH 01/46] Outline `getColumnRefFromScalar` from `addMergeJoins` --- sql/analyzer/indexed_joins.go | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 7b5aab9fce..d03149cdb2 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -658,6 +658,18 @@ func satisfiesScalarRefs(e memo.ScalarExpr, grp *memo.ExprGroup) bool { return e.Group().ScalarProps().Tables.Difference(grp.RelProps.OutputTables()).Len() == 0 } +func getColumnRefFromScalar(s memo.ScalarExpr) *memo.ColRef { + var result *memo.ColRef + memo.DfsScalar(s, func(e memo.ScalarExpr) (err error) { + if c, ok := e.(*memo.ColRef); ok { + result = c + return memo.HaltErr + } + return + }) + return result +} + // addMergeJoins will add merge join operators to join relations // with native indexes providing sort enforcement on an equality // filter. 
@@ -713,22 +725,8 @@ func addMergeJoins(m *memo.Memo) error { continue } - var lRef *memo.ColRef - memo.DfsScalar(l.Scalar, func(e memo.ScalarExpr) (err error) { - if c, ok := e.(*memo.ColRef); ok { - lRef = c - return memo.HaltErr - } - return - }) - var rRef *memo.ColRef - memo.DfsScalar(r.Scalar, func(e memo.ScalarExpr) (err error) { - if c, ok := e.(*memo.ColRef); ok { - rRef = c - return memo.HaltErr - } - return - }) + lRef := getColumnRefFromScalar(l.Scalar) + rRef := getColumnRefFromScalar(r.Scalar) // check that comparer is not non-decreasing if !isWeaklyMonotonic(l.Scalar) || !isWeaklyMonotonic(r.Scalar) { From 54ef154510869b67725e2d70873d1a2df1f830e1 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 14 Jul 2023 17:37:07 -0700 Subject: [PATCH 02/46] Create new join type `JoinTypeSlidingRange` and `JoinTypeOuterLeftSlidingRange` --- sql/plan/join.go | 17 +++++++++++++++-- sql/plan/jointype_string.go | 27 +++++++++++++++------------ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/sql/plan/join.go b/sql/plan/join.go index 805e451f94..893bb5803f 100644 --- a/sql/plan/join.go +++ b/sql/plan/join.go @@ -45,6 +45,8 @@ const ( JoinTypeLeftOuterHashExcludeNulls // LeftOuterHashJoinExcludeNulls JoinTypeMerge // MergeJoin JoinTypeLeftOuterMerge // LeftOuterMergeJoin + JoinTypeSlidingRange // SlidingRangeJoin + JoinTypeLeftOuterSlidingRange // LeftOuterSlidingRangeJoin JoinTypeSemiHash // SemiHashJoin JoinTypeAntiHash // AntiHashJoin JoinTypeSemiLookup // SemiLookupJoin @@ -61,7 +63,7 @@ const ( func (i JoinType) IsLeftOuter() bool { switch i { - case JoinTypeLeftOuter, JoinTypeLeftOuterExcludeNulls, JoinTypeLeftOuterLookup, JoinTypeLeftOuterHash, JoinTypeLeftOuterHashExcludeNulls, JoinTypeLeftOuterMerge: + case JoinTypeLeftOuter, JoinTypeLeftOuterExcludeNulls, JoinTypeLeftOuterLookup, JoinTypeLeftOuterHash, JoinTypeLeftOuterHashExcludeNulls, JoinTypeLeftOuterMerge, JoinTypeLeftOuterSlidingRange: return true default: return false @@ -93,7 +95,7 @@ 
func (i JoinType) IsPhysical() bool { JoinTypeSemiLookup, JoinTypeSemiMerge, JoinTypeSemiHash, JoinTypeHash, JoinTypeLeftOuterHash, JoinTypeLeftOuterHashExcludeNulls, JoinTypeMerge, JoinTypeLeftOuterMerge, - JoinTypeAntiLookup, JoinTypeAntiMerge, JoinTypeAntiHash: + JoinTypeAntiLookup, JoinTypeAntiMerge, JoinTypeAntiHash, JoinTypeSlidingRange, JoinTypeLeftOuterSlidingRange: return true default: return false @@ -209,6 +211,17 @@ func (i JoinType) AsHash() JoinType { } } +func (i JoinType) AsSlidingRange() JoinType { + switch i { + case JoinTypeInner: + return JoinTypeSlidingRange + case JoinTypeLeftOuter: + return JoinTypeLeftOuterSlidingRange + default: + return i + } +} + func (i JoinType) AsMerge() JoinType { switch i { case JoinTypeInner: diff --git a/sql/plan/jointype_string.go b/sql/plan/jointype_string.go index c25c191f79..3648d8eec8 100644 --- a/sql/plan/jointype_string.go +++ b/sql/plan/jointype_string.go @@ -26,21 +26,24 @@ func _() { _ = x[JoinTypeLeftOuterHashExcludeNulls-15] _ = x[JoinTypeMerge-16] _ = x[JoinTypeLeftOuterMerge-17] - _ = x[JoinTypeSemiHash-18] - _ = x[JoinTypeAntiHash-19] - _ = x[JoinTypeSemiLookup-20] - _ = x[JoinTypeAntiLookup-21] - _ = x[JoinTypeSemiMerge-22] - _ = x[JoinTypeAntiMerge-23] - _ = x[JoinTypeNatural-24] - _ = x[JoinTypeLateralCross-25] - _ = x[JoinTypeLateralInner-26] - _ = x[JoinTypeLateralLeft-27] + _ = x[JoinTypeSlidingRange-18] + _ = x[JoinTypeLeftOuterSlidingRange-19] + _ = x[JoinTypeSemiHash-20] + _ = x[JoinTypeAntiHash-21] + _ = x[JoinTypeSemiLookup-22] + _ = x[JoinTypeAntiLookup-23] + _ = x[JoinTypeSemiMerge-24] + _ = x[JoinTypeAntiMerge-25] + _ = x[JoinTypeNatural-26] + _ = x[JoinTypeLateralCross-27] + _ = x[JoinTypeLateralInner-28] + _ = x[JoinTypeLateralLeft-29] + _ = x[JoinTypeLateralRight-30] } -const _JoinType_name = 
"UnknownJoinCrossJoinCrossHashJoinInnerJoinSemiJoinAntiJoinLeftOuterJoinLeftOuterJoinExcludingNullsFullOuterJoinGroupByJoinRightJoinLookupJoinLeftOuterLookupJoinHashJoinLeftOuterHashJoinLeftOuterHashJoinExcludeNullsMergeJoinLeftOuterMergeJoinSemiHashJoinAntiHashJoinSemiLookupJoinAntiLookupJoinSemiMergeJoinAntiMergeJoinNaturalJoinLateralCrossJoinLateralInnerJoinLateralLeftJoin" +const _JoinType_name = "UnknownJoinCrossJoinCrossHashJoinInnerJoinSemiJoinAntiJoinLeftOuterJoinLeftOuterJoinExcludingNullsFullOuterJoinGroupByJoinRightJoinLookupJoinLeftOuterLookupJoinHashJoinLeftOuterHashJoinLeftOuterHashJoinExcludeNullsMergeJoinLeftOuterMergeJoinSlidingRangeJoinLeftOuterSlidingRangeJoinSemiHashJoinAntiHashJoinSemiLookupJoinAntiLookupJoinSemiMergeJoinAntiMergeJoinNaturalJoinLateralCrossJoinLateralInnerJoinLateralLeftJoinLateralLeftJoin" -var _JoinType_index = [...]uint16{0, 11, 20, 33, 42, 50, 58, 71, 98, 111, 122, 131, 141, 160, 168, 185, 214, 223, 241, 253, 265, 279, 293, 306, 319, 330, 346, 362, 377} +var _JoinType_index = [...]uint16{0, 11, 20, 33, 42, 50, 58, 71, 98, 111, 122, 131, 141, 160, 168, 185, 214, 223, 241, 257, 282, 294, 306, 320, 334, 347, 360, 371, 387, 403, 418, 433} func (i JoinType) String() string { if i >= JoinType(len(_JoinType_index)-1) { From 7c4ab43407992666bee0748421c2511ccab25a99 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 14 Jul 2023 17:38:41 -0700 Subject: [PATCH 03/46] Add `Between` expression to Memoizer. 
--- optgen/cmd/source/memo.yaml | 6 ++++++ optgen/cmd/support/memo_gen.go | 2 ++ sql/memo/exec_builder.go | 16 ++++++++++++++++ sql/memo/interner.go | 3 +++ sql/memo/memo.go | 16 ++++++++++++++++ sql/memo/memo.og.go | 25 +++++++++++++++++++++++++ 6 files changed, 68 insertions(+) diff --git a/optgen/cmd/source/memo.yaml b/optgen/cmd/source/memo.yaml index 3a91ab2c25..5bffdae6bf 100644 --- a/optgen/cmd/source/memo.yaml +++ b/optgen/cmd/source/memo.yaml @@ -128,6 +128,12 @@ exprs: scalar: true attrs: - [values, "[]*ExprGroup"] +- name: "Between" + scalar: true + attrs: + - [Value, "*ExprGroup"] + - [Min, "*ExprGroup"] + - [Max, "*ExprGroup"] - name: "Hidden" scalar: true attrs: diff --git a/optgen/cmd/support/memo_gen.go b/optgen/cmd/support/memo_gen.go index 2cf1f19541..cfdb196647 100644 --- a/optgen/cmd/support/memo_gen.go +++ b/optgen/cmd/support/memo_gen.go @@ -221,6 +221,8 @@ func (g *MemoGen) genFormatters(defines []ExprDef) { fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: '%%s.%%s'\", r.Gf.Table(), r.Gf.Name())\n", loweredName) case "Bindvar": fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.Name)\n", loweredName) + case "Between": + fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%d, %%d, %%d\", r.Value.Id, r.Min.Id, r.Max.Id)\n", loweredName) case "Hidden": fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.E)\n", loweredName) case "Tuple": diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 558be74d57..164965761b 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -549,6 +549,22 @@ func (b *ExecBuilder) buildTuple(e *Tuple, sch sql.Schema) (sql.Expression, erro return expression.NewTuple(values...), nil } +func (b *ExecBuilder) buildBetween(e *Between, sch sql.Schema) (sql.Expression, error) { + value, err := b.buildScalar(e.Value.Scalar, sch) + if err != nil { + return nil, err + } + min, err := b.buildScalar(e.Min.Scalar, sch) + if err != nil { + return nil, err + } + max, err := b.buildScalar(e.Max.Scalar, sch) + if 
err != nil { + return nil, err + } + return expression.NewBetween(value, min, max), nil +} + func (b *ExecBuilder) buildHidden(e *Hidden, sch sql.Schema) (sql.Expression, error) { ret, _, err := fixidx.FixFieldIndexes(e.g.m.scope, nil, sch, e.E) return ret, err diff --git a/sql/memo/interner.go b/sql/memo/interner.go index 37754a618e..36a3ff51d6 100644 --- a/sql/memo/interner.go +++ b/sql/memo/interner.go @@ -41,6 +41,7 @@ const ( ScalarExprBindvar ScalarExprIsNull ScalarExprTuple + ScalarExprBetween ScalarExprHidden ) @@ -84,6 +85,8 @@ func internExpr(e ScalarExpr) uint64 { for _, c := range e.Values { h.Write([]byte(fmt.Sprintf("%d", internExpr(c.Scalar)))) } + case *Between: + h.Write([]byte(fmt.Sprintf("%d%d%d%d", e.ExprId(), internExpr(e.Value.Scalar), internExpr(e.Min.Scalar), internExpr(e.Max.Scalar)))) case *Regexp: h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) case *Not: diff --git a/sql/memo/memo.go b/sql/memo/memo.go index c9981537f3..30899ba6a7 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -146,6 +146,8 @@ func (m *Memo) MemoizeScalar(e sql.Expression) *ExprGroup { scalar = m.memoizeOr(e) case *expression.BindVar: scalar = m.memoizeBindvar(e) + case *expression.Between: + scalar = m.memoizeBetween(e) default: scalar = m.memoizeHidden(e) } @@ -305,6 +307,20 @@ func (m *Memo) memoizeBindvar(e *expression.BindVar) *ExprGroup { return grp } +func (m *Memo) memoizeBetween(e *expression.Between) *ExprGroup { + valueGrp := m.MemoizeScalar(e.Val) + minGrp := m.MemoizeScalar(e.Lower) + maxGrp := m.MemoizeScalar(e.Upper) + scalar := &Between{scalarBase: &scalarBase{}, Value: valueGrp, Min: minGrp, Max: maxGrp} + grp := m.PreexistingScalar(scalar) + if grp != nil { + return grp + } + grp = m.NewExprGroup(scalar) + // TODO scalar props + return grp +} + func (m *Memo) memoizeHidden(e sql.Expression) *ExprGroup { var cols sql.ColSet var tables sql.FastIntSet diff --git a/sql/memo/memo.og.go 
b/sql/memo/memo.og.go index 898dfc402b..b3d7ef3c65 100644 --- a/sql/memo/memo.og.go +++ b/sql/memo/memo.og.go @@ -875,6 +875,27 @@ func (r *Tuple) Children() []*ExprGroup { return nil } +type Between struct { + *scalarBase + Value *ExprGroup + Min *ExprGroup + Max *ExprGroup +} + +var _ ScalarExpr = (*Between)(nil) + +func (r *Between) ExprId() ScalarExprId { + return ScalarExprBetween +} + +func (r *Between) String() string { + return FormatExpr(r) +} + +func (r *Between) Children() []*ExprGroup { + return nil +} + type Hidden struct { *scalarBase E sql.Expression @@ -986,6 +1007,8 @@ func FormatExpr(r exprType) string { vals[i] = fmt.Sprintf("%d", v.Id) } return fmt.Sprintf("tuple: %s", strings.Join(vals, " ")) + case *Between: + return fmt.Sprintf("between: %d, %d, %d", r.Value.Id, r.Min.Id, r.Max.Id) case *Hidden: return fmt.Sprintf("hidden: %s", r.E) default: @@ -1091,6 +1114,8 @@ func buildScalarExpr(b *ExecBuilder, r ScalarExpr, sch sql.Schema) (sql.Expressi return b.buildIsNull(r, sch) case *Tuple: return b.buildTuple(r, sch) + case *Between: + return b.buildBetween(r, sch) case *Hidden: return b.buildHidden(r, sch) default: From e8763e68559b6b1b32c4d878c302975503ccc618 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 14 Jul 2023 17:41:56 -0700 Subject: [PATCH 04/46] Add Sliding Range Join expression to Memo. 
--- optgen/cmd/source/memo.yaml | 4 +++ sql/analyzer/indexed_joins.go | 55 +++++++++++++++++++++++++++++++++++ sql/memo/memo.go | 28 ++++++++++++++++++ sql/memo/memo.og.go | 20 +++++++++++++ 4 files changed, 107 insertions(+) diff --git a/optgen/cmd/source/memo.yaml b/optgen/cmd/source/memo.yaml index 5bffdae6bf..f2bf79857c 100644 --- a/optgen/cmd/source/memo.yaml +++ b/optgen/cmd/source/memo.yaml @@ -13,6 +13,10 @@ exprs: join: true attrs: - [lookup, "*Lookup"] +- name: "SlidingRangeJoin" + join: true + attrs: + - [slidingRange, "*SlidingRange"] - name: "ConcatJoin" join: true attrs: diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index d03149cdb2..b068484800 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -201,6 +201,10 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco if err != nil { return nil, err } + err = addSlidingRangeJoin(m) + if err != nil { + return nil, err + } hints := memo.ExtractJoinHint(n) for _, h := range hints { @@ -651,6 +655,57 @@ func addHashJoins(m *memo.Memo) error { }) } +func addSlidingRangeJoin(m *memo.Memo) error { + return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { + switch e.(type) { + case *memo.InnerJoin, *memo.LeftJoin: + default: + return nil + } + + join := e.(memo.JoinRel).JoinPrivate() + if len(join.Filter) != 1 { + return nil + } + + filter := join.Filter[0] + + switch f := filter.(type) { + case *memo.Between: + if !(satisfiesScalarRefs(f.Value.Scalar, join.Left) && + satisfiesScalarRefs(f.Min.Scalar, join.Right) && + satisfiesScalarRefs(f.Max.Scalar, join.Right)) { + return nil + } + // TODO: Is this safe? If the expression references multiple columns, does this reference one + // arbitrarily? 
+ valueColRef := getColumnRefFromScalar(f.Value.Scalar) + minColRef := getColumnRefFromScalar(f.Min.Scalar) + maxColRef := getColumnRefFromScalar(f.Max.Scalar) + if valueColRef == nil || minColRef == nil || maxColRef == nil { + return nil + } + + rel := &memo.SlidingRangeJoin{ + JoinBase: join.Copy(), + } + rel.SlidingRange = &memo.SlidingRange{ + ValueCol: valueColRef, + MinColRef: minColRef, + MaxColRef: maxColRef, + Parent: rel.JoinBase, + } + rel.Op = rel.Op.AsSlidingRange() + e.Group().Prepend(rel) + + return nil + default: + return nil + } + + }) +} + // satisfiesScalarRefs returns true if all GetFields in the expression // are columns provided by |grp| func satisfiesScalarRefs(e memo.ScalarExpr, grp *memo.ExprGroup) bool { diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 30899ba6a7..68c8ebcca4 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -397,6 +397,27 @@ func (m *Memo) MemoizeLookupJoin(grp, left, right *ExprGroup, op plan.JoinType, return grp } +func (m *Memo) MemoizeSlidingRangeJoin(grp, left, right *ExprGroup, op plan.JoinType, filter []ScalarExpr, slidingRange *SlidingRange) *ExprGroup { + newJoin := &SlidingRangeJoin{ + JoinBase: &JoinBase{ + relBase: &relBase{}, + Left: left, + Right: right, + Op: op, + Filter: filter, + }, + SlidingRange: slidingRange, + } + newJoin.SlidingRange.Parent = newJoin.JoinBase + + if grp == nil { + return m.NewExprGroup(newJoin) + } + newJoin.g = grp + grp.Prepend(newJoin) + return grp +} + func (m *Memo) MemoizeMergeJoin(grp, left, right *ExprGroup, lIdx, rIdx *IndexScan, op plan.JoinType, filter []ScalarExpr, swapCmp bool) *ExprGroup { rel := &MergeJoin{ JoinBase: &JoinBase{ @@ -929,6 +950,13 @@ type IndexScan struct { Parent *JoinBase } +type SlidingRange struct { + ValueCol *ColRef + MinColRef *ColRef + MaxColRef *ColRef + Parent *JoinBase +} + // splitConjunction_memo breaks AND expressions into their left and right parts, recursively func SplitConjunction(e ScalarExpr) []ScalarExpr { if e == nil { 
diff --git a/sql/memo/memo.og.go b/sql/memo/memo.og.go index b3d7ef3c65..c66e74e247 100644 --- a/sql/memo/memo.og.go +++ b/sql/memo/memo.og.go @@ -102,6 +102,22 @@ func (r *LookupJoin) JoinPrivate() *JoinBase { return r.JoinBase } +type SlidingRangeJoin struct { + *JoinBase + SlidingRange *SlidingRange +} + +var _ RelExpr = (*SlidingRangeJoin)(nil) +var _ JoinRel = (*SlidingRangeJoin)(nil) + +func (r *SlidingRangeJoin) String() string { + return FormatExpr(r) +} + +func (r *SlidingRangeJoin) JoinPrivate() *JoinBase { + return r.JoinBase +} + type ConcatJoin struct { *JoinBase Concat []*Lookup @@ -931,6 +947,8 @@ func FormatExpr(r exprType) string { return fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id) case *LookupJoin: return fmt.Sprintf("lookupjoin %d %d", r.Left.Id, r.Right.Id) + case *SlidingRangeJoin: + return fmt.Sprintf("slidingrangejoin %d %d", r.Left.Id, r.Right.Id) case *ConcatJoin: return fmt.Sprintf("concatjoin %d %d", r.Left.Id, r.Right.Id) case *HashJoin: @@ -1033,6 +1051,8 @@ func buildRelExpr(b *ExecBuilder, r RelExpr, input sql.Schema, children ...sql.N result, err = b.buildAntiJoin(r, input, children...) case *LookupJoin: result, err = b.buildLookupJoin(r, input, children...) + case *SlidingRangeJoin: + result, err = b.buildSlidingRangeJoin(r, input, children...) case *ConcatJoin: result, err = b.buildConcatJoin(r, input, children...) 
case *HashJoin: From 3a14a7c2c868696bd68d5104b815b89bcf432010 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 14 Jul 2023 17:45:18 -0700 Subject: [PATCH 05/46] Add coster for Sliding Range Join --- sql/memo/coster.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/memo/coster.go b/sql/memo/coster.go index bf8b1f804f..c23c79bf17 100644 --- a/sql/memo/coster.go +++ b/sql/memo/coster.go @@ -71,6 +71,8 @@ func (c *coster) costRel(ctx *sql.Context, n RelExpr, s sql.StatsReader) (float6 return c.costMergeJoin(ctx, n, s) case *LookupJoin: return c.costLookupJoin(ctx, n, s) + case *SlidingRangeJoin: + return c.costSlidingRangeJoin(ctx, n, s) case *LateralCrossJoin: return c.costLateralCrossJoin(ctx, n, s) case *LateralInnerJoin: @@ -189,6 +191,11 @@ func (c *coster) costLookupJoin(_ *sql.Context, n *LookupJoin, _ sql.StatsReader return l*r*sel*(cpuCostFactor+randIOCostFactor) - r*seqIOCostFactor, nil } +func (c *coster) costSlidingRangeJoin(_ *sql.Context, n *SlidingRangeJoin, _ sql.StatsReader) (float64, error) { + // For now always favor sliding range. + return 0, nil +} + func (c *coster) costLateralCrossJoin(ctx *sql.Context, n *LateralCrossJoin, _ sql.StatsReader) (float64, error) { l := n.Left.RelProps.card r := n.Right.RelProps.card From 72779ec9ca9ab48ad304f85c031fda38ce8c3d10 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 14 Jul 2023 17:50:03 -0700 Subject: [PATCH 06/46] Generate Sliding Range Node. 
--- sql/memo/exec_builder.go | 47 +++++++++++++++++++++++ sql/plan/sliding_range.go | 78 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 sql/plan/sliding_range.go diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 164965761b..2942269fd7 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -133,6 +133,53 @@ func (b *ExecBuilder) buildLookupJoin(j *LookupJoin, input sql.Schema, children return plan.NewJoin(left, right, j.Op, filters).WithScopeLen(j.g.m.scopeLen), nil } +func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, input sql.Schema, children ...sql.Node) (sql.Node, error) { + var ret sql.Node + var err error + + leftSch := input[:len(input)-len(sr.Parent.Right.RelProps.OutputCols())] + rightSch := input[len(sr.Parent.Left.RelProps.OutputCols()):] + + switch n := children[0].(type) { + case *plan.ResolvedTable: + ret, err = plan.NewSlidingRange(n, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + case *plan.TableAlias: + ret, err = plan.NewSlidingRange(n.Child.(*plan.ResolvedTable), leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + ret = plan.NewTableAlias(n.Name(), ret) + case *plan.Distinct: + ret, err = b.buildSlidingRange(sr, input, n.Child) + ret = plan.NewDistinct(ret) + case *plan.Filter: + ret, err = b.buildSlidingRange(sr, input, n.Child) + ret = plan.NewFilter(n.Expression, ret) + case *plan.Project: + ret, err = b.buildSlidingRange(sr, input, n.Child) + ret = plan.NewProject(n.Projections, ret) + case *plan.Limit: + ret, err = b.buildSlidingRange(sr, input, n.Child) + ret = plan.NewLimit(n.Limit, ret) + default: + panic(fmt.Sprintf("unexpected lookup child %T", n)) + } + if err != nil { + return nil, err + } + return ret, nil +} + +func (b *ExecBuilder) buildSlidingRangeJoin(j *SlidingRangeJoin, input sql.Schema, children ...sql.Node) (sql.Node, error) { + left := children[0] + 
right, err := b.buildSlidingRange(j.SlidingRange, input, children[1]) + if err != nil { + return nil, err + } + filters, err := b.buildFilterConjunction(j.g.m.scope, input, j.Filter...) + if err != nil { + return nil, err + } + return plan.NewJoin(left, right, j.Op, filters).WithScopeLen(j.g.m.scopeLen), nil +} + func (b *ExecBuilder) buildConcatJoin(j *ConcatJoin, input sql.Schema, children ...sql.Node) (sql.Node, error) { var alias string var name string diff --git a/sql/plan/sliding_range.go b/sql/plan/sliding_range.go new file mode 100644 index 0000000000..566b3e0068 --- /dev/null +++ b/sql/plan/sliding_range.go @@ -0,0 +1,78 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package plan + +import ( + "fmt" + "github.com/dolthub/go-mysql-server/sql" +) + +// SlidingRange is a Node that wraps a table with min and max range columns. When used as a secondary provider in Join +// operations, it can efficiently compute the rows whose ranges bound the value from the other table. When the ranges +// don't overlap, the amortized complexity is O(1) for each result row. 
+type SlidingRange struct { + UnaryNode + childRowIter sql.RowIter + activeRanges priorityQueue + pendingRow sql.Row + valueColumnIndex int + minColumnIndex int + maxColumnIndex int + comparisonType sql.Type +} + +type priorityQueue struct { + slidingRange *SlidingRange + rows []sql.Row + err error +} + +var _ sql.Node = (*SlidingRange)(nil) + +func NewSlidingRange(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max string) (*SlidingRange, error) { + // TODO: This doesn't appear to actually use the passed in indexes. + maxColumnIndex := rhsSchema.IndexOfColName(max) + newSr := &SlidingRange{ + activeRanges: priorityQueue{}, + pendingRow: nil, + valueColumnIndex: lhsSchema.IndexOfColName(value), + minColumnIndex: rhsSchema.IndexOfColName(min), + maxColumnIndex: maxColumnIndex, + comparisonType: rhsSchema[maxColumnIndex].Type, + } + newSr.Child = child + newSr.activeRanges.slidingRange = newSr + return newSr, nil +} + +func (s *SlidingRange) String() string { + return s.Child.String() +} + +func (s *SlidingRange) WithChildren(children ...sql.Node) (sql.Node, error) { + if len(children) != 1 { + return nil, fmt.Errorf("ds") + } + + s2 := *s + s2.UnaryNode = UnaryNode{Child: children[0]} + return &s2, nil +} + +func (s *SlidingRange) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool { + return s.Child.CheckPrivileges(ctx, opChecker) +} + +var _ sql.Node = (*SlidingRange)(nil) From 10e094ae5106fe0d4f7313766fb5849c1f069fa4 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 14 Jul 2023 17:50:40 -0700 Subject: [PATCH 07/46] Generate RowIter for SlidingRange --- sql/plan/sliding_range.go | 100 ++++++++++++++++++++++++++++++++ sql/rowexec/node_builder.gen.go | 2 + sql/rowexec/rel.go | 17 ++++++ 3 files changed, 119 insertions(+) diff --git a/sql/plan/sliding_range.go b/sql/plan/sliding_range.go index 566b3e0068..fdf9a6768d 100644 --- a/sql/plan/sliding_range.go +++ b/sql/plan/sliding_range.go @@ -15,8 +15,11 @@ package 
plan import ( + "container/heap" + "errors" "fmt" "github.com/dolthub/go-mysql-server/sql" + "io" ) // SlidingRange is a Node that wraps a table with min and max range columns. When used as a secondary provider in Join @@ -76,3 +79,100 @@ func (s *SlidingRange) CheckPrivileges(ctx *sql.Context, opChecker sql.Privilege } var _ sql.Node = (*SlidingRange)(nil) + +func (s *SlidingRange) Initialize(ctx *sql.Context, childRowIter sql.RowIter) (err error) { + s.childRowIter = childRowIter + s.activeRanges = priorityQueue{ + slidingRange: s, + rows: nil, + err: nil, + } + s.pendingRow, err = childRowIter.Next(ctx) + return err +} + +func (s *SlidingRange) IsInitialized() bool { + return s.childRowIter != nil +} + +func (s *SlidingRange) AcceptRow(ctx *sql.Context, row sql.Row) (sql.RowIter, error) { + // Remove rows from the heap if we've advanced beyond their max value. + for s.activeRanges.Len() > 0 { + maxValue := s.activeRanges.Peek() + compareResult, err := s.comparisonType.Compare(row[s.valueColumnIndex], maxValue) + if err != nil { + return nil, err + } + if compareResult > 0 { + heap.Pop(&s.activeRanges) + } else { + break + } + } + + // Advance the child iterator until we encounter a row whose min value is beyond the range. + for s.pendingRow != nil { + minValue := s.pendingRow[s.minColumnIndex] + compareResult, err := s.comparisonType.Compare(row[s.valueColumnIndex], minValue) + if err != nil { + return nil, err + } + + if compareResult < 0 { + break + } else { + heap.Push(&s.activeRanges, s.pendingRow) + } + + s.pendingRow, err = s.childRowIter.Next(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + // We've already imported every range into the priority queue. + s.pendingRow = nil + break + } + return nil, err + } + } + + // Every active row must match the accepted row. 
+ return sql.RowsToRowIter(s.activeRanges.rows...), nil +} + +func (pq priorityQueue) Len() int { return len(pq.rows) } + +func (pq *priorityQueue) Less(i, j int) bool { + lhs := pq.rows[i][pq.slidingRange.maxColumnIndex] + rhs := pq.rows[j][pq.slidingRange.maxColumnIndex] + // compareResult will be 0 if lhs==rhs, -1 if lhs < rhs, and +1 if lhs > rhs. + compareResult, err := pq.SortedType().Compare(lhs, rhs) + if pq.err == nil && err != nil { + pq.err = err + } + return compareResult < 0 +} + +func (pq *priorityQueue) Swap(i, j int) { + pq.rows[i], pq.rows[j] = pq.rows[j], pq.rows[i] +} + +func (pq *priorityQueue) Push(x any) { + item := x.(sql.Row) + pq.rows = append(pq.rows, item) +} + +func (pq *priorityQueue) Pop() any { + n := len(pq.rows) + x := pq.rows[n-1] + pq.rows = pq.rows[0 : n-1] + return x +} + +func (pq *priorityQueue) Peek() interface{} { + n := len(pq.rows) + return pq.rows[n-1][pq.slidingRange.maxColumnIndex] +} + +func (pq *priorityQueue) SortedType() sql.Type { + return pq.slidingRange.comparisonType +} diff --git a/sql/rowexec/node_builder.gen.go b/sql/rowexec/node_builder.gen.go index e132d04a36..663de0b1e1 100644 --- a/sql/rowexec/node_builder.gen.go +++ b/sql/rowexec/node_builder.gen.go @@ -360,6 +360,8 @@ func (b *BaseBuilder) buildNodeExec(ctx *sql.Context, n sql.Node, row sql.Row) ( return n.RowIter(ctx, row) case *plan.CreateSpatialRefSys: return b.buildCreateSpatialRefSys(ctx, n, row) + case *plan.SlidingRange: + return b.buildSlidingRange(ctx, n, row) default: return nil, fmt.Errorf("exec builder found unknown Node type %T", n) } diff --git a/sql/rowexec/rel.go b/sql/rowexec/rel.go index edf7e5028b..e53a703740 100644 --- a/sql/rowexec/rel.go +++ b/sql/rowexec/rel.go @@ -734,3 +734,20 @@ func (b *BaseBuilder) buildResolvedTable(ctx *sql.Context, n *plan.ResolvedTable func (b *BaseBuilder) buildTableCount(_ *sql.Context, n *plan.TableCountLookup, _ sql.Row) (sql.RowIter, error) { return sql.RowsToRowIter(sql.Row{int64(n.Count())}), nil } 
+ +func (b *BaseBuilder) buildSlidingRange(ctx *sql.Context, n *plan.SlidingRange, row sql.Row) (sql.RowIter, error) { + // The first time, initialize the child rowIter and the heap + span, ctx := ctx.Span("plan.SlidingRange") + if !n.IsInitialized() { + i, err := b.buildNodeExec(ctx, n.Child, row) + if err != nil { + span.End() + return nil, err + } + err = n.Initialize(ctx, i) + if err != nil { + return nil, err + } + } + return n.AcceptRow(ctx, row) +} From 8582b8e4fee3742990aafd19fd83eabfd2bb4709 Mon Sep 17 00:00:00 2001 From: nicktobey Date: Mon, 17 Jul 2023 01:52:47 +0000 Subject: [PATCH 08/46] [ga-format-pr] Run ./format_repo.sh to fix formatting --- sql/plan/sliding_range.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/plan/sliding_range.go b/sql/plan/sliding_range.go index fdf9a6768d..d6cb0f4887 100644 --- a/sql/plan/sliding_range.go +++ b/sql/plan/sliding_range.go @@ -18,8 +18,9 @@ import ( "container/heap" "errors" "fmt" - "github.com/dolthub/go-mysql-server/sql" "io" + + "github.com/dolthub/go-mysql-server/sql" ) // SlidingRange is a Node that wraps a table with min and max range columns. When used as a secondary provider in Join From 26507af831c1c7c8be33d2897d1f3ff794f71d11 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 17 Jul 2023 11:34:44 -0700 Subject: [PATCH 09/46] Add basic tests for SlidingRangeJoin. 
--- enginetest/join_planning_tests.go | 32 ++++++++ enginetest/memory_engine_test.go | 10 +++ enginetest/range_join_op_tests.go | 128 ++++++++++++++++++++++++++++++ sql/memo/coster.go | 17 ++++ 4 files changed, 187 insertions(+) create mode 100644 enginetest/range_join_op_tests.go diff --git a/enginetest/join_planning_tests.go b/enginetest/join_planning_tests.go index 0097660f43..28136510f9 100644 --- a/enginetest/join_planning_tests.go +++ b/enginetest/join_planning_tests.go @@ -915,6 +915,38 @@ join uv d on d.u = c.x`, }, }, }, + { + name: "simple range join", + setup: []string{ + "create table vals (val int primary key)", + "create table ranges (min int primary key, max int, unique key(min,max))", + "insert into vals values (0), (1), (2), (3), (4), (5), (6)", + "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", + }, + tests: []JoinPlanTest{ + { + q: "select * from vals join ranges on val between min and max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + }, + }, } func TestJoinPlanning(t *testing.T, harness Harness) { diff --git a/enginetest/memory_engine_test.go b/enginetest/memory_engine_test.go index bb568e39a7..9c37beabb6 100644 --- a/enginetest/memory_engine_test.go +++ b/enginetest/memory_engine_test.go @@ -170,6 +170,16 @@ func TestJoinOpsPrepared(t *testing.T) { enginetest.TestJoinOpsPrepared(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) } +// TestJoinOps runs range-join-specific tests for merge +func TestRangeJoinOps(t *testing.T) { + enginetest.TestRangeJoinOps(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) +} + +// TestJoinOpsPrepared runs prepared range-join-specific tests for merge +func TestRangeJoinOpsPrepared(t *testing.T) { + 
enginetest.TestRangeJoinOpsPrepared(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) +} + // TestJSONTableQueries runs the canonical test queries against a single threaded index enabled harness. func TestJSONTableQueries(t *testing.T) { enginetest.TestJSONTableQueries(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) diff --git a/enginetest/range_join_op_tests.go b/enginetest/range_join_op_tests.go new file mode 100644 index 0000000000..4f58a1d930 --- /dev/null +++ b/enginetest/range_join_op_tests.go @@ -0,0 +1,128 @@ +// Copyright 2023 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package enginetest + +import ( + "fmt" + "testing" + + "github.com/dolthub/go-mysql-server/sql/memo" + + "github.com/dolthub/go-mysql-server/enginetest/scriptgen/setup" + "github.com/dolthub/go-mysql-server/sql" +) + +var biasedrangeCosters = map[string]memo.Coster{ + "inner": memo.NewInnerBiasedCoster(), + "lookup": memo.NewLookupBiasedCoster(), + "hash": memo.NewHashBiasedCoster(), + "merge": memo.NewMergeBiasedCoster(), + "partial": memo.NewPartialBiasedCoster(), + "slidingRange": memo.NewSlidingRangeBiasedCoster(), +} + +func TestRangeJoinOps(t *testing.T, harness Harness) { + for _, tt := range rangeJoinOpTests { + t.Run(tt.name, func(t *testing.T) { + e := mustNewEngine(t, harness) + defer e.Close() + for _, setup := range tt.setup { + for _, statement := range setup { + if sh, ok := harness.(SkippingHarness); ok { + if sh.SkipQueryTest(statement) { + t.Skip() + } + } + ctx := NewContext(harness) + RunQueryWithContext(t, e, harness, ctx, statement) + } + } + for k, c := range biasedrangeCosters { + e.Analyzer.Coster = c + for _, tt := range tt.tests { + evalJoinCorrectness(t, harness, e, fmt.Sprintf("%s join: %s", k, tt.Query), tt.Query, tt.Expected, tt.Skip) + } + } + }) + } +} + +func TestRangeJoinOpsPrepared(t *testing.T, harness Harness) { + for _, tt := range joinOpTests { + t.Run(tt.name, func(t *testing.T) { + e := mustNewEngine(t, harness) + defer e.Close() + for _, setup := range tt.setup { + for _, statement := range setup { + if sh, ok := harness.(SkippingHarness); ok { + if sh.SkipQueryTest(statement) { + t.Skip() + } + } + ctx := NewContext(harness) + RunQueryWithContext(t, e, harness, ctx, statement) + } + } + + for k, c := range biasedrangeCosters { + e.Analyzer.Coster = c + for _, tt := range tt.tests { + evalJoinCorrectnessPrepared(t, harness, e, fmt.Sprintf("%s join: %s", k, tt.Query), tt.Query, tt.Expected, tt.Skip) + } + } + }) + } +} + +var rangeJoinOpTests = []struct { + name string + setup [][]string + tests []JoinOpTests +}{ + { + 
name: "simple range join", + setup: [][]string{ + setup.MydbData[0], + { + "create table vals (val int primary key)", + "create table ranges (min int primary key, max int, unique key(min,max))", + "insert into vals values (0), (1), (2), (3), (4), (5), (6)", + "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", + }, + }, + tests: []JoinOpTests{ + { + Query: "select * from vals join ranges on val between min and max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + }, + }, +} diff --git a/sql/memo/coster.go b/sql/memo/coster.go index c23c79bf17..d9ea25d902 100644 --- a/sql/memo/coster.go +++ b/sql/memo/coster.go @@ -522,3 +522,20 @@ func (c *partialBiasedCoster) EstimateCost(ctx *sql.Context, r RelExpr, s sql.St return c.costRel(ctx, r, s) } } + +type slidingRangeBiasedCoster struct { + *coster +} + +func NewSlidingRangeBiasedCoster() Coster { + return &slidingRangeBiasedCoster{coster: &coster{}} +} + +func (c *slidingRangeBiasedCoster) EstimateCost(ctx *sql.Context, r RelExpr, s sql.StatsReader) (float64, error) { + switch r.(type) { + case *SlidingRangeJoin: + return -biasFactor, nil + default: + return c.costRel(ctx, r, s) + } +} From da1120f19f2331a0dae2cda4605da695d34f32ac Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 17 Jul 2023 16:33:46 -0700 Subject: [PATCH 10/46] SlidingRangeJoin requires an index on the min column, and uses an IndexScan. 
--- sql/analyzer/indexed_joins.go | 25 +++++++++++++++---------- sql/memo/exec_builder.go | 12 ++++++++++-- sql/memo/memo.go | 1 + 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index b068484800..3fa69218ec 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -668,6 +668,8 @@ func addSlidingRangeJoin(m *memo.Memo) error { return nil } + _, rIndexes, rFilters := lookupCandidates(join.Right.First) + filter := join.Filter[0] switch f := filter.(type) { @@ -686,17 +688,20 @@ func addSlidingRangeJoin(m *memo.Memo) error { return nil } - rel := &memo.SlidingRangeJoin{ - JoinBase: join.Copy(), - } - rel.SlidingRange = &memo.SlidingRange{ - ValueCol: valueColRef, - MinColRef: minColRef, - MaxColRef: maxColRef, - Parent: rel.JoinBase, + for _, rIdx := range sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) { + rel := &memo.SlidingRangeJoin{ + JoinBase: join.Copy(), + } + rel.SlidingRange = &memo.SlidingRange{ + IndexScan: *rIdx, + ValueCol: valueColRef, + MinColRef: minColRef, + MaxColRef: maxColRef, + Parent: rel.JoinBase, + } + rel.Op = rel.Op.AsSlidingRange() + e.Group().Prepend(rel) } - rel.Op = rel.Op.AsSlidingRange() - e.Group().Prepend(rel) return nil default: diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 2942269fd7..b6cac39de4 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -142,9 +142,17 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, input sql.Schema, chil switch n := children[0].(type) { case *plan.ResolvedTable: - ret, err = plan.NewSlidingRange(n, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + scan, err := b.buildIndexScan(&sr.IndexScan, input, n) + if err != nil { + return nil, err + } + ret, err = plan.NewSlidingRange(scan, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), 
sr.MaxColRef.Gf.Name()) case *plan.TableAlias: - ret, err = plan.NewSlidingRange(n.Child.(*plan.ResolvedTable), leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + scan, err := b.buildIndexScan(&sr.IndexScan, input, n.Child.(*plan.ResolvedTable)) + if err != nil { + return nil, err + } + ret, err = plan.NewSlidingRange(scan, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) ret = plan.NewTableAlias(n.Name(), ret) case *plan.Distinct: ret, err = b.buildSlidingRange(sr, input, n.Child) diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 68c8ebcca4..4191e7a6dc 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -951,6 +951,7 @@ type IndexScan struct { } type SlidingRange struct { + IndexScan IndexScan ValueCol *ColRef MinColRef *ColRef MaxColRef *ColRef From 1ecd74474a3091fd25609ce12de2c10f6bb1f2b3 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 17 Jul 2023 17:22:12 -0700 Subject: [PATCH 11/46] SlidingRangeJoin requires an index on the value column, and uses an IndexScan. 
--- sql/analyzer/indexed_joins.go | 28 ++++++++++++++++------------ sql/memo/exec_builder.go | 9 ++++++--- sql/memo/memo.go | 11 ++++++----- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 3fa69218ec..cb4b8687fe 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -668,6 +668,7 @@ func addSlidingRangeJoin(m *memo.Memo) error { return nil } + _, lIndexes, lFilters := lookupCandidates(join.Left.First) _, rIndexes, rFilters := lookupCandidates(join.Right.First) filter := join.Filter[0] @@ -688,19 +689,22 @@ func addSlidingRangeJoin(m *memo.Memo) error { return nil } - for _, rIdx := range sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) { - rel := &memo.SlidingRangeJoin{ - JoinBase: join.Copy(), - } - rel.SlidingRange = &memo.SlidingRange{ - IndexScan: *rIdx, - ValueCol: valueColRef, - MinColRef: minColRef, - MaxColRef: maxColRef, - Parent: rel.JoinBase, + for _, lIdx := range sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) { + for _, rIdx := range sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) { + rel := &memo.SlidingRangeJoin{ + JoinBase: join.Copy(), + } + rel.SlidingRange = &memo.SlidingRange{ + LeftIndex: *lIdx, + RightIndex: *rIdx, + ValueCol: valueColRef, + MinColRef: minColRef, + MaxColRef: maxColRef, + Parent: rel.JoinBase, + } + rel.Op = rel.Op.AsSlidingRange() + e.Group().Prepend(rel) } - rel.Op = rel.Op.AsSlidingRange() - e.Group().Prepend(rel) } return nil diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index b6cac39de4..790c8b16a5 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -142,13 +142,13 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, input sql.Schema, chil switch n := children[0].(type) { case *plan.ResolvedTable: - 
scan, err := b.buildIndexScan(&sr.IndexScan, input, n) + scan, err := b.buildIndexScan(&sr.RightIndex, input, n) if err != nil { return nil, err } ret, err = plan.NewSlidingRange(scan, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) case *plan.TableAlias: - scan, err := b.buildIndexScan(&sr.IndexScan, input, n.Child.(*plan.ResolvedTable)) + scan, err := b.buildIndexScan(&sr.RightIndex, input, n.Child.(*plan.ResolvedTable)) if err != nil { return nil, err } @@ -176,7 +176,10 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, input sql.Schema, chil } func (b *ExecBuilder) buildSlidingRangeJoin(j *SlidingRangeJoin, input sql.Schema, children ...sql.Node) (sql.Node, error) { - left := children[0] + left, err := b.buildIndexScan(&j.SlidingRange.LeftIndex, input, children[0]) + if err != nil { + return nil, err + } right, err := b.buildSlidingRange(j.SlidingRange, input, children[1]) if err != nil { return nil, err diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 4191e7a6dc..dea27b8abc 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -951,11 +951,12 @@ type IndexScan struct { } type SlidingRange struct { - IndexScan IndexScan - ValueCol *ColRef - MinColRef *ColRef - MaxColRef *ColRef - Parent *JoinBase + LeftIndex IndexScan + RightIndex IndexScan + ValueCol *ColRef + MinColRef *ColRef + MaxColRef *ColRef + Parent *JoinBase } // splitConjunction_memo breaks AND expressions into their left and right parts, recursively From bf05ce352d72c87470e06a4e16f4c402fa494167 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 18 Jul 2023 15:01:57 -0700 Subject: [PATCH 12/46] Add TODO about removing filter condition when making SlidingRange. 
--- sql/analyzer/indexed_joins.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index cb4b8687fe..f93f3affcd 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -694,6 +694,10 @@ func addSlidingRangeJoin(m *memo.Memo) error { rel := &memo.SlidingRangeJoin{ JoinBase: join.Copy(), } + // TODO: Remove the filter that was used to create the sliding range because it's no longer + // necessary to evaluate. However, removing this can cause issues if it's the only filter because + // iterjoin assumes that there's a filter condition. + // rel.Filter = rel.Filter[1:] rel.SlidingRange = &memo.SlidingRange{ LeftIndex: *lIdx, RightIndex: *rIdx, From 8f02ddc48a75ef434cb8209da20828a4c58090fd Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 19 Jul 2023 15:01:48 -0700 Subject: [PATCH 13/46] Add warning that IndexOfColName may not be safe. --- sql/plan/sliding_range.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/plan/sliding_range.go b/sql/plan/sliding_range.go index d6cb0f4887..7d536dfed9 100644 --- a/sql/plan/sliding_range.go +++ b/sql/plan/sliding_range.go @@ -46,7 +46,7 @@ type priorityQueue struct { var _ sql.Node = (*SlidingRange)(nil) func NewSlidingRange(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max string) (*SlidingRange, error) { - // TODO: This doesn't appear to actually use the passed in indexes. + // TODO: IndexOfColName is Only safe for schemas corresponding to a single table, where the source of the column is irrelevant. maxColumnIndex := rhsSchema.IndexOfColName(max) newSr := &SlidingRange{ activeRanges: priorityQueue{}, From 235d4e2604daeef727ae66cfa4045a5a4cbb479c Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 19 Jul 2023 15:02:16 -0700 Subject: [PATCH 14/46] Add fields to plan.SlidingRange to capture the expressions used for sorting. 
--- sql/memo/memo.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/memo/memo.go b/sql/memo/memo.go index dea27b8abc..137b83924d 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -951,11 +951,13 @@ type IndexScan struct { } type SlidingRange struct { - LeftIndex IndexScan - RightIndex IndexScan + LeftIndex *IndexScan + RightIndex *IndexScan ValueCol *ColRef MinColRef *ColRef MaxColRef *ColRef + MinExpr *ScalarExpr + ValueExpr *ScalarExpr Parent *JoinBase } From 67745f417538881b5504f6ee6dbc56f15a3ee343 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 19 Jul 2023 15:03:28 -0700 Subject: [PATCH 15/46] When there's no index for the sliding range, use nil instead. (Once this is complete, SlidingRange can choose whether to use an index or a sort.) --- sql/analyzer/indexed_joins.go | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index f93f3affcd..baa045d665 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -699,8 +699,10 @@ func addSlidingRangeJoin(m *memo.Memo) error { // iterjoin assumes that there's a filter condition. // rel.Filter = rel.Filter[1:] rel.SlidingRange = &memo.SlidingRange{ - LeftIndex: *lIdx, - RightIndex: *rIdx, + LeftIndex: lIdx, + RightIndex: rIdx, + ValueExpr: &f.Value.Scalar, + MinExpr: &f.Min.Scalar, ValueCol: valueColRef, MinColRef: minColRef, MaxColRef: maxColRef, @@ -710,6 +712,25 @@ func addSlidingRangeJoin(m *memo.Memo) error { e.Group().Prepend(rel) } } + rel := &memo.SlidingRangeJoin{ + JoinBase: join.Copy(), + } + // TODO: Remove the filter that was used to create the sliding range because it's no longer + // necessary to evaluate. However, removing this can cause issues if it's the only filter because + // iterjoin assumes that there's a filter condition. 
+ // rel.Filter = rel.Filter[1:] + rel.SlidingRange = &memo.SlidingRange{ + LeftIndex: nil, + RightIndex: nil, + ValueExpr: &f.Value.Scalar, + MinExpr: &f.Min.Scalar, + ValueCol: valueColRef, + MinColRef: minColRef, + MaxColRef: maxColRef, + Parent: rel.JoinBase, + } + rel.Op = rel.Op.AsSlidingRange() + e.Group().Prepend(rel) return nil default: From 0e04826b0958bd54cc9decd555ef5adeb9b994fb Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 19 Jul 2023 15:04:15 -0700 Subject: [PATCH 16/46] In SlidingRange, create a Sort node instead of an IndexScan. (Eventually, SlidingRange will be able to choose which option is best.) --- sql/memo/exec_builder.go | 53 +++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 790c8b16a5..89d9e00a24 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -133,38 +133,49 @@ func (b *ExecBuilder) buildLookupJoin(j *LookupJoin, input sql.Schema, children return plan.NewJoin(left, right, j.Op, filters).WithScopeLen(j.g.m.scopeLen), nil } -func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, input sql.Schema, children ...sql.Node) (sql.Node, error) { +func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql.Schema, children ...sql.Node) (sql.Node, error) { var ret sql.Node var err error - leftSch := input[:len(input)-len(sr.Parent.Right.RelProps.OutputCols())] - rightSch := input[len(sr.Parent.Left.RelProps.OutputCols()):] - switch n := children[0].(type) { case *plan.ResolvedTable: - scan, err := b.buildIndexScan(&sr.RightIndex, input, n) + // scan, err := b.buildIndexScan(&sr.RightIndex, input, n) + sortExpr, err := b.buildScalar(*sr.MinExpr, rightSch) if err != nil { return nil, err } - ret, err = plan.NewSlidingRange(scan, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + sf := []sql.SortField{{ + Column: sortExpr, + Order: sql.Ascending, + 
NullOrdering: sql.NullsFirst, + }} + sort := plan.NewSort(sf, n) + ret, err = plan.NewSlidingRange(sort, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) case *plan.TableAlias: - scan, err := b.buildIndexScan(&sr.RightIndex, input, n.Child.(*plan.ResolvedTable)) + // scan, err := b.buildIndexScan(&sr.RightIndex, input, n.Child.(*plan.ResolvedTable)) + sortExpr, err := b.buildScalar(*sr.MinExpr, rightSch) if err != nil { return nil, err } - ret, err = plan.NewSlidingRange(scan, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + sf := []sql.SortField{{ + Column: sortExpr, + Order: sql.Ascending, + NullOrdering: sql.NullsFirst, + }} + sort := plan.NewSort(sf, n) + ret, err = plan.NewSlidingRange(sort, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) ret = plan.NewTableAlias(n.Name(), ret) case *plan.Distinct: - ret, err = b.buildSlidingRange(sr, input, n.Child) + ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) ret = plan.NewDistinct(ret) case *plan.Filter: - ret, err = b.buildSlidingRange(sr, input, n.Child) + ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) ret = plan.NewFilter(n.Expression, ret) case *plan.Project: - ret, err = b.buildSlidingRange(sr, input, n.Child) + ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) ret = plan.NewProject(n.Projections, ret) case *plan.Limit: - ret, err = b.buildSlidingRange(sr, input, n.Child) + ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) ret = plan.NewLimit(n.Limit, ret) default: panic(fmt.Sprintf("unexpected lookup child %T", n)) @@ -176,11 +187,25 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, input sql.Schema, chil } func (b *ExecBuilder) buildSlidingRangeJoin(j *SlidingRangeJoin, input sql.Schema, children ...sql.Node) (sql.Node, error) { - left, err := b.buildIndexScan(&j.SlidingRange.LeftIndex, input, children[0]) + // left, err 
:= b.buildIndexScan(j.SlidingRange.LeftIndex, input, children[0]) + //if err != nil { + // return nil, err + //} + leftSch := input[:len(input)-len(j.Right.RelProps.OutputCols())] + rightSch := input[len(j.Left.RelProps.OutputCols()):] + + sortExpr, err := b.buildScalar(*j.SlidingRange.ValueExpr, leftSch) if err != nil { return nil, err } - right, err := b.buildSlidingRange(j.SlidingRange, input, children[1]) + sf := []sql.SortField{{ + Column: sortExpr, + Order: sql.Ascending, + NullOrdering: sql.NullsFirst, + }} + left := plan.NewSort(sf, children[0]) + + right, err := b.buildSlidingRange(j.SlidingRange, leftSch, rightSch, children[1]) if err != nil { return nil, err } From 94e28c4199debf53bea981c29c8bd7224e1153fa Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 19 Jul 2023 15:40:26 -0700 Subject: [PATCH 17/46] Simply logic for computing every possible SlidingRange index. --- sql/analyzer/indexed_joins.go | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index baa045d665..69a7f7c0d1 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -689,8 +689,16 @@ func addSlidingRangeJoin(m *memo.Memo) error { return nil } - for _, lIdx := range sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) { - for _, rIdx := range sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) { + leftIndexScans := sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) + if leftIndexScans == nil { + leftIndexScans = []*memo.IndexScan{nil} + } + for _, lIdx := range leftIndexScans { + rightIndexScans := sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) + if rightIndexScans == nil { + rightIndexScans = []*memo.IndexScan{nil} + } + for _, rIdx := range 
rightIndexScans { rel := &memo.SlidingRangeJoin{ JoinBase: join.Copy(), } @@ -712,25 +720,6 @@ func addSlidingRangeJoin(m *memo.Memo) error { e.Group().Prepend(rel) } } - rel := &memo.SlidingRangeJoin{ - JoinBase: join.Copy(), - } - // TODO: Remove the filter that was used to create the sliding range because it's no longer - // necessary to evaluate. However, removing this can cause issues if it's the only filter because - // iterjoin assumes that there's a filter condition. - // rel.Filter = rel.Filter[1:] - rel.SlidingRange = &memo.SlidingRange{ - LeftIndex: nil, - RightIndex: nil, - ValueExpr: &f.Value.Scalar, - MinExpr: &f.Min.Scalar, - ValueCol: valueColRef, - MinColRef: minColRef, - MaxColRef: maxColRef, - Parent: rel.JoinBase, - } - rel.Op = rel.Op.AsSlidingRange() - e.Group().Prepend(rel) return nil default: From bd3755a74d2c80d769039490473f0694e64145f0 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 19 Jul 2023 16:02:48 -0700 Subject: [PATCH 18/46] Allow generating SlidingRanges for joins that have multiple filters. --- sql/analyzer/indexed_joins.go | 94 ++++++++++++++++------------------- 1 file changed, 44 insertions(+), 50 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 69a7f7c0d1..79747e237d 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -664,68 +664,62 @@ func addSlidingRangeJoin(m *memo.Memo) error { } join := e.(memo.JoinRel).JoinPrivate() - if len(join.Filter) != 1 { - return nil - } _, lIndexes, lFilters := lookupCandidates(join.Left.First) _, rIndexes, rFilters := lookupCandidates(join.Right.First) - filter := join.Filter[0] + for _, filter := range join.Filter { - switch f := filter.(type) { - case *memo.Between: - if !(satisfiesScalarRefs(f.Value.Scalar, join.Left) && - satisfiesScalarRefs(f.Min.Scalar, join.Right) && - satisfiesScalarRefs(f.Max.Scalar, join.Right)) { - return nil - } - // TODO: Is this safe? 
If the expression references multiple columns, does this reference one - // arbitrarily? - valueColRef := getColumnRefFromScalar(f.Value.Scalar) - minColRef := getColumnRefFromScalar(f.Min.Scalar) - maxColRef := getColumnRefFromScalar(f.Max.Scalar) - if valueColRef == nil || minColRef == nil || maxColRef == nil { - return nil - } + switch f := filter.(type) { + case *memo.Between: + if !(satisfiesScalarRefs(f.Value.Scalar, join.Left) && + satisfiesScalarRefs(f.Min.Scalar, join.Right) && + satisfiesScalarRefs(f.Max.Scalar, join.Right)) { + return nil + } + // TODO: Is this safe? If the expression references multiple columns, does this reference one + // arbitrarily? + valueColRef := getColumnRefFromScalar(f.Value.Scalar) + minColRef := getColumnRefFromScalar(f.Min.Scalar) + maxColRef := getColumnRefFromScalar(f.Max.Scalar) + if valueColRef == nil || minColRef == nil || maxColRef == nil { + return nil + } - leftIndexScans := sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) - if leftIndexScans == nil { - leftIndexScans = []*memo.IndexScan{nil} - } - for _, lIdx := range leftIndexScans { - rightIndexScans := sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) - if rightIndexScans == nil { - rightIndexScans = []*memo.IndexScan{nil} + leftIndexScans := sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) + if leftIndexScans == nil { + leftIndexScans = []*memo.IndexScan{nil} } - for _, rIdx := range rightIndexScans { - rel := &memo.SlidingRangeJoin{ - JoinBase: join.Copy(), + for _, lIdx := range leftIndexScans { + rightIndexScans := sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) + if rightIndexScans == nil { + rightIndexScans = []*memo.IndexScan{nil} } - // TODO: Remove the filter that was used to create the sliding range because it's no longer - // necessary to 
evaluate. However, removing this can cause issues if it's the only filter because - // iterjoin assumes that there's a filter condition. - // rel.Filter = rel.Filter[1:] - rel.SlidingRange = &memo.SlidingRange{ - LeftIndex: lIdx, - RightIndex: rIdx, - ValueExpr: &f.Value.Scalar, - MinExpr: &f.Min.Scalar, - ValueCol: valueColRef, - MinColRef: minColRef, - MaxColRef: maxColRef, - Parent: rel.JoinBase, + for _, rIdx := range rightIndexScans { + rel := &memo.SlidingRangeJoin{ + JoinBase: join.Copy(), + } + // TODO: Remove the filter that was used to create the sliding range because it's no longer + // necessary to evaluate. However, removing this can cause issues if it's the only filter because + // iterjoin assumes that there's a filter condition. + // rel.Filter = rel.Filter[1:] + rel.SlidingRange = &memo.SlidingRange{ + LeftIndex: lIdx, + RightIndex: rIdx, + ValueExpr: &f.Value.Scalar, + MinExpr: &f.Min.Scalar, + ValueCol: valueColRef, + MinColRef: minColRef, + MaxColRef: maxColRef, + Parent: rel.JoinBase, + } + rel.Op = rel.Op.AsSlidingRange() + e.Group().Prepend(rel) } - rel.Op = rel.Op.AsSlidingRange() - e.Group().Prepend(rel) } } - - return nil - default: - return nil } - + return nil }) } From b1f28f8691cbf8181e67ecc124271749efc0fb35 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 19 Jul 2023 17:42:34 -0700 Subject: [PATCH 19/46] Avoid panic when input to SlidingRange isn't a table. 
--- sql/memo/exec_builder.go | 52 +++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 89d9e00a24..d1ad7564e1 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -133,37 +133,10 @@ func (b *ExecBuilder) buildLookupJoin(j *LookupJoin, input sql.Schema, children return plan.NewJoin(left, right, j.Op, filters).WithScopeLen(j.g.m.scopeLen), nil } -func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql.Schema, children ...sql.Node) (sql.Node, error) { - var ret sql.Node - var err error - +func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql.Schema, children ...sql.Node) (ret sql.Node, err error) { switch n := children[0].(type) { - case *plan.ResolvedTable: - // scan, err := b.buildIndexScan(&sr.RightIndex, input, n) - sortExpr, err := b.buildScalar(*sr.MinExpr, rightSch) - if err != nil { - return nil, err - } - sf := []sql.SortField{{ - Column: sortExpr, - Order: sql.Ascending, - NullOrdering: sql.NullsFirst, - }} - sort := plan.NewSort(sf, n) - ret, err = plan.NewSlidingRange(sort, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) case *plan.TableAlias: - // scan, err := b.buildIndexScan(&sr.RightIndex, input, n.Child.(*plan.ResolvedTable)) - sortExpr, err := b.buildScalar(*sr.MinExpr, rightSch) - if err != nil { - return nil, err - } - sf := []sql.SortField{{ - Column: sortExpr, - Order: sql.Ascending, - NullOrdering: sql.NullsFirst, - }} - sort := plan.NewSort(sf, n) - ret, err = plan.NewSlidingRange(sort, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) ret = plan.NewTableAlias(n.Name(), ret) case *plan.Distinct: ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) @@ -178,7 +151,26 @@ func (b *ExecBuilder) buildSlidingRange(sr 
*SlidingRange, leftSch, rightSch sql. ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) ret = plan.NewLimit(n.Limit, ret) default: - panic(fmt.Sprintf("unexpected lookup child %T", n)) + var childNode sql.Node + if sr.RightIndex != nil { + childNode, err = b.buildIndexScan(sr.RightIndex, rightSch, n) + } else { + sortExpr, err := b.buildScalar(*sr.MinExpr, rightSch) + if err != nil { + return nil, err + } + sf := []sql.SortField{{ + Column: sortExpr, + Order: sql.Ascending, + NullOrdering: sql.NullsFirst, + }} + childNode = plan.NewSort(sf, n) + } + + if err != nil { + return nil, err + } + ret, err = plan.NewSlidingRange(childNode, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) } if err != nil { return nil, err From 2e64fdbfd845fc82d14d76ce7b6f37bdfc027c3d Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 20 Jul 2023 11:37:52 -0700 Subject: [PATCH 20/46] Add additional tests for both tables with and without primary keys. --- enginetest/join_op_tests.go | 199 +++++++++++++++++++++++++++++- enginetest/join_planning_tests.go | 162 +++++++++++++++++++++++- enginetest/memory_engine_test.go | 10 -- enginetest/range_join_op_tests.go | 128 ------------------- 4 files changed, 355 insertions(+), 144 deletions(-) delete mode 100644 enginetest/range_join_op_tests.go diff --git a/enginetest/join_op_tests.go b/enginetest/join_op_tests.go index 49af8398cb..1d8f8bf60b 100644 --- a/enginetest/join_op_tests.go +++ b/enginetest/join_op_tests.go @@ -31,11 +31,12 @@ type JoinOpTests struct { } var biasedCosters = map[string]memo.Coster{ - "inner": memo.NewInnerBiasedCoster(), - "lookup": memo.NewLookupBiasedCoster(), - "hash": memo.NewHashBiasedCoster(), - "merge": memo.NewMergeBiasedCoster(), - "partial": memo.NewPartialBiasedCoster(), + "inner": memo.NewInnerBiasedCoster(), + "lookup": memo.NewLookupBiasedCoster(), + "hash": memo.NewHashBiasedCoster(), + "merge": memo.NewMergeBiasedCoster(), + "partial": 
memo.NewPartialBiasedCoster(), + "slidingRange": memo.NewSlidingRangeBiasedCoster(), } func TestJoinOps(t *testing.T, harness Harness) { @@ -1357,4 +1358,192 @@ SELECT SUM(x) FROM xy WHERE x IN ( }, }, }, + { + name: "primary key range join", + setup: [][]string{ + setup.MydbData[0], + { + "create table vals (val int primary key)", + "create table ranges (min int primary key, max int, unique key(min,max))", + "insert into vals values (0), (1), (2), (3), (4), (5), (6)", + "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", + }, + }, + tests: []JoinOpTests{ + { + Query: "select * from vals join ranges on val between min and max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val > min and val < max", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val >= min and val < max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 1, 3}, + {2, 2, 4}, + {3, 2, 4}, + {3, 3, 5}, + {4, 3, 5}, + {4, 4, 6}, + {5, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val > min and val <= max", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 0, 2}, + {2, 1, 3}, + {3, 1, 3}, + {3, 2, 4}, + {4, 2, 4}, + {4, 3, 5}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val >= min and val <= max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + }, + }, + { + name: "keyless range join", + setup: [][]string{ + setup.MydbData[0], + { + "create table vals (val int)", + "create table ranges (min int, max int)", + 
"insert into vals values (0), (1), (2), (3), (4), (5), (6)", + "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", + }, + }, + tests: []JoinOpTests{ + { + Query: "select * from vals join ranges on val between min and max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val > min and val < max", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val >= min and val < max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 1, 3}, + {2, 2, 4}, + {3, 2, 4}, + {3, 3, 5}, + {4, 3, 5}, + {4, 4, 6}, + {5, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val > min and val <= max", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 0, 2}, + {2, 1, 3}, + {3, 1, 3}, + {3, 2, 4}, + {4, 2, 4}, + {4, 3, 5}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val >= min and val <= max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + }, + }, } diff --git a/enginetest/join_planning_tests.go b/enginetest/join_planning_tests.go index 28136510f9..db2b9f869d 100644 --- a/enginetest/join_planning_tests.go +++ b/enginetest/join_planning_tests.go @@ -916,7 +916,7 @@ join uv d on d.u = c.x`, }, }, { - name: "simple range join", + name: "primary key range join", setup: []string{ "create table vals (val int primary key)", "create table ranges (min int primary key, max int, unique key(min,max))", @@ -945,6 +945,166 @@ join uv d on d.u = c.x`, {6, 4, 6}, }, }, + { + q: "select * from vals join ranges on val > min and 
val < max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }, + { + q: "select * from vals join ranges on val >= min and val < max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 1, 3}, + {2, 2, 4}, + {3, 2, 4}, + {3, 3, 5}, + {4, 3, 5}, + {4, 4, 6}, + {5, 4, 6}, + }, + }, + { + q: "select * from vals join ranges on val > min and val <= max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {1, 0, 2}, + {2, 0, 2}, + {2, 1, 3}, + {3, 1, 3}, + {3, 2, 4}, + {4, 2, 4}, + {4, 3, 5}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + q: "select * from vals join ranges on val >= min and val <= max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + }, + }, + { + name: "keyless range join", + setup: []string{ + "create table vals (val int)", + "create table ranges (min int, max int)", + "insert into vals values (0), (1), (2), (3), (4), (5), (6)", + "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", + }, + tests: []JoinPlanTest{ + { + q: "select * from vals join ranges on val between min and max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + q: "select * from vals join ranges on val > min and val < max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }, + { + q: "select * from vals join ranges on val >= min and val < 
max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 1, 3}, + {2, 2, 4}, + {3, 2, 4}, + {3, 3, 5}, + {4, 3, 5}, + {4, 4, 6}, + {5, 4, 6}, + }, + }, + { + q: "select * from vals join ranges on val > min and val <= max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {1, 0, 2}, + {2, 0, 2}, + {2, 1, 3}, + {3, 1, 3}, + {3, 2, 4}, + {4, 2, 4}, + {4, 3, 5}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + q: "select * from vals join ranges on val >= min and val <= max", + types: []plan.JoinType{plan.JoinTypeSlidingRange}, + exp: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, }, }, } diff --git a/enginetest/memory_engine_test.go b/enginetest/memory_engine_test.go index 9c37beabb6..bb568e39a7 100644 --- a/enginetest/memory_engine_test.go +++ b/enginetest/memory_engine_test.go @@ -170,16 +170,6 @@ func TestJoinOpsPrepared(t *testing.T) { enginetest.TestJoinOpsPrepared(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) } -// TestJoinOps runs range-join-specific tests for merge -func TestRangeJoinOps(t *testing.T) { - enginetest.TestRangeJoinOps(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) -} - -// TestJoinOpsPrepared runs prepared range-join-specific tests for merge -func TestRangeJoinOpsPrepared(t *testing.T) { - enginetest.TestRangeJoinOpsPrepared(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) -} - // TestJSONTableQueries runs the canonical test queries against a single threaded index enabled harness. 
func TestJSONTableQueries(t *testing.T) { enginetest.TestJSONTableQueries(t, enginetest.NewMemoryHarness("simple", 1, testNumPartitions, true, nil)) diff --git a/enginetest/range_join_op_tests.go b/enginetest/range_join_op_tests.go deleted file mode 100644 index 4f58a1d930..0000000000 --- a/enginetest/range_join_op_tests.go +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2023 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package enginetest - -import ( - "fmt" - "testing" - - "github.com/dolthub/go-mysql-server/sql/memo" - - "github.com/dolthub/go-mysql-server/enginetest/scriptgen/setup" - "github.com/dolthub/go-mysql-server/sql" -) - -var biasedrangeCosters = map[string]memo.Coster{ - "inner": memo.NewInnerBiasedCoster(), - "lookup": memo.NewLookupBiasedCoster(), - "hash": memo.NewHashBiasedCoster(), - "merge": memo.NewMergeBiasedCoster(), - "partial": memo.NewPartialBiasedCoster(), - "slidingRange": memo.NewSlidingRangeBiasedCoster(), -} - -func TestRangeJoinOps(t *testing.T, harness Harness) { - for _, tt := range rangeJoinOpTests { - t.Run(tt.name, func(t *testing.T) { - e := mustNewEngine(t, harness) - defer e.Close() - for _, setup := range tt.setup { - for _, statement := range setup { - if sh, ok := harness.(SkippingHarness); ok { - if sh.SkipQueryTest(statement) { - t.Skip() - } - } - ctx := NewContext(harness) - RunQueryWithContext(t, e, harness, ctx, statement) - } - } - for k, c := range biasedrangeCosters { - e.Analyzer.Coster = 
c - for _, tt := range tt.tests { - evalJoinCorrectness(t, harness, e, fmt.Sprintf("%s join: %s", k, tt.Query), tt.Query, tt.Expected, tt.Skip) - } - } - }) - } -} - -func TestRangeJoinOpsPrepared(t *testing.T, harness Harness) { - for _, tt := range joinOpTests { - t.Run(tt.name, func(t *testing.T) { - e := mustNewEngine(t, harness) - defer e.Close() - for _, setup := range tt.setup { - for _, statement := range setup { - if sh, ok := harness.(SkippingHarness); ok { - if sh.SkipQueryTest(statement) { - t.Skip() - } - } - ctx := NewContext(harness) - RunQueryWithContext(t, e, harness, ctx, statement) - } - } - - for k, c := range biasedrangeCosters { - e.Analyzer.Coster = c - for _, tt := range tt.tests { - evalJoinCorrectnessPrepared(t, harness, e, fmt.Sprintf("%s join: %s", k, tt.Query), tt.Query, tt.Expected, tt.Skip) - } - } - }) - } -} - -var rangeJoinOpTests = []struct { - name string - setup [][]string - tests []JoinOpTests -}{ - { - name: "simple range join", - setup: [][]string{ - setup.MydbData[0], - { - "create table vals (val int primary key)", - "create table ranges (min int primary key, max int, unique key(min,max))", - "insert into vals values (0), (1), (2), (3), (4), (5), (6)", - "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", - }, - }, - tests: []JoinOpTests{ - { - Query: "select * from vals join ranges on val between min and max", - Expected: []sql.Row{ - {0, 0, 2}, - {1, 0, 2}, - {1, 1, 3}, - {2, 0, 2}, - {2, 1, 3}, - {2, 2, 4}, - {3, 1, 3}, - {3, 2, 4}, - {3, 3, 5}, - {4, 2, 4}, - {4, 3, 5}, - {4, 4, 6}, - {5, 3, 5}, - {5, 4, 6}, - {6, 4, 6}, - }, - }, - }, - }, -} From b965c14cc0c9854851fc4d8b116b47b801af972f Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 20 Jul 2023 12:47:58 -0700 Subject: [PATCH 21/46] Detect pairs of inequalities for use in RangeJoin. Note that although this tracks whether the inequalities are open or closed, we don't do anything with that information. 
Turns out it doesn't matter for correctness: It's okay for the secondary row iter to assume the ranges are closed and return extra rows, because the filters still get checked in the parent. We probably shouldn't depend on this though. I'll track the openness/closedness of the ranges in the Node in a follow-up commit. --- sql/analyzer/indexed_joins.go | 156 ++++++++++++++++++++++++--------- sql/memo/interner.go | 32 +++---- sql/memo/join_order_builder.go | 2 +- sql/memo/memo.go | 4 +- 4 files changed, 132 insertions(+), 62 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 79747e237d..f53d54f009 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -655,6 +655,79 @@ func addHashJoins(m *memo.Memo) error { }) } +type rangeFilter struct { + value, min, max *memo.ExprGroup + closedOnLowerBound, closedOnUpperBound bool +} + +func getRangeFilters(filters []memo.ScalarExpr) (ranges []rangeFilter) { + type candidateMap struct { + group *memo.ExprGroup + isClosed bool + } + lowerToUpper := make(map[uint64][]candidateMap) + upperToLower := make(map[uint64][]candidateMap) + + findUpperBounds := func(value, min *memo.ExprGroup, closedOnLowerBound bool) { + for _, max := range lowerToUpper[memo.InternExpr(value.Scalar)] { + ranges = append(ranges, rangeFilter{ + value: value, + min: min, + max: max.group, + closedOnLowerBound: closedOnLowerBound, + closedOnUpperBound: max.isClosed}) + } + } + + findLowerBounds := func(value, max *memo.ExprGroup, closedOnUpperBound bool) { + for _, min := range upperToLower[memo.InternExpr(value.Scalar)] { + ranges = append(ranges, rangeFilter{ + value: value, + min: min.group, + max: max, + closedOnLowerBound: min.isClosed, + closedOnUpperBound: closedOnUpperBound}) + } + } + + addBounds := func(lower, upper *memo.ExprGroup, isClosed bool) { + lowerIntern := memo.InternExpr(lower.Scalar) + lowerToUpper[lowerIntern] = append(lowerToUpper[lowerIntern], candidateMap{ + group: upper, +
isClosed: isClosed, + }) + upperIntern := memo.InternExpr(upper.Scalar) + upperToLower[upperIntern] = append(upperToLower[upperIntern], candidateMap{ + group: lower, + isClosed: isClosed, + }) + } + + for _, filter := range filters { + switch f := filter.(type) { + case *memo.Between: + ranges = append(ranges, rangeFilter{f.Value, f.Min, f.Max, true, true}) + case *memo.Gt: + findUpperBounds(f.Left, f.Right, false) + findLowerBounds(f.Right, f.Left, false) + addBounds(f.Right, f.Left, false) + case *memo.Geq: + findUpperBounds(f.Left, f.Right, true) + findLowerBounds(f.Right, f.Left, true) + addBounds(f.Right, f.Left, true) + case *memo.Lt: + findLowerBounds(f.Left, f.Right, false) + findUpperBounds(f.Right, f.Left, false) + addBounds(f.Left, f.Right, false) + case *memo.Leq: + findLowerBounds(f.Left, f.Right, true) + findUpperBounds(f.Right, f.Left, true) + addBounds(f.Left, f.Right, false) + } + } + return ranges +} + func addSlidingRangeJoin(m *memo.Memo) error { return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { @@ -668,54 +741,51 @@ func addSlidingRangeJoin(m *memo.Memo) error { _, lIndexes, lFilters := lookupCandidates(join.Left.First) _, rIndexes, rFilters := lookupCandidates(join.Right.First) - for _, filter := range join.Filter { + for _, filter := range getRangeFilters(join.Filter) { - switch f := filter.(type) { - case *memo.Between: - if !(satisfiesScalarRefs(f.Value.Scalar, join.Left) && - satisfiesScalarRefs(f.Min.Scalar, join.Right) && - satisfiesScalarRefs(f.Max.Scalar, join.Right)) { - return nil - } - // TODO: Is this safe? If the expression references multiple columns, does this reference one - // arbitrarily? 
- valueColRef := getColumnRefFromScalar(f.Value.Scalar) - minColRef := getColumnRefFromScalar(f.Min.Scalar) - maxColRef := getColumnRefFromScalar(f.Max.Scalar) - if valueColRef == nil || minColRef == nil || maxColRef == nil { - return nil - } + if !(satisfiesScalarRefs(filter.value.Scalar, join.Left) && + satisfiesScalarRefs(filter.min.Scalar, join.Right) && + satisfiesScalarRefs(filter.max.Scalar, join.Right)) { + return nil + } + // TODO: Is this safe? If the expression references multiple columns, does this reference one + // arbitrarily? + valueColRef := getColumnRefFromScalar(filter.value.Scalar) + minColRef := getColumnRefFromScalar(filter.min.Scalar) + maxColRef := getColumnRefFromScalar(filter.max.Scalar) + if valueColRef == nil || minColRef == nil || maxColRef == nil { + return nil + } - leftIndexScans := sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) - if leftIndexScans == nil { - leftIndexScans = []*memo.IndexScan{nil} + leftIndexScans := sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) + if leftIndexScans == nil { + leftIndexScans = []*memo.IndexScan{nil} + } + for _, lIdx := range leftIndexScans { + rightIndexScans := sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) + if rightIndexScans == nil { + rightIndexScans = []*memo.IndexScan{nil} } - for _, lIdx := range leftIndexScans { - rightIndexScans := sortedIndexScansForTableCol(rIndexes, minColRef, join.Right.RelProps.FuncDeps().Constants(), rFilters) - if rightIndexScans == nil { - rightIndexScans = []*memo.IndexScan{nil} + for _, rIdx := range rightIndexScans { + rel := &memo.SlidingRangeJoin{ + JoinBase: join.Copy(), } - for _, rIdx := range rightIndexScans { - rel := &memo.SlidingRangeJoin{ - JoinBase: join.Copy(), - } - // TODO: Remove the filter that was used to create the sliding range because it's no longer - // necessary to 
evaluate. However, removing this can cause issues if it's the only filter because - // iterjoin assumes that there's a filter condition. - // rel.Filter = rel.Filter[1:] - rel.SlidingRange = &memo.SlidingRange{ - LeftIndex: lIdx, - RightIndex: rIdx, - ValueExpr: &f.Value.Scalar, - MinExpr: &f.Min.Scalar, - ValueCol: valueColRef, - MinColRef: minColRef, - MaxColRef: maxColRef, - Parent: rel.JoinBase, - } - rel.Op = rel.Op.AsSlidingRange() - e.Group().Prepend(rel) + // TODO: Remove the filter that was used to create the sliding range because it's no longer + // necessary to evaluate. However, removing this can cause issues if it's the only filter because + // iterjoin assumes that there's a filter condition. + // rel.Filter = rel.Filter[1:] + rel.SlidingRange = &memo.SlidingRange{ + LeftIndex: lIdx, + RightIndex: rIdx, + ValueExpr: &filter.value.Scalar, + MinExpr: &filter.min.Scalar, + ValueCol: valueColRef, + MinColRef: minColRef, + MaxColRef: maxColRef, + Parent: rel.JoinBase, } + rel.Op = rel.Op.AsSlidingRange() + e.Group().Prepend(rel) } } } diff --git a/sql/memo/interner.go b/sql/memo/interner.go index 36a3ff51d6..3059c84bf4 100644 --- a/sql/memo/interner.go +++ b/sql/memo/interner.go @@ -46,7 +46,7 @@ const ( ) // TODO code gen to make sure we are not missing -func internExpr(e ScalarExpr) uint64 { +func InternExpr(e ScalarExpr) uint64 { h := xxhash.New() switch e := e.(type) { case *Literal: @@ -54,43 +54,43 @@ func internExpr(e ScalarExpr) uint64 { case *ColRef: h.Write([]byte(fmt.Sprintf("%d%d", e.ExprId(), e.Col))) case *Equal: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *NullSafeEq: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), 
InternExpr(e.Right.Scalar)))) case *Gt: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *Lt: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *Geq: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *Leq: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *IsNull: - h.Write([]byte(fmt.Sprintf("%d%d", e.ExprId(), internExpr(e.Child.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d", e.ExprId(), InternExpr(e.Child.Scalar)))) case *Bindvar: h.Write([]byte(fmt.Sprintf("%d%s", e.ExprId(), e.Name))) case *Hidden: h.Write([]byte(fmt.Sprintf("%d%s", e.ExprId(), e.String()))) case *Arithmetic: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *Or: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *And: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *InTuple: - h.Write([]byte(fmt.Sprintf("%d%d%d", 
e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *Tuple: h.Write([]byte(fmt.Sprintf("%d", e.ExprId()))) for _, c := range e.Values { - h.Write([]byte(fmt.Sprintf("%d", internExpr(c.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d", InternExpr(c.Scalar)))) } case *Between: - h.Write([]byte(fmt.Sprintf("%d%d%d%d", e.ExprId(), internExpr(e.Value.Scalar), internExpr(e.Min.Scalar), internExpr(e.Max.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d%d", e.ExprId(), InternExpr(e.Value.Scalar), InternExpr(e.Min.Scalar), InternExpr(e.Max.Scalar)))) case *Regexp: - h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), internExpr(e.Left.Scalar), internExpr(e.Right.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d%d", e.ExprId(), InternExpr(e.Left.Scalar), InternExpr(e.Right.Scalar)))) case *Not: - h.Write([]byte(fmt.Sprintf("%d%d", e.ExprId(), internExpr(e.Child.Scalar)))) + h.Write([]byte(fmt.Sprintf("%d%d", e.ExprId(), InternExpr(e.Child.Scalar)))) default: return 0 } diff --git a/sql/memo/join_order_builder.go b/sql/memo/join_order_builder.go index 66b3183873..ed2f7dc0ac 100644 --- a/sql/memo/join_order_builder.go +++ b/sql/memo/join_order_builder.go @@ -268,7 +268,7 @@ func (j *joinOrderBuilder) makeTransitiveEdge(col1, col2 sql.ColumnId) { if eqGroup == nil { eqGroup = j.m.NewExprGroup(eq) } - hash := internExpr(eqGroup.Scalar) + hash := InternExpr(eqGroup.Scalar) if hash != 0 { j.m.exprs[hash] = eqGroup } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 137b83924d..02b94d6438 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -122,7 +122,7 @@ func (m *Memo) getColumnId(table, name string) (sql.ColumnId, bool) { } func (m *Memo) PreexistingScalar(e ScalarExpr) *ExprGroup { - hash := internExpr(e) + hash := InternExpr(e) group, _ := m.exprs[hash] return group } @@ -151,7 +151,7 @@ func (m *Memo) MemoizeScalar(e sql.Expression) *ExprGroup { 
default: scalar = m.memoizeHidden(e) } - hash := internExpr(scalar.Scalar) + hash := InternExpr(scalar.Scalar) if hash != 0 { m.exprs[hash] = scalar } From ca729b04be5104ec40e4517b7a8768ad21d588af Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 20 Jul 2023 14:28:56 -0700 Subject: [PATCH 22/46] Use index for left table in sliding range join if available. --- sql/memo/exec_builder.go | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index d1ad7564e1..645838224e 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -179,23 +179,28 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql. } func (b *ExecBuilder) buildSlidingRangeJoin(j *SlidingRangeJoin, input sql.Schema, children ...sql.Node) (sql.Node, error) { - // left, err := b.buildIndexScan(j.SlidingRange.LeftIndex, input, children[0]) - //if err != nil { - // return nil, err - //} leftSch := input[:len(input)-len(j.Right.RelProps.OutputCols())] rightSch := input[len(j.Left.RelProps.OutputCols()):] - sortExpr, err := b.buildScalar(*j.SlidingRange.ValueExpr, leftSch) - if err != nil { - return nil, err + var left sql.Node + var err error + if j.SlidingRange.LeftIndex != nil { + left, err = b.buildIndexScan(j.SlidingRange.LeftIndex, input, children[0]) + if err != nil { + return nil, err + } + } else { + sortExpr, err := b.buildScalar(*j.SlidingRange.ValueExpr, leftSch) + if err != nil { + return nil, err + } + sf := []sql.SortField{{ + Column: sortExpr, + Order: sql.Ascending, + NullOrdering: sql.NullsFirst, + }} + left = plan.NewSort(sf, children[0]) } - sf := []sql.SortField{{ - Column: sortExpr, - Order: sql.Ascending, - NullOrdering: sql.NullsFirst, - }} - left := plan.NewSort(sf, children[0]) right, err := b.buildSlidingRange(j.SlidingRange, leftSch, rightSch, children[1]) if err != nil { From b1eb7c780173d6552197afa7bcce2d4cb9ac1858 Mon Sep 17 00:00:00 2001 
From: Nick Tobey Date: Thu, 20 Jul 2023 15:52:45 -0700 Subject: [PATCH 23/46] Store in the SlidingRange iter whether its ranges are open or closed. --- sql/analyzer/indexed_joins.go | 18 ++++++++++-------- sql/memo/exec_builder.go | 10 +++++++++- sql/memo/memo.go | 18 ++++++++++-------- sql/plan/sliding_range.go | 36 +++++++++++++++++++---------------- 4 files changed, 49 insertions(+), 33 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index f53d54f009..dc445fc326 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -775,14 +775,16 @@ func addSlidingRangeJoin(m *memo.Memo) error { // iterjoin assumes that there's a filter condition. // rel.Filter = rel.Filter[1:] rel.SlidingRange = &memo.SlidingRange{ - LeftIndex: lIdx, - RightIndex: rIdx, - ValueExpr: &filter.value.Scalar, - MinExpr: &filter.min.Scalar, - ValueCol: valueColRef, - MinColRef: minColRef, - MaxColRef: maxColRef, - Parent: rel.JoinBase, + LeftIndex: lIdx, + RightIndex: rIdx, + ValueExpr: &filter.value.Scalar, + MinExpr: &filter.min.Scalar, + ValueCol: valueColRef, + MinColRef: minColRef, + MaxColRef: maxColRef, + Parent: rel.JoinBase, + RangeClosedOnLowerBound: filter.closedOnLowerBound, + RangeClosedOnUpperBound: filter.closedOnUpperBound, } rel.Op = rel.Op.AsSlidingRange() e.Group().Prepend(rel) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 645838224e..d1aa7dc48e 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -170,7 +170,15 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql. 
if err != nil { return nil, err } - ret, err = plan.NewSlidingRange(childNode, leftSch, rightSch, sr.ValueCol.Gf.Name(), sr.MinColRef.Gf.Name(), sr.MaxColRef.Gf.Name()) + ret, err = plan.NewSlidingRange( + childNode, + leftSch, + rightSch, + sr.ValueCol.Gf.Name(), + sr.MinColRef.Gf.Name(), + sr.MaxColRef.Gf.Name(), + sr.RangeClosedOnLowerBound, + sr.RangeClosedOnUpperBound) } if err != nil { return nil, err diff --git a/sql/memo/memo.go b/sql/memo/memo.go index 02b94d6438..e81d61ab4d 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -951,14 +951,16 @@ type IndexScan struct { } type SlidingRange struct { - LeftIndex *IndexScan - RightIndex *IndexScan - ValueCol *ColRef - MinColRef *ColRef - MaxColRef *ColRef - MinExpr *ScalarExpr - ValueExpr *ScalarExpr - Parent *JoinBase + LeftIndex *IndexScan + RightIndex *IndexScan + ValueCol *ColRef + MinColRef *ColRef + MaxColRef *ColRef + MinExpr *ScalarExpr + ValueExpr *ScalarExpr + RangeClosedOnLowerBound bool + RangeClosedOnUpperBound bool + Parent *JoinBase } // splitConjunction_memo breaks AND expressions into their left and right parts, recursively diff --git a/sql/plan/sliding_range.go b/sql/plan/sliding_range.go index 7d536dfed9..9b81fed14d 100644 --- a/sql/plan/sliding_range.go +++ b/sql/plan/sliding_range.go @@ -28,13 +28,15 @@ import ( // don't overlap, the amortized complexity is O(1) for each result row. 
type SlidingRange struct { UnaryNode - childRowIter sql.RowIter - activeRanges priorityQueue - pendingRow sql.Row - valueColumnIndex int - minColumnIndex int - maxColumnIndex int - comparisonType sql.Type + childRowIter sql.RowIter + activeRanges priorityQueue + pendingRow sql.Row + valueColumnIndex int + minColumnIndex int + maxColumnIndex int + comparisonType sql.Type + rangeIsClosedBelow bool + rangeIsClosedAbove bool } type priorityQueue struct { @@ -45,16 +47,18 @@ type priorityQueue struct { var _ sql.Node = (*SlidingRange)(nil) -func NewSlidingRange(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max string) (*SlidingRange, error) { +func NewSlidingRange(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max string, rangeIsClosedBelow, rangeIsClosedAbove bool) (*SlidingRange, error) { // TODO: IndexOfColName is Only safe for schemas corresponding to a single table, where the source of the column is irrelevant. maxColumnIndex := rhsSchema.IndexOfColName(max) newSr := &SlidingRange{ - activeRanges: priorityQueue{}, - pendingRow: nil, - valueColumnIndex: lhsSchema.IndexOfColName(value), - minColumnIndex: rhsSchema.IndexOfColName(min), - maxColumnIndex: maxColumnIndex, - comparisonType: rhsSchema[maxColumnIndex].Type, + activeRanges: priorityQueue{}, + pendingRow: nil, + valueColumnIndex: lhsSchema.IndexOfColName(value), + minColumnIndex: rhsSchema.IndexOfColName(min), + maxColumnIndex: maxColumnIndex, + comparisonType: rhsSchema[maxColumnIndex].Type, + rangeIsClosedBelow: rangeIsClosedBelow, + rangeIsClosedAbove: rangeIsClosedAbove, } newSr.Child = child newSr.activeRanges.slidingRange = newSr @@ -104,7 +108,7 @@ func (s *SlidingRange) AcceptRow(ctx *sql.Context, row sql.Row) (sql.RowIter, er if err != nil { return nil, err } - if compareResult > 0 { + if (s.rangeIsClosedAbove && compareResult > 0) || (!s.rangeIsClosedAbove && compareResult >= 0) { heap.Pop(&s.activeRanges) } else { break @@ -119,7 +123,7 @@ func (s 
*SlidingRange) AcceptRow(ctx *sql.Context, row sql.Row) (sql.RowIter, er return nil, err } - if compareResult < 0 { + if (s.rangeIsClosedBelow && compareResult < 0) && (!s.rangeIsClosedBelow && compareResult <= 0) { break } else { heap.Push(&s.activeRanges, s.pendingRow) From 2bac676d78f7373ac976d46122cb16bb8c492623 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 20 Jul 2023 16:36:49 -0700 Subject: [PATCH 24/46] Add cost for SlidingRangeJoin --- sql/memo/coster.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/memo/coster.go b/sql/memo/coster.go index d9ea25d902..58c4116f4c 100644 --- a/sql/memo/coster.go +++ b/sql/memo/coster.go @@ -192,8 +192,9 @@ func (c *coster) costLookupJoin(_ *sql.Context, n *LookupJoin, _ sql.StatsReader } func (c *coster) costSlidingRangeJoin(_ *sql.Context, n *SlidingRangeJoin, _ sql.StatsReader) (float64, error) { - // For now always favor sliding range. - return 0, nil + l := n.Left.RelProps.card + r := n.Right.RelProps.card + return l*(cpuCostFactor+randIOCostFactor) + r*seqIOCostFactor, nil } func (c *coster) costLateralCrossJoin(ctx *sql.Context, n *LateralCrossJoin, _ sql.StatsReader) (float64, error) { From 1f785e9e4e949bf920fefd80bd786e2ab3725354 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 20 Jul 2023 17:29:49 -0700 Subject: [PATCH 25/46] Update inaccurate coster docstring. 
--- sql/memo/coster.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/memo/coster.go b/sql/memo/coster.go index 58c4116f4c..9e72ea57cd 100644 --- a/sql/memo/coster.go +++ b/sql/memo/coster.go @@ -194,6 +194,7 @@ func (c *coster) costLookupJoin(_ *sql.Context, n *LookupJoin, _ sql.StatsReader func (c *coster) costSlidingRangeJoin(_ *sql.Context, n *SlidingRangeJoin, _ sql.StatsReader) (float64, error) { l := n.Left.RelProps.card r := n.Right.RelProps.card + return l*(cpuCostFactor+randIOCostFactor) + r*seqIOCostFactor, nil } @@ -236,10 +237,9 @@ func (c *coster) costDistinct(_ *sql.Context, n *Distinct, _ sql.StatsReader) (f return n.Child.Cost * (cpuCostFactor + .75*memCostFactor), nil } -// lookupJoinSelectivity estimates the selectivity of a join condition. -// A join with no selectivity will return n x m rows. A join with a selectivity -// of 1 will return n rows. It is possible for join selectivity to be below 1 -// if source table filters limit the number of rows returned by the left table. +// lookupJoinSelectivity estimates the selectivity of a join condition with n lhs rows and m rhs rows. +// A join with a selectivity of k will return k*(n*m) rows. +// Special case: A join with a selectivity of 0 will return n rows. func lookupJoinSelectivity(l *Lookup) float64 { var sel float64 = 1 if len(l.Index.SqlIdx().Expressions()) == len(l.KeyExprs) { From 9acbd798e8d3c0a80afa3c92a1bf551158bca118 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 21 Jul 2023 12:14:38 -0700 Subject: [PATCH 26/46] Fix bug: A lookup with a non-null key for a NullSafeEq inadvertently returns all non-null rows.
--- sql/plan/indexed_table_access.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/plan/indexed_table_access.go b/sql/plan/indexed_table_access.go index b409367c7d..a9c4781172 100644 --- a/sql/plan/indexed_table_access.go +++ b/sql/plan/indexed_table_access.go @@ -493,7 +493,8 @@ func (lb *LookupBuilder) GetLookup(key lookupBuilderKey) (sql.IndexLookup, error lb.rang[i] = sql.NullRangeColumnExpr(lb.cets[i].Type) } else { - lb.rang[i] = sql.NotNullRangeColumnExpr(lb.cets[i].Type) + lb.rang[i].LowerBound = sql.Below{Key: key[i]} + lb.rang[i].UpperBound = sql.Above{Key: key[i]} } } else { lb.rang[i].LowerBound = sql.Below{Key: key[i]} From 88a710b32a97d3e9326e0296f2d22e1cc7e5f1e2 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 24 Jul 2023 17:57:43 -0700 Subject: [PATCH 27/46] Restrict sliding range candidates to those where the filter expressions are exactly column references. --- sql/analyzer/indexed_joins.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index dc445fc326..aa59423724 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -748,11 +748,22 @@ func addSlidingRangeJoin(m *memo.Memo) error { satisfiesScalarRefs(filter.max.Scalar, join.Right)) { return nil } - // TODO: Is this safe? If the expression references multiple columns, does this reference one - // arbitrarily? - valueColRef := getColumnRefFromScalar(filter.value.Scalar) - minColRef := getColumnRefFromScalar(filter.min.Scalar) - maxColRef := getColumnRefFromScalar(filter.max.Scalar) + // For now, only match expressions that are exactly a column reference. 
+ // TODO: We may be able to match more complicated expressions if they meet the necessary criteria, such as: + // - References exactly one column + // - Is monotonically increasing + valueColRef, ok := filter.value.Scalar.(*memo.ColRef) + if !ok { + return nil + } + minColRef, ok := filter.min.Scalar.(*memo.ColRef) + if !ok { + return nil + } + maxColRef, ok := filter.max.Scalar.(*memo.ColRef) + if !ok { + return nil + } if valueColRef == nil || minColRef == nil || maxColRef == nil { return nil } From 3403ed986f136cf64662049563f8695954849b46 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 24 Jul 2023 18:12:32 -0700 Subject: [PATCH 28/46] Remove the TODO about removing filters if they're provably not required. Now that we can build RangeJoins over an index that matches multiple filters, the logic for detecting unneeded filters is no longer trivial. --- sql/analyzer/indexed_joins.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index aa59423724..d5abf3330b 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -781,10 +781,6 @@ func addSlidingRangeJoin(m *memo.Memo) error { rel := &memo.SlidingRangeJoin{ JoinBase: join.Copy(), } - // TODO: Remove the filter that was used to create the sliding range because it's no longer - // necessary to evaluate. However, removing this can cause issues if it's the only filter because - // iterjoin assumes that there's a filter condition. 
- // rel.Filter = rel.Filter[1:] rel.SlidingRange = &memo.SlidingRange{ LeftIndex: lIdx, RightIndex: rIdx, From 19bfbf134dd54dd2606c9cb8edb651c02b11f5cb Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 25 Jul 2023 12:15:29 -0700 Subject: [PATCH 29/46] Rename SlidingJoin to RangeHeap --- enginetest/join_op_tests.go | 12 ++--- enginetest/join_planning_tests.go | 20 ++++----- optgen/cmd/source/memo.yaml | 4 +- sql/analyzer/indexed_joins.go | 10 ++--- sql/memo/coster.go | 16 +++---- sql/memo/exec_builder.go | 24 +++++----- sql/memo/memo.go | 10 ++--- sql/memo/memo.og.go | 18 ++++---- sql/plan/join.go | 14 +++--- sql/plan/jointype_string.go | 6 +-- sql/plan/{sliding_range.go => range_heap.go} | 46 ++++++++++---------- sql/rowexec/node_builder.gen.go | 4 +- sql/rowexec/rel.go | 4 +- 13 files changed, 94 insertions(+), 94 deletions(-) rename sql/plan/{sliding_range.go => range_heap.go} (76%) diff --git a/enginetest/join_op_tests.go b/enginetest/join_op_tests.go index 1d8f8bf60b..95dc758242 100644 --- a/enginetest/join_op_tests.go +++ b/enginetest/join_op_tests.go @@ -31,12 +31,12 @@ type JoinOpTests struct { } var biasedCosters = map[string]memo.Coster{ - "inner": memo.NewInnerBiasedCoster(), - "lookup": memo.NewLookupBiasedCoster(), - "hash": memo.NewHashBiasedCoster(), - "merge": memo.NewMergeBiasedCoster(), - "partial": memo.NewPartialBiasedCoster(), - "slidingRange": memo.NewSlidingRangeBiasedCoster(), + "inner": memo.NewInnerBiasedCoster(), + "lookup": memo.NewLookupBiasedCoster(), + "hash": memo.NewHashBiasedCoster(), + "merge": memo.NewMergeBiasedCoster(), + "partial": memo.NewPartialBiasedCoster(), + "rangeHeap": memo.NewRangeHeapBiasedCoster(), } func TestJoinOps(t *testing.T, harness Harness) { diff --git a/enginetest/join_planning_tests.go b/enginetest/join_planning_tests.go index db2b9f869d..850a71d43b 100644 --- a/enginetest/join_planning_tests.go +++ b/enginetest/join_planning_tests.go @@ -926,7 +926,7 @@ join uv d on d.u = c.x`, tests: []JoinPlanTest{ { 
q: "select * from vals join ranges on val between min and max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {0, 0, 2}, {1, 0, 2}, @@ -947,7 +947,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val > min and val < max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {1, 0, 2}, {2, 1, 3}, @@ -958,7 +958,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val >= min and val < max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {0, 0, 2}, {1, 0, 2}, @@ -974,7 +974,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val > min and val <= max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {1, 0, 2}, {2, 0, 2}, @@ -990,7 +990,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val >= min and val <= max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {0, 0, 2}, {1, 0, 2}, @@ -1022,7 +1022,7 @@ join uv d on d.u = c.x`, tests: []JoinPlanTest{ { q: "select * from vals join ranges on val between min and max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {0, 0, 2}, {1, 0, 2}, @@ -1043,7 +1043,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val > min and val < max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {1, 0, 2}, {2, 1, 3}, @@ -1054,7 +1054,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val >= min and val < max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ 
{0, 0, 2}, {1, 0, 2}, @@ -1070,7 +1070,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val > min and val <= max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {1, 0, 2}, {2, 0, 2}, @@ -1086,7 +1086,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals join ranges on val >= min and val <= max", - types: []plan.JoinType{plan.JoinTypeSlidingRange}, + types: []plan.JoinType{plan.JoinTypeRangeHeap}, exp: []sql.Row{ {0, 0, 2}, {1, 0, 2}, diff --git a/optgen/cmd/source/memo.yaml b/optgen/cmd/source/memo.yaml index f2bf79857c..e3117c2f50 100644 --- a/optgen/cmd/source/memo.yaml +++ b/optgen/cmd/source/memo.yaml @@ -13,10 +13,10 @@ exprs: join: true attrs: - [lookup, "*Lookup"] -- name: "SlidingRangeJoin" +- name: "RangeHeapJoin" join: true attrs: - - [slidingRange, "*SlidingRange"] + - [rangeHeap, "*RangeHeap"] - name: "ConcatJoin" join: true attrs: diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index d5abf3330b..36dd47c9f2 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -201,7 +201,7 @@ func replanJoin(ctx *sql.Context, n *plan.JoinNode, a *Analyzer, scope *plan.Sco if err != nil { return nil, err } - err = addSlidingRangeJoin(m) + err = addRangeHeapJoin(m) if err != nil { return nil, err } @@ -728,7 +728,7 @@ func getRangeFilters(filters []memo.ScalarExpr) (ranges []rangeFilter) { return ranges } -func addSlidingRangeJoin(m *memo.Memo) error { +func addRangeHeapJoin(m *memo.Memo) error { return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { case *memo.InnerJoin, *memo.LeftJoin: @@ -778,10 +778,10 @@ func addSlidingRangeJoin(m *memo.Memo) error { rightIndexScans = []*memo.IndexScan{nil} } for _, rIdx := range rightIndexScans { - rel := &memo.SlidingRangeJoin{ + rel := &memo.RangeHeapJoin{ JoinBase: join.Copy(), } - rel.SlidingRange = &memo.SlidingRange{ + rel.RangeHeap = &memo.RangeHeap{ 
LeftIndex: lIdx, RightIndex: rIdx, ValueExpr: &filter.value.Scalar, @@ -793,7 +793,7 @@ func addSlidingRangeJoin(m *memo.Memo) error { RangeClosedOnLowerBound: filter.closedOnLowerBound, RangeClosedOnUpperBound: filter.closedOnUpperBound, } - rel.Op = rel.Op.AsSlidingRange() + rel.Op = rel.Op.AsRangeHeap() e.Group().Prepend(rel) } } diff --git a/sql/memo/coster.go b/sql/memo/coster.go index 9e72ea57cd..75213a3bd4 100644 --- a/sql/memo/coster.go +++ b/sql/memo/coster.go @@ -71,8 +71,8 @@ func (c *coster) costRel(ctx *sql.Context, n RelExpr, s sql.StatsReader) (float6 return c.costMergeJoin(ctx, n, s) case *LookupJoin: return c.costLookupJoin(ctx, n, s) - case *SlidingRangeJoin: - return c.costSlidingRangeJoin(ctx, n, s) + case *RangeHeapJoin: + return c.costRangeHeapJoin(ctx, n, s) case *LateralCrossJoin: return c.costLateralCrossJoin(ctx, n, s) case *LateralInnerJoin: @@ -191,7 +191,7 @@ func (c *coster) costLookupJoin(_ *sql.Context, n *LookupJoin, _ sql.StatsReader return l*r*sel*(cpuCostFactor+randIOCostFactor) - r*seqIOCostFactor, nil } -func (c *coster) costSlidingRangeJoin(_ *sql.Context, n *SlidingRangeJoin, _ sql.StatsReader) (float64, error) { +func (c *coster) costRangeHeapJoin(_ *sql.Context, n *RangeHeapJoin, _ sql.StatsReader) (float64, error) { l := n.Left.RelProps.card r := n.Right.RelProps.card @@ -524,17 +524,17 @@ func (c *partialBiasedCoster) EstimateCost(ctx *sql.Context, r RelExpr, s sql.St } } -type slidingRangeBiasedCoster struct { +type rangeHeapBiasedCoster struct { *coster } -func NewSlidingRangeBiasedCoster() Coster { - return &slidingRangeBiasedCoster{coster: &coster{}} +func NewRangeHeapBiasedCoster() Coster { + return &rangeHeapBiasedCoster{coster: &coster{}} } -func (c *slidingRangeBiasedCoster) EstimateCost(ctx *sql.Context, r RelExpr, s sql.StatsReader) (float64, error) { +func (c *rangeHeapBiasedCoster) EstimateCost(ctx *sql.Context, r RelExpr, s sql.StatsReader) (float64, error) { switch r.(type) { - case *SlidingRangeJoin: + case 
*RangeHeapJoin: return -biasFactor, nil default: return c.costRel(ctx, r, s) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index d1aa7dc48e..c662542ab3 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -133,22 +133,22 @@ func (b *ExecBuilder) buildLookupJoin(j *LookupJoin, input sql.Schema, children return plan.NewJoin(left, right, j.Op, filters).WithScopeLen(j.g.m.scopeLen), nil } -func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql.Schema, children ...sql.Node) (ret sql.Node, err error) { +func (b *ExecBuilder) buildRangeHeap(sr *RangeHeap, leftSch, rightSch sql.Schema, children ...sql.Node) (ret sql.Node, err error) { switch n := children[0].(type) { case *plan.TableAlias: - ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) + ret, err = b.buildRangeHeap(sr, leftSch, rightSch, n.Child) ret = plan.NewTableAlias(n.Name(), ret) case *plan.Distinct: - ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) + ret, err = b.buildRangeHeap(sr, leftSch, rightSch, n.Child) ret = plan.NewDistinct(ret) case *plan.Filter: - ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) + ret, err = b.buildRangeHeap(sr, leftSch, rightSch, n.Child) ret = plan.NewFilter(n.Expression, ret) case *plan.Project: - ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) + ret, err = b.buildRangeHeap(sr, leftSch, rightSch, n.Child) ret = plan.NewProject(n.Projections, ret) case *plan.Limit: - ret, err = b.buildSlidingRange(sr, leftSch, rightSch, n.Child) + ret, err = b.buildRangeHeap(sr, leftSch, rightSch, n.Child) ret = plan.NewLimit(n.Limit, ret) default: var childNode sql.Node @@ -170,7 +170,7 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql. 
if err != nil { return nil, err } - ret, err = plan.NewSlidingRange( + ret, err = plan.NewRangeHeap( childNode, leftSch, rightSch, @@ -186,19 +186,19 @@ func (b *ExecBuilder) buildSlidingRange(sr *SlidingRange, leftSch, rightSch sql. return ret, nil } -func (b *ExecBuilder) buildSlidingRangeJoin(j *SlidingRangeJoin, input sql.Schema, children ...sql.Node) (sql.Node, error) { +func (b *ExecBuilder) buildRangeHeapJoin(j *RangeHeapJoin, input sql.Schema, children ...sql.Node) (sql.Node, error) { leftSch := input[:len(input)-len(j.Right.RelProps.OutputCols())] rightSch := input[len(j.Left.RelProps.OutputCols()):] var left sql.Node var err error - if j.SlidingRange.LeftIndex != nil { - left, err = b.buildIndexScan(j.SlidingRange.LeftIndex, input, children[0]) + if j.RangeHeap.LeftIndex != nil { + left, err = b.buildIndexScan(j.RangeHeap.LeftIndex, input, children[0]) if err != nil { return nil, err } } else { - sortExpr, err := b.buildScalar(*j.SlidingRange.ValueExpr, leftSch) + sortExpr, err := b.buildScalar(*j.RangeHeap.ValueExpr, leftSch) if err != nil { return nil, err } @@ -210,7 +210,7 @@ func (b *ExecBuilder) buildSlidingRangeJoin(j *SlidingRangeJoin, input sql.Schem left = plan.NewSort(sf, children[0]) } - right, err := b.buildSlidingRange(j.SlidingRange, leftSch, rightSch, children[1]) + right, err := b.buildRangeHeap(j.RangeHeap, leftSch, rightSch, children[1]) if err != nil { return nil, err } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index e81d61ab4d..b87552974a 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -397,8 +397,8 @@ func (m *Memo) MemoizeLookupJoin(grp, left, right *ExprGroup, op plan.JoinType, return grp } -func (m *Memo) MemoizeSlidingRangeJoin(grp, left, right *ExprGroup, op plan.JoinType, filter []ScalarExpr, slidingRange *SlidingRange) *ExprGroup { - newJoin := &SlidingRangeJoin{ +func (m *Memo) MemoizeRangeHeapJoin(grp, left, right *ExprGroup, op plan.JoinType, filter []ScalarExpr, rangeHeap *RangeHeap) *ExprGroup { + newJoin 
:= &RangeHeapJoin{ JoinBase: &JoinBase{ relBase: &relBase{}, Left: left, @@ -406,9 +406,9 @@ func (m *Memo) MemoizeSlidingRangeJoin(grp, left, right *ExprGroup, op plan.Join Op: op, Filter: filter, }, - SlidingRange: slidingRange, + RangeHeap: rangeHeap, } - newJoin.SlidingRange.Parent = newJoin.JoinBase + newJoin.RangeHeap.Parent = newJoin.JoinBase if grp == nil { return m.NewExprGroup(newJoin) @@ -950,7 +950,7 @@ type IndexScan struct { Parent *JoinBase } -type SlidingRange struct { +type RangeHeap struct { LeftIndex *IndexScan RightIndex *IndexScan ValueCol *ColRef diff --git a/sql/memo/memo.og.go b/sql/memo/memo.og.go index c66e74e247..b9adaead35 100644 --- a/sql/memo/memo.og.go +++ b/sql/memo/memo.og.go @@ -102,19 +102,19 @@ func (r *LookupJoin) JoinPrivate() *JoinBase { return r.JoinBase } -type SlidingRangeJoin struct { +type RangeHeapJoin struct { *JoinBase - SlidingRange *SlidingRange + RangeHeap *RangeHeap } -var _ RelExpr = (*SlidingRangeJoin)(nil) -var _ JoinRel = (*SlidingRangeJoin)(nil) +var _ RelExpr = (*RangeHeapJoin)(nil) +var _ JoinRel = (*RangeHeapJoin)(nil) -func (r *SlidingRangeJoin) String() string { +func (r *RangeHeapJoin) String() string { return FormatExpr(r) } -func (r *SlidingRangeJoin) JoinPrivate() *JoinBase { +func (r *RangeHeapJoin) JoinPrivate() *JoinBase { return r.JoinBase } @@ -947,7 +947,7 @@ func FormatExpr(r exprType) string { return fmt.Sprintf("antijoin %d %d", r.Left.Id, r.Right.Id) case *LookupJoin: return fmt.Sprintf("lookupjoin %d %d", r.Left.Id, r.Right.Id) - case *SlidingRangeJoin: + case *RangeHeapJoin: return fmt.Sprintf("slidingrangejoin %d %d", r.Left.Id, r.Right.Id) case *ConcatJoin: return fmt.Sprintf("concatjoin %d %d", r.Left.Id, r.Right.Id) @@ -1051,8 +1051,8 @@ func buildRelExpr(b *ExecBuilder, r RelExpr, input sql.Schema, children ...sql.N result, err = b.buildAntiJoin(r, input, children...) case *LookupJoin: result, err = b.buildLookupJoin(r, input, children...) 
- case *SlidingRangeJoin: - result, err = b.buildSlidingRangeJoin(r, input, children...) + case *RangeHeapJoin: + result, err = b.buildRangeHeapJoin(r, input, children...) case *ConcatJoin: result, err = b.buildConcatJoin(r, input, children...) case *HashJoin: diff --git a/sql/plan/join.go b/sql/plan/join.go index 893bb5803f..cf6cade7df 100644 --- a/sql/plan/join.go +++ b/sql/plan/join.go @@ -45,8 +45,8 @@ const ( JoinTypeLeftOuterHashExcludeNulls // LeftOuterHashJoinExcludeNulls JoinTypeMerge // MergeJoin JoinTypeLeftOuterMerge // LeftOuterMergeJoin - JoinTypeSlidingRange // SlidingRangeJoin - JoinTypeLeftOuterSlidingRange // LeftOuterSlidingRangeJoin + JoinTypeRangeHeap // RangeHeapJoin + JoinTypeLeftOuterRangeHeap // LeftOuterRangeHeapJoin JoinTypeSemiHash // SemiHashJoin JoinTypeAntiHash // AntiHashJoin JoinTypeSemiLookup // SemiLookupJoin @@ -63,7 +63,7 @@ const ( func (i JoinType) IsLeftOuter() bool { switch i { - case JoinTypeLeftOuter, JoinTypeLeftOuterExcludeNulls, JoinTypeLeftOuterLookup, JoinTypeLeftOuterHash, JoinTypeLeftOuterHashExcludeNulls, JoinTypeLeftOuterMerge, JoinTypeLeftOuterSlidingRange: + case JoinTypeLeftOuter, JoinTypeLeftOuterExcludeNulls, JoinTypeLeftOuterLookup, JoinTypeLeftOuterHash, JoinTypeLeftOuterHashExcludeNulls, JoinTypeLeftOuterMerge, JoinTypeLeftOuterRangeHeap: return true default: return false @@ -95,7 +95,7 @@ func (i JoinType) IsPhysical() bool { JoinTypeSemiLookup, JoinTypeSemiMerge, JoinTypeSemiHash, JoinTypeHash, JoinTypeLeftOuterHash, JoinTypeLeftOuterHashExcludeNulls, JoinTypeMerge, JoinTypeLeftOuterMerge, - JoinTypeAntiLookup, JoinTypeAntiMerge, JoinTypeAntiHash, JoinTypeSlidingRange, JoinTypeLeftOuterSlidingRange: + JoinTypeAntiLookup, JoinTypeAntiMerge, JoinTypeAntiHash, JoinTypeRangeHeap, JoinTypeLeftOuterRangeHeap: return true default: return false @@ -211,12 +211,12 @@ func (i JoinType) AsHash() JoinType { } } -func (i JoinType) AsSlidingRange() JoinType { +func (i JoinType) AsRangeHeap() JoinType { switch i { case 
JoinTypeInner: - return JoinTypeSlidingRange + return JoinTypeRangeHeap case JoinTypeLeftOuter: - return JoinTypeLeftOuterSlidingRange + return JoinTypeLeftOuterRangeHeap default: return i } diff --git a/sql/plan/jointype_string.go b/sql/plan/jointype_string.go index 3648d8eec8..4863a9d8e2 100644 --- a/sql/plan/jointype_string.go +++ b/sql/plan/jointype_string.go @@ -26,8 +26,8 @@ func _() { _ = x[JoinTypeLeftOuterHashExcludeNulls-15] _ = x[JoinTypeMerge-16] _ = x[JoinTypeLeftOuterMerge-17] - _ = x[JoinTypeSlidingRange-18] - _ = x[JoinTypeLeftOuterSlidingRange-19] + _ = x[JoinTypeRangeHeap-18] + _ = x[JoinTypeLeftOuterRangeHeap-19] _ = x[JoinTypeSemiHash-20] _ = x[JoinTypeAntiHash-21] _ = x[JoinTypeSemiLookup-22] @@ -41,7 +41,7 @@ func _() { _ = x[JoinTypeLateralRight-30] } -const _JoinType_name = "UnknownJoinCrossJoinCrossHashJoinInnerJoinSemiJoinAntiJoinLeftOuterJoinLeftOuterJoinExcludingNullsFullOuterJoinGroupByJoinRightJoinLookupJoinLeftOuterLookupJoinHashJoinLeftOuterHashJoinLeftOuterHashJoinExcludeNullsMergeJoinLeftOuterMergeJoinSlidingRangeJoinLeftOuterSlidingRangeJoinSemiHashJoinAntiHashJoinSemiLookupJoinAntiLookupJoinSemiMergeJoinAntiMergeJoinNaturalJoinLateralCrossJoinLateralInnerJoinLateralLeftJoinLateralLeftJoin" +const _JoinType_name = "UnknownJoinCrossJoinCrossHashJoinInnerJoinSemiJoinAntiJoinLeftOuterJoinLeftOuterJoinExcludingNullsFullOuterJoinGroupByJoinRightJoinLookupJoinLeftOuterLookupJoinHashJoinLeftOuterHashJoinLeftOuterHashJoinExcludeNullsMergeJoinLeftOuterMergeJoinRangeHeapJoinLeftOuterRangeHeapJoinSemiHashJoinAntiHashJoinSemiLookupJoinAntiLookupJoinSemiMergeJoinAntiMergeJoinNaturalJoinLateralCrossJoinLateralInnerJoinLateralLeftJoinLateralLeftJoin" var _JoinType_index = [...]uint16{0, 11, 20, 33, 42, 50, 58, 71, 98, 111, 122, 131, 141, 160, 168, 185, 214, 223, 241, 257, 282, 294, 306, 320, 334, 347, 360, 371, 387, 403, 418, 433} diff --git a/sql/plan/sliding_range.go b/sql/plan/range_heap.go similarity index 76% rename from 
sql/plan/sliding_range.go rename to sql/plan/range_heap.go index 9b81fed14d..22e6973ac8 100644 --- a/sql/plan/sliding_range.go +++ b/sql/plan/range_heap.go @@ -23,10 +23,10 @@ import ( "github.com/dolthub/go-mysql-server/sql" ) -// SlidingRange is a Node that wraps a table with min and max range columns. When used as a secondary provider in Join +// RangeHeap is a Node that wraps a table with min and max range columns. When used as a secondary provider in Join // operations, it can efficiently compute the rows whose ranges bound the value from the other table. When the ranges // don't overlap, the amortized complexity is O(1) for each result row. -type SlidingRange struct { +type RangeHeap struct { UnaryNode childRowIter sql.RowIter activeRanges priorityQueue @@ -40,17 +40,17 @@ type SlidingRange struct { } type priorityQueue struct { - slidingRange *SlidingRange - rows []sql.Row - err error + rangeHeap *RangeHeap + rows []sql.Row + err error } -var _ sql.Node = (*SlidingRange)(nil) +var _ sql.Node = (*RangeHeap)(nil) -func NewSlidingRange(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max string, rangeIsClosedBelow, rangeIsClosedAbove bool) (*SlidingRange, error) { +func NewRangeHeap(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max string, rangeIsClosedBelow, rangeIsClosedAbove bool) (*RangeHeap, error) { // TODO: IndexOfColName is Only safe for schemas corresponding to a single table, where the source of the column is irrelevant. 
maxColumnIndex := rhsSchema.IndexOfColName(max) - newSr := &SlidingRange{ + newSr := &RangeHeap{ activeRanges: priorityQueue{}, pendingRow: nil, valueColumnIndex: lhsSchema.IndexOfColName(value), @@ -61,15 +61,15 @@ func NewSlidingRange(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, rangeIsClosedAbove: rangeIsClosedAbove, } newSr.Child = child - newSr.activeRanges.slidingRange = newSr + newSr.activeRanges.rangeHeap = newSr return newSr, nil } -func (s *SlidingRange) String() string { +func (s *RangeHeap) String() string { return s.Child.String() } -func (s *SlidingRange) WithChildren(children ...sql.Node) (sql.Node, error) { +func (s *RangeHeap) WithChildren(children ...sql.Node) (sql.Node, error) { if len(children) != 1 { return nil, fmt.Errorf("ds") } @@ -79,28 +79,28 @@ func (s *SlidingRange) WithChildren(children ...sql.Node) (sql.Node, error) { return &s2, nil } -func (s *SlidingRange) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool { +func (s *RangeHeap) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool { return s.Child.CheckPrivileges(ctx, opChecker) } -var _ sql.Node = (*SlidingRange)(nil) +var _ sql.Node = (*RangeHeap)(nil) -func (s *SlidingRange) Initialize(ctx *sql.Context, childRowIter sql.RowIter) (err error) { +func (s *RangeHeap) Initialize(ctx *sql.Context, childRowIter sql.RowIter) (err error) { s.childRowIter = childRowIter s.activeRanges = priorityQueue{ - slidingRange: s, - rows: nil, - err: nil, + rangeHeap: s, + rows: nil, + err: nil, } s.pendingRow, err = childRowIter.Next(ctx) return err } -func (s *SlidingRange) IsInitialized() bool { +func (s *RangeHeap) IsInitialized() bool { return s.childRowIter != nil } -func (s *SlidingRange) AcceptRow(ctx *sql.Context, row sql.Row) (sql.RowIter, error) { +func (s *RangeHeap) AcceptRow(ctx *sql.Context, row sql.Row) (sql.RowIter, error) { // Remove rows from the heap if we've advanced beyond their max value. 
for s.activeRanges.Len() > 0 { maxValue := s.activeRanges.Peek() @@ -147,8 +147,8 @@ func (s *SlidingRange) AcceptRow(ctx *sql.Context, row sql.Row) (sql.RowIter, er func (pq priorityQueue) Len() int { return len(pq.rows) } func (pq *priorityQueue) Less(i, j int) bool { - lhs := pq.rows[i][pq.slidingRange.maxColumnIndex] - rhs := pq.rows[j][pq.slidingRange.maxColumnIndex] + lhs := pq.rows[i][pq.rangeHeap.maxColumnIndex] + rhs := pq.rows[j][pq.rangeHeap.maxColumnIndex] // compareResult will be 0 if lhs==rhs, -1 if lhs < rhs, and +1 if lhs > rhs. compareResult, err := pq.SortedType().Compare(lhs, rhs) if pq.err == nil && err != nil { @@ -175,9 +175,9 @@ func (pq *priorityQueue) Pop() any { func (pq *priorityQueue) Peek() interface{} { n := len(pq.rows) - return pq.rows[n-1][pq.slidingRange.maxColumnIndex] + return pq.rows[n-1][pq.rangeHeap.maxColumnIndex] } func (pq *priorityQueue) SortedType() sql.Type { - return pq.slidingRange.comparisonType + return pq.rangeHeap.comparisonType } diff --git a/sql/rowexec/node_builder.gen.go b/sql/rowexec/node_builder.gen.go index 663de0b1e1..77fff1e891 100644 --- a/sql/rowexec/node_builder.gen.go +++ b/sql/rowexec/node_builder.gen.go @@ -360,8 +360,8 @@ func (b *BaseBuilder) buildNodeExec(ctx *sql.Context, n sql.Node, row sql.Row) ( return n.RowIter(ctx, row) case *plan.CreateSpatialRefSys: return b.buildCreateSpatialRefSys(ctx, n, row) - case *plan.SlidingRange: - return b.buildSlidingRange(ctx, n, row) + case *plan.RangeHeap: + return b.buildRangeHeap(ctx, n, row) default: return nil, fmt.Errorf("exec builder found unknown Node type %T", n) } diff --git a/sql/rowexec/rel.go b/sql/rowexec/rel.go index e53a703740..dd427ec97a 100644 --- a/sql/rowexec/rel.go +++ b/sql/rowexec/rel.go @@ -735,9 +735,9 @@ func (b *BaseBuilder) buildTableCount(_ *sql.Context, n *plan.TableCountLookup, return sql.RowsToRowIter(sql.Row{int64(n.Count())}), nil } -func (b *BaseBuilder) buildSlidingRange(ctx *sql.Context, n *plan.SlidingRange, row sql.Row) 
(sql.RowIter, error) { +func (b *BaseBuilder) buildRangeHeap(ctx *sql.Context, n *plan.RangeHeap, row sql.Row) (sql.RowIter, error) { // The first time, initialize the child rowIter and the heap - span, ctx := ctx.Span("plan.SlidingRange") + span, ctx := ctx.Span("plan.RangeHeap") if !n.IsInitialized() { i, err := b.buildNodeExec(ctx, n.Child, row) if err != nil { From 57784d6e03b59d25253e31bcc5d514d204a2f2cd Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 25 Jul 2023 14:54:20 -0700 Subject: [PATCH 30/46] Add docstring and examples to `getRangeFilters` and `getColumnRefFromScalar` Also fix the typo that the examples uncovered. --- sql/analyzer/indexed_joins.go | 6 +- sql/analyzer/range_filter_test.go | 138 ++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 sql/analyzer/range_filter_test.go diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 36dd47c9f2..9ee96ba805 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -660,6 +660,9 @@ type rangeFilter struct { closedOnLowerBound, closedOnUpperBound bool } +// getRangeFilters takes the filter expressions on a join and identifies "ranges" where a given expression +// is constrained between two other expressions. (For instance, detecting "x > 5" and "x <= 10" and creating a range +// object representing "5 < x <= 10". See range_filter_test.go for examples. 
func getRangeFilters(filters []memo.ScalarExpr) (ranges []rangeFilter) { type candidateMap struct { group *memo.ExprGroup @@ -722,7 +725,7 @@ func getRangeFilters(filters []memo.ScalarExpr) (ranges []rangeFilter) { case *memo.Leq: findLowerBounds(f.Left, f.Right, true) findUpperBounds(f.Right, f.Left, true) - addBounds(f.Left, f.Right, false) + addBounds(f.Left, f.Right, true) } } return ranges @@ -809,6 +812,7 @@ func satisfiesScalarRefs(e memo.ScalarExpr, grp *memo.ExprGroup) bool { return e.Group().ScalarProps().Tables.Difference(grp.RelProps.OutputTables()).Len() == 0 } +// getColumnRefFromScalar returns the first column reference used in a scalar expression. func getColumnRefFromScalar(s memo.ScalarExpr) *memo.ColRef { var result *memo.ColRef memo.DfsScalar(s, func(e memo.ScalarExpr) (err error) { diff --git a/sql/analyzer/range_filter_test.go b/sql/analyzer/range_filter_test.go new file mode 100644 index 0000000000..739879b0e2 --- /dev/null +++ b/sql/analyzer/range_filter_test.go @@ -0,0 +1,138 @@ +package analyzer + +import ( + "github.com/dolthub/go-mysql-server/sql/memo" + "github.com/dolthub/go-mysql-server/sql/types" + "github.com/stretchr/testify/require" + "testing" +) + +func variable(name string) *memo.ExprGroup { + return &memo.ExprGroup{Scalar: &memo.Bindvar{ + Name: name, + Typ: types.Int8, + }} +} + +func literal(value int) *memo.ExprGroup { + return &memo.ExprGroup{Scalar: &memo.Literal{ + Val: value, + Typ: types.Int8, + }} +} + +func TestHintParsing(t *testing.T) { + tests := []struct { + name string + filterExpressions []memo.ScalarExpr + expectedRanges []rangeFilter + }{ + { + name: "simple Between test", + filterExpressions: []memo.ScalarExpr{ + &memo.Between{ + Value: variable("x"), + Min: literal(0), + Max: literal(10), + }, + }, + expectedRanges: []rangeFilter{ + { + value: variable("x"), + min: literal(0), + max: literal(10), + closedOnLowerBound: true, + closedOnUpperBound: true, + }, + }, + }, + { + name: "simple less than / greater 
than test", + filterExpressions: []memo.ScalarExpr{ + &memo.Gt{ + Left: variable("y"), + Right: literal(11), + }, + &memo.Lt{ + Left: variable("y"), + Right: literal(20), + }, + }, + expectedRanges: []rangeFilter{ + { + value: variable("y"), + min: literal(11), + max: literal(20), + closedOnLowerBound: false, + closedOnUpperBound: false, + }, + }, + }, + { + name: "simple greater than / greater than test", + filterExpressions: []memo.ScalarExpr{ + &memo.Geq{ + Left: variable("z"), + Right: literal(21), + }, + &memo.Geq{ + Left: literal(30), + Right: variable("z"), + }, + }, + expectedRanges: []rangeFilter{ + { + value: variable("z"), + min: literal(21), + max: literal(30), + closedOnLowerBound: true, + closedOnUpperBound: true, + }, + }, + }, + { + name: "multiple ranges", + filterExpressions: []memo.ScalarExpr{ + &memo.Leq{ + Left: variable("a"), + Right: literal(40), + }, + &memo.Lt{ + Left: literal(31), + Right: variable("a"), + }, + &memo.Lt{ + Left: variable("b"), + Right: literal(50), + }, + &memo.Leq{ + Left: literal(41), + Right: variable("b"), + }, + }, + expectedRanges: []rangeFilter{ + { + value: variable("a"), + min: literal(31), + max: literal(40), + closedOnLowerBound: false, + closedOnUpperBound: true, + }, + { + value: variable("b"), + min: literal(41), + max: literal(50), + closedOnLowerBound: true, + closedOnUpperBound: false, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + res := getRangeFilters(tt.filterExpressions) + require.ElementsMatch(t, tt.expectedRanges, res) + }) + } +} From 72836c58bd1dfac05093d4eb5ec917d355fa448c Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 25 Jul 2023 16:39:48 -0700 Subject: [PATCH 31/46] Remove unnecessary check. 
--- sql/analyzer/indexed_joins.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 9ee96ba805..8a102ccdc3 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -731,6 +731,8 @@ func getRangeFilters(filters []memo.ScalarExpr) (ranges []rangeFilter) { return ranges } +// addRangeHeapJoin checks whether the join can be implemented as a RangeHeap, and if so, adds a memo.RangeHeap plan +// to the memo. func addRangeHeapJoin(m *memo.Memo) error { return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { @@ -767,9 +769,6 @@ func addRangeHeapJoin(m *memo.Memo) error { if !ok { return nil } - if valueColRef == nil || minColRef == nil || maxColRef == nil { - return nil - } leftIndexScans := sortedIndexScansForTableCol(lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) if leftIndexScans == nil { From 9dc95ecbfd5924249248ee6148fee9752e60ce4c Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 25 Jul 2023 17:06:15 -0700 Subject: [PATCH 32/46] Add docstring for `memo.RangeHeap`, and rename fields for clarity. 
--- sql/analyzer/indexed_joins.go | 4 ++-- sql/memo/exec_builder.go | 8 ++++---- sql/memo/memo.go | 15 +++++++++++---- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 8a102ccdc3..7bc5b24fcb 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -784,8 +784,8 @@ func addRangeHeapJoin(m *memo.Memo) error { JoinBase: join.Copy(), } rel.RangeHeap = &memo.RangeHeap{ - LeftIndex: lIdx, - RightIndex: rIdx, + ValueIndex: lIdx, + MinIndex: rIdx, ValueExpr: &filter.value.Scalar, MinExpr: &filter.min.Scalar, ValueCol: valueColRef, diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index c662542ab3..33b07c0831 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -152,8 +152,8 @@ func (b *ExecBuilder) buildRangeHeap(sr *RangeHeap, leftSch, rightSch sql.Schema ret = plan.NewLimit(n.Limit, ret) default: var childNode sql.Node - if sr.RightIndex != nil { - childNode, err = b.buildIndexScan(sr.RightIndex, rightSch, n) + if sr.MinIndex != nil { + childNode, err = b.buildIndexScan(sr.MinIndex, rightSch, n) } else { sortExpr, err := b.buildScalar(*sr.MinExpr, rightSch) if err != nil { @@ -192,8 +192,8 @@ func (b *ExecBuilder) buildRangeHeapJoin(j *RangeHeapJoin, input sql.Schema, chi var left sql.Node var err error - if j.RangeHeap.LeftIndex != nil { - left, err = b.buildIndexScan(j.RangeHeap.LeftIndex, input, children[0]) + if j.RangeHeap.ValueIndex != nil { + left, err = b.buildIndexScan(j.RangeHeap.ValueIndex, input, children[0]) if err != nil { return nil, err } diff --git a/sql/memo/memo.go b/sql/memo/memo.go index b87552974a..7cb14cb7cc 100644 --- a/sql/memo/memo.go +++ b/sql/memo/memo.go @@ -950,14 +950,21 @@ type IndexScan struct { Parent *JoinBase } +// RangeHeap contains all the information necessary to construct a RangeHeap join. 
+// Because both sides of the join can be implemented either by an index or a sorted node, +// we require that exactly one of ValueIndex and ValueExpr is non-nil, and exactly one +// of MinIndex and MinExpr is non-nil. If the index is non-nil, we will use it to construct +// a plan.IndexedTableAccess. Otherwise we use the expression to construct a plan.Sort. type RangeHeap struct { - LeftIndex *IndexScan - RightIndex *IndexScan + ValueIndex *IndexScan + ValueExpr *ScalarExpr + + MinIndex *IndexScan + MinExpr *ScalarExpr + ValueCol *ColRef MinColRef *ColRef MaxColRef *ColRef - MinExpr *ScalarExpr - ValueExpr *ScalarExpr RangeClosedOnLowerBound bool RangeClosedOnUpperBound bool Parent *JoinBase From 9d556bf865d8fac64d490a3452da6cc32fbe6c4f Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 25 Jul 2023 17:29:06 -0700 Subject: [PATCH 33/46] Add docstring and examples for `addRangeHeapJoin` --- sql/analyzer/indexed_joins.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 7bc5b24fcb..9a883bac7c 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -731,8 +731,13 @@ func getRangeFilters(filters []memo.ScalarExpr) (ranges []rangeFilter) { return ranges } -// addRangeHeapJoin checks whether the join can be implemented as a RangeHeap, and if so, adds a memo.RangeHeap plan -// to the memo. +// addRangeHeapJoin checks whether the join can be implemented as a RangeHeap, and if so, prefixes a memo.RangeHeap plan +// to the memo join group. We can apply a range heap join for any join plan where a filter (or pair of filters) restricts a column the left child +// to be between two columns the right child. 
+// +// Some example joins that can be implemented as RangeHeap joins: +// - SELECT * FROM a JOIN b on a.value BETWEEN b.min AND b.max +// - SELECT * FROM a JOIN b on b.min <= a.value AND a.value < b.max func addRangeHeapJoin(m *memo.Memo) error { return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { switch e.(type) { From e1c3f8a9e002483bfe611ce8633715ff1f53fbf4 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 25 Jul 2023 22:34:51 -0700 Subject: [PATCH 34/46] Update coster for RangeHeapJoin. This means that RangeHeapJoin will always cost better than InnerJoin, but a lookup join with multiple key expressions can cost even better. --- sql/memo/coster.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/memo/coster.go b/sql/memo/coster.go index 75213a3bd4..deafdf9804 100644 --- a/sql/memo/coster.go +++ b/sql/memo/coster.go @@ -195,7 +195,10 @@ func (c *coster) costRangeHeapJoin(_ *sql.Context, n *RangeHeapJoin, _ sql.Stats l := n.Left.RelProps.card r := n.Right.RelProps.card - return l*(cpuCostFactor+randIOCostFactor) + r*seqIOCostFactor, nil + // TODO: We can probably get a better estimate somehow. + expectedNumberOfOverlappingJoins := r * perKeyCostReductionFactor + + return (l+r)*randIOCostFactor + l*expectedNumberOfOverlappingJoins*(cpuCostFactor), nil } func (c *coster) costLateralCrossJoin(ctx *sql.Context, n *LateralCrossJoin, _ sql.StatsReader) (float64, error) { @@ -237,6 +240,8 @@ func (c *coster) costDistinct(_ *sql.Context, n *Distinct, _ sql.StatsReader) (f return n.Child.Cost * (cpuCostFactor + .75*memCostFactor), nil } +const perKeyCostReductionFactor = 0.5 + // lookupJoinSelectivity estimates the selectivity of a join condition with n lhs rows and m rhs rows. // A join with a selectivity of k will return k*(n*m) rows. // Special case: A join with a selectivity of 0 will return n rows. 
@@ -245,7 +250,7 @@ func lookupJoinSelectivity(l *Lookup) float64 { if len(l.Index.SqlIdx().Expressions()) == len(l.KeyExprs) { sel = 0.1 } else { - sel = math.Pow(0.5, float64(len(l.KeyExprs))) + sel = math.Pow(perKeyCostReductionFactor, float64(len(l.KeyExprs))) } if !l.Index.SqlIdx().IsUnique() { return sel From 6405061b8431c3a56cd5dc08323c8180ce855a01 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 25 Jul 2023 22:36:08 -0700 Subject: [PATCH 35/46] Update JoinType strings to accommodate renaming SlidingRange to RangeHeap. --- sql/plan/jointype_string.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/plan/jointype_string.go b/sql/plan/jointype_string.go index 4863a9d8e2..4f89813a28 100644 --- a/sql/plan/jointype_string.go +++ b/sql/plan/jointype_string.go @@ -43,7 +43,7 @@ func _() { const _JoinType_name = "UnknownJoinCrossJoinCrossHashJoinInnerJoinSemiJoinAntiJoinLeftOuterJoinLeftOuterJoinExcludingNullsFullOuterJoinGroupByJoinRightJoinLookupJoinLeftOuterLookupJoinHashJoinLeftOuterHashJoinLeftOuterHashJoinExcludeNullsMergeJoinLeftOuterMergeJoinRangeHeapJoinLeftOuterRangeHeapJoinSemiHashJoinAntiHashJoinSemiLookupJoinAntiLookupJoinSemiMergeJoinAntiMergeJoinNaturalJoinLateralCrossJoinLateralInnerJoinLateralLeftJoinLateralLeftJoin" -var _JoinType_index = [...]uint16{0, 11, 20, 33, 42, 50, 58, 71, 98, 111, 122, 131, 141, 160, 168, 185, 214, 223, 241, 257, 282, 294, 306, 320, 334, 347, 360, 371, 387, 403, 418, 433} +var _JoinType_index = [...]uint16{0, 11, 20, 33, 42, 50, 58, 71, 98, 111, 122, 131, 141, 160, 168, 185, 214, 223, 241, 254, 276, 288, 300, 314, 328, 341, 354, 365, 381, 397, 412, 427} func (i JoinType) String() string { if i >= JoinType(len(_JoinType_index)-1) { From 11ff9ddb1dd13da344dff75137cb07c6503ec7b6 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 26 Jul 2023 15:07:33 -0700 Subject: [PATCH 36/46] When sorting the input for RangeHeapJoin, match the current behavior of putting Nulls after non-Nulls. 
--- sql/memo/exec_builder.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 33b07c0831..d9d98b3bbc 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -162,7 +162,7 @@ func (b *ExecBuilder) buildRangeHeap(sr *RangeHeap, leftSch, rightSch sql.Schema sf := []sql.SortField{{ Column: sortExpr, Order: sql.Ascending, - NullOrdering: sql.NullsFirst, + NullOrdering: sql.NullsLast, // Due to /~https://github.com/dolthub/go-mysql-server/issues/1903 }} childNode = plan.NewSort(sf, n) } @@ -205,7 +205,7 @@ func (b *ExecBuilder) buildRangeHeapJoin(j *RangeHeapJoin, input sql.Schema, chi sf := []sql.SortField{{ Column: sortExpr, Order: sql.Ascending, - NullOrdering: sql.NullsFirst, + NullOrdering: sql.NullsLast, // Due to /~https://github.com/dolthub/go-mysql-server/issues/1903 }} left = plan.NewSort(sf, children[0]) } From 741894151c2cedfbf11afd011c56c91352ce7ddb Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 26 Jul 2023 15:38:47 -0700 Subject: [PATCH 37/46] Don't push RangeHeap into a TableAlias. 
--- sql/memo/exec_builder.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index d9d98b3bbc..0968f0350a 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -135,9 +135,6 @@ func (b *ExecBuilder) buildLookupJoin(j *LookupJoin, input sql.Schema, children func (b *ExecBuilder) buildRangeHeap(sr *RangeHeap, leftSch, rightSch sql.Schema, children ...sql.Node) (ret sql.Node, err error) { switch n := children[0].(type) { - case *plan.TableAlias: - ret, err = b.buildRangeHeap(sr, leftSch, rightSch, n.Child) - ret = plan.NewTableAlias(n.Name(), ret) case *plan.Distinct: ret, err = b.buildRangeHeap(sr, leftSch, rightSch, n.Child) ret = plan.NewDistinct(ret) From ab05cf5e20e677b234717bdc78e6d117595348f0 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Wed, 26 Jul 2023 16:35:49 -0700 Subject: [PATCH 38/46] Add additional tests for sliding join. --- enginetest/join_op_tests.go | 318 +++++++++++++++--------------- enginetest/join_planning_tests.go | 58 ++++++ 2 files changed, 213 insertions(+), 163 deletions(-) diff --git a/enginetest/join_op_tests.go b/enginetest/join_op_tests.go index 95dc758242..3d20b6265d 100644 --- a/enginetest/join_op_tests.go +++ b/enginetest/join_op_tests.go @@ -1369,88 +1369,7 @@ SELECT SUM(x) FROM xy WHERE x IN ( "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", }, }, - tests: []JoinOpTests{ - { - Query: "select * from vals join ranges on val between min and max", - Expected: []sql.Row{ - {0, 0, 2}, - {1, 0, 2}, - {1, 1, 3}, - {2, 0, 2}, - {2, 1, 3}, - {2, 2, 4}, - {3, 1, 3}, - {3, 2, 4}, - {3, 3, 5}, - {4, 2, 4}, - {4, 3, 5}, - {4, 4, 6}, - {5, 3, 5}, - {5, 4, 6}, - {6, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val > min and val < max", - Expected: []sql.Row{ - {1, 0, 2}, - {2, 1, 3}, - {3, 2, 4}, - {4, 3, 5}, - {5, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val >= min and val < max", - Expected: []sql.Row{ - {0, 0, 
2}, - {1, 0, 2}, - {1, 1, 3}, - {2, 1, 3}, - {2, 2, 4}, - {3, 2, 4}, - {3, 3, 5}, - {4, 3, 5}, - {4, 4, 6}, - {5, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val > min and val <= max", - Expected: []sql.Row{ - {1, 0, 2}, - {2, 0, 2}, - {2, 1, 3}, - {3, 1, 3}, - {3, 2, 4}, - {4, 2, 4}, - {4, 3, 5}, - {5, 3, 5}, - {5, 4, 6}, - {6, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val >= min and val <= max", - Expected: []sql.Row{ - {0, 0, 2}, - {1, 0, 2}, - {1, 1, 3}, - {2, 0, 2}, - {2, 1, 3}, - {2, 2, 4}, - {3, 1, 3}, - {3, 2, 4}, - {3, 3, 5}, - {4, 2, 4}, - {4, 3, 5}, - {4, 4, 6}, - {5, 3, 5}, - {5, 4, 6}, - {6, 4, 6}, - }, - }, - }, + tests: rangeJoinOpTests, }, { name: "keyless range join", @@ -1463,87 +1382,160 @@ SELECT SUM(x) FROM xy WHERE x IN ( "insert into ranges values (0,2), (1,3), (2,4), (3,5), (4,6)", }, }, - tests: []JoinOpTests{ - { - Query: "select * from vals join ranges on val between min and max", - Expected: []sql.Row{ - {0, 0, 2}, - {1, 0, 2}, - {1, 1, 3}, - {2, 0, 2}, - {2, 1, 3}, - {2, 2, 4}, - {3, 1, 3}, - {3, 2, 4}, - {3, 3, 5}, - {4, 2, 4}, - {4, 3, 5}, - {4, 4, 6}, - {5, 3, 5}, - {5, 4, 6}, - {6, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val > min and val < max", - Expected: []sql.Row{ - {1, 0, 2}, - {2, 1, 3}, - {3, 2, 4}, - {4, 3, 5}, - {5, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val >= min and val < max", - Expected: []sql.Row{ - {0, 0, 2}, - {1, 0, 2}, - {1, 1, 3}, - {2, 1, 3}, - {2, 2, 4}, - {3, 2, 4}, - {3, 3, 5}, - {4, 3, 5}, - {4, 4, 6}, - {5, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val > min and val <= max", - Expected: []sql.Row{ - {1, 0, 2}, - {2, 0, 2}, - {2, 1, 3}, - {3, 1, 3}, - {3, 2, 4}, - {4, 2, 4}, - {4, 3, 5}, - {5, 3, 5}, - {5, 4, 6}, - {6, 4, 6}, - }, - }, - { - Query: "select * from vals join ranges on val >= min and val <= max", - Expected: []sql.Row{ - {0, 0, 2}, - {1, 0, 2}, - {1, 1, 3}, - {2, 0, 2}, - {2, 
1, 3}, - {2, 2, 4}, - {3, 1, 3}, - {3, 2, 4}, - {3, 3, 5}, - {4, 2, 4}, - {4, 3, 5}, - {4, 4, 6}, - {5, 3, 5}, - {5, 4, 6}, - {6, 4, 6}, - }, - }, + tests: rangeJoinOpTests, + }, + { + name: "recursive range join", + setup: [][]string{ + setup.MydbData[0], + }, + tests: []JoinOpTests{{ + Query: "with recursive vals as (select 0 as val union all select val + 1 from vals where val < 6), " + + "ranges as (select 0 as min, 2 as max union all select min+1, max+1 from ranges where max < 6) " + + "select * from vals join ranges on val > min and val < max", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }}, + }, +} + +var rangeJoinOpTests = []JoinOpTests{ + { + Query: "select * from vals join ranges on val between min and max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val > min and val < max", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on min < val and max > val", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val >= min and val < max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 1, 3}, + {2, 2, 4}, + {3, 2, 4}, + {3, 3, 5}, + {4, 3, 5}, + {4, 4, 6}, + {5, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val > min and val <= max", + Expected: []sql.Row{ + {1, 0, 2}, + {2, 0, 2}, + {2, 1, 3}, + {3, 1, 3}, + {3, 2, 4}, + {4, 2, 4}, + {4, 3, 5}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + Query: "select * from vals join ranges on val >= min and val <= max", + Expected: []sql.Row{ + {0, 0, 2}, + {1, 0, 2}, + {1, 1, 3}, + {2, 0, 2}, + {2, 1, 3}, + {2, 2, 
4}, + {3, 1, 3}, + {3, 2, 4}, + {3, 3, 5}, + {4, 2, 4}, + {4, 3, 5}, + {4, 4, 6}, + {5, 3, 5}, + {5, 4, 6}, + {6, 4, 6}, + }, + }, + { + Query: "select * from vals left join ranges on val > min and val < max", + Expected: []sql.Row{ + {0, nil, nil}, + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + {6, nil, nil}, + }, + }, + { + Query: "select * from ranges l join ranges r on l.min > r.min and l.min < r.max", + Expected: []sql.Row{ + {1, 3, 0, 2}, + {2, 4, 1, 3}, + {3, 5, 2, 4}, + {4, 6, 3, 5}, + }, + }, + { + Query: "select * from vals left join ranges r1 on val > r1.min and val < r1.max left join ranges r2 on r1.min > r2.min and r1.min < r2.max", + Expected: []sql.Row{ + {0, nil, nil, nil, nil}, + {1, 0, 2, nil, nil}, + {2, 1, 3, 0, 2}, + {3, 2, 4, 1, 3}, + {4, 3, 5, 2, 4}, + {5, 4, 6, 3, 5}, + {6, nil, nil, nil, nil}, + }, + }, + { + Query: "select * from (select vals.val * 2 as val from vals) as newVals join (select ranges.min * 2 as min, ranges.max * 2 as max from ranges) as newRanges on val > min and val < max;", + Expected: []sql.Row{ + {2, 0, 4}, + {4, 2, 6}, + {6, 4, 8}, + {8, 6, 10}, + {10, 8, 12}, }, }, } diff --git a/enginetest/join_planning_tests.go b/enginetest/join_planning_tests.go index 850a71d43b..08a0611e84 100644 --- a/enginetest/join_planning_tests.go +++ b/enginetest/join_planning_tests.go @@ -1052,6 +1052,17 @@ join uv d on d.u = c.x`, {5, 4, 6}, }, }, + { + q: "select * from vals join ranges on min < val and max > val", + types: []plan.JoinType{plan.JoinTypeRangeHeap}, + exp: []sql.Row{ + {1, 0, 2}, + {2, 1, 3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + }, + }, { q: "select * from vals join ranges on val >= min and val < max", types: []plan.JoinType{plan.JoinTypeRangeHeap}, @@ -1105,6 +1116,53 @@ join uv d on d.u = c.x`, {6, 4, 6}, }, }, + { + q: "select * from vals left join ranges on val > min and val < max", + types: []plan.JoinType{plan.JoinTypeLeftOuterRangeHeap}, + exp: []sql.Row{ + {0, nil, nil}, + {1, 0, 2}, + {2, 1, 
3}, + {3, 2, 4}, + {4, 3, 5}, + {5, 4, 6}, + {6, nil, nil}, + }, + }, + { + q: "select * from ranges l join ranges r on l.min > r.min and l.min < r.max", + types: []plan.JoinType{plan.JoinTypeRangeHeap}, + exp: []sql.Row{ + {1, 3, 0, 2}, + {2, 4, 1, 3}, + {3, 5, 2, 4}, + {4, 6, 3, 5}, + }, + }, + { + q: "select * from vals left join ranges r1 on val > r1.min and val < r1.max left join ranges r2 on r1.min > r2.min and r1.min < r2.max", + types: []plan.JoinType{plan.JoinTypeLeftOuterRangeHeap, plan.JoinTypeLeftOuterRangeHeap}, + exp: []sql.Row{ + {0, nil, nil, nil, nil}, + {1, 0, 2, nil, nil}, + {2, 1, 3, 0, 2}, + {3, 2, 4, 1, 3}, + {4, 3, 5, 2, 4}, + {5, 4, 6, 3, 5}, + {6, nil, nil, nil, nil}, + }, + }, + { + q: "select * from (select vals.val * 2 as val from vals) as newVals join (select ranges.min * 2 as min, ranges.max * 2 as max from ranges) as newRanges on val > min and val < max;", + types: []plan.JoinType{plan.JoinTypeRangeHeap}, + exp: []sql.Row{ + {2, 0, 4}, + {4, 2, 6}, + {6, 4, 8}, + {8, 6, 10}, + {10, 8, 12}, + }, + }, }, }, } From d3ed9cfcdbc4ec9ba8da38aba6cddfeddd8560e8 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 27 Jul 2023 16:28:12 -0700 Subject: [PATCH 39/46] Move all of RangeHeap's mutating state into its own iterator class, `rangeHeapJoinIter`. Nodes represent physical plans, not executions. It should be possible to spawn multiple executions from a physical plan. To that end, Nodes should not have state that changes during execution. 
--- sql/plan/join.go | 9 + sql/plan/range_heap.go | 136 ++------------ sql/rowexec/node_builder.gen.go | 2 - sql/rowexec/range_heap_iter.go | 309 ++++++++++++++++++++++++++++++++ sql/rowexec/rel.go | 19 +- 5 files changed, 331 insertions(+), 144 deletions(-) create mode 100644 sql/rowexec/range_heap_iter.go diff --git a/sql/plan/join.go b/sql/plan/join.go index cf6cade7df..55ebacc9fc 100644 --- a/sql/plan/join.go +++ b/sql/plan/join.go @@ -192,6 +192,15 @@ func (i JoinType) IsLateral() bool { } } +func (i JoinType) IsRange() bool { + switch i { + case JoinTypeRangeHeap, JoinTypeLeftOuterRangeHeap: + return true + default: + return false + } +} + func (i JoinType) AsHash() JoinType { switch i { case JoinTypeInner: diff --git a/sql/plan/range_heap.go b/sql/plan/range_heap.go index 22e6973ac8..5ddef4d94c 100644 --- a/sql/plan/range_heap.go +++ b/sql/plan/range_heap.go @@ -15,11 +15,7 @@ package plan import ( - "container/heap" - "errors" "fmt" - "io" - "github.com/dolthub/go-mysql-server/sql" ) @@ -28,21 +24,12 @@ import ( // don't overlap, the amortized complexity is O(1) for each result row. type RangeHeap struct { UnaryNode - childRowIter sql.RowIter - activeRanges priorityQueue - pendingRow sql.Row - valueColumnIndex int - minColumnIndex int - maxColumnIndex int - comparisonType sql.Type - rangeIsClosedBelow bool - rangeIsClosedAbove bool -} - -type priorityQueue struct { - rangeHeap *RangeHeap - rows []sql.Row - err error + ValueColumnIndex int + MinColumnIndex int + MaxColumnIndex int + ComparisonType sql.Type + RangeIsClosedBelow bool + RangeIsClosedAbove bool } var _ sql.Node = (*RangeHeap)(nil) @@ -51,17 +38,13 @@ func NewRangeHeap(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, va // TODO: IndexOfColName is Only safe for schemas corresponding to a single table, where the source of the column is irrelevant. 
maxColumnIndex := rhsSchema.IndexOfColName(max) newSr := &RangeHeap{ - activeRanges: priorityQueue{}, - pendingRow: nil, - valueColumnIndex: lhsSchema.IndexOfColName(value), - minColumnIndex: rhsSchema.IndexOfColName(min), - maxColumnIndex: maxColumnIndex, - comparisonType: rhsSchema[maxColumnIndex].Type, - rangeIsClosedBelow: rangeIsClosedBelow, - rangeIsClosedAbove: rangeIsClosedAbove, + ValueColumnIndex: lhsSchema.IndexOfColName(value), + MinColumnIndex: rhsSchema.IndexOfColName(min), + MaxColumnIndex: maxColumnIndex, + RangeIsClosedBelow: rangeIsClosedBelow, + RangeIsClosedAbove: rangeIsClosedAbove, } newSr.Child = child - newSr.activeRanges.rangeHeap = newSr return newSr, nil } @@ -84,100 +67,3 @@ func (s *RangeHeap) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOp } var _ sql.Node = (*RangeHeap)(nil) - -func (s *RangeHeap) Initialize(ctx *sql.Context, childRowIter sql.RowIter) (err error) { - s.childRowIter = childRowIter - s.activeRanges = priorityQueue{ - rangeHeap: s, - rows: nil, - err: nil, - } - s.pendingRow, err = childRowIter.Next(ctx) - return err -} - -func (s *RangeHeap) IsInitialized() bool { - return s.childRowIter != nil -} - -func (s *RangeHeap) AcceptRow(ctx *sql.Context, row sql.Row) (sql.RowIter, error) { - // Remove rows from the heap if we've advanced beyond their max value. - for s.activeRanges.Len() > 0 { - maxValue := s.activeRanges.Peek() - compareResult, err := s.comparisonType.Compare(row[s.valueColumnIndex], maxValue) - if err != nil { - return nil, err - } - if (s.rangeIsClosedAbove && compareResult > 0) || (!s.rangeIsClosedAbove && compareResult >= 0) { - heap.Pop(&s.activeRanges) - } else { - break - } - } - - // Advance the child iterator until we encounter a row whose min value is beyond the range. 
- for s.pendingRow != nil { - minValue := s.pendingRow[s.minColumnIndex] - compareResult, err := s.comparisonType.Compare(row[s.valueColumnIndex], minValue) - if err != nil { - return nil, err - } - - if (s.rangeIsClosedBelow && compareResult < 0) && (!s.rangeIsClosedBelow && compareResult <= 0) { - break - } else { - heap.Push(&s.activeRanges, s.pendingRow) - } - - s.pendingRow, err = s.childRowIter.Next(ctx) - if err != nil { - if errors.Is(err, io.EOF) { - // We've already imported every range into the priority queue. - s.pendingRow = nil - break - } - return nil, err - } - } - - // Every active row must match the accepted row. - return sql.RowsToRowIter(s.activeRanges.rows...), nil -} - -func (pq priorityQueue) Len() int { return len(pq.rows) } - -func (pq *priorityQueue) Less(i, j int) bool { - lhs := pq.rows[i][pq.rangeHeap.maxColumnIndex] - rhs := pq.rows[j][pq.rangeHeap.maxColumnIndex] - // compareResult will be 0 if lhs==rhs, -1 if lhs < rhs, and +1 if lhs > rhs. - compareResult, err := pq.SortedType().Compare(lhs, rhs) - if pq.err == nil && err != nil { - pq.err = err - } - return compareResult < 0 -} - -func (pq *priorityQueue) Swap(i, j int) { - pq.rows[i], pq.rows[j] = pq.rows[j], pq.rows[i] -} - -func (pq *priorityQueue) Push(x any) { - item := x.(sql.Row) - pq.rows = append(pq.rows, item) -} - -func (pq *priorityQueue) Pop() any { - n := len(pq.rows) - x := pq.rows[n-1] - pq.rows = pq.rows[0 : n-1] - return x -} - -func (pq *priorityQueue) Peek() interface{} { - n := len(pq.rows) - return pq.rows[n-1][pq.rangeHeap.maxColumnIndex] -} - -func (pq *priorityQueue) SortedType() sql.Type { - return pq.rangeHeap.comparisonType -} diff --git a/sql/rowexec/node_builder.gen.go b/sql/rowexec/node_builder.gen.go index 77fff1e891..e132d04a36 100644 --- a/sql/rowexec/node_builder.gen.go +++ b/sql/rowexec/node_builder.gen.go @@ -360,8 +360,6 @@ func (b *BaseBuilder) buildNodeExec(ctx *sql.Context, n sql.Node, row sql.Row) ( return n.RowIter(ctx, row) case 
*plan.CreateSpatialRefSys: return b.buildCreateSpatialRefSys(ctx, n, row) - case *plan.RangeHeap: - return b.buildRangeHeap(ctx, n, row) default: return nil, fmt.Errorf("exec builder found unknown Node type %T", n) } diff --git a/sql/rowexec/range_heap_iter.go b/sql/rowexec/range_heap_iter.go new file mode 100644 index 0000000000..75fad9d83e --- /dev/null +++ b/sql/rowexec/range_heap_iter.go @@ -0,0 +1,309 @@ +package rowexec + +import ( + "container/heap" + "errors" + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/plan" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + "io" + "reflect" +) + +func newRangeHeapJoinIter(ctx *sql.Context, b sql.NodeExecBuilder, j *plan.JoinNode, row sql.Row) (sql.RowIter, error) { + var leftName, rightName string + if leftTable, ok := j.Left().(sql.Nameable); ok { + leftName = leftTable.Name() + } else { + leftName = reflect.TypeOf(j.Left()).String() + } + + if rightTable, ok := j.Right().(sql.Nameable); ok { + rightName = rightTable.Name() + } else { + rightName = reflect.TypeOf(j.Right()).String() + } + + span, ctx := ctx.Span("plan.rangeHeapJoinIter", trace.WithAttributes( + attribute.String("left", leftName), + attribute.String("right", rightName), + )) + + l, err := b.Build(ctx, j.Left(), row) + if err != nil { + span.End() + return nil, err + } + return sql.NewSpanIter(span, &rangeHeapJoinIter{ + parentRow: row, + primary: l, + cond: j.Filter, + joinType: j.Op, + rowSize: len(row) + len(j.Left().Schema()) + len(j.Right().Schema()), + scopeLen: j.ScopeLen, + b: b, + rangeHeapPlan: j.Right().(*plan.RangeHeap), + }), nil +} + +// joinIter is an iterator that iterates over every row in the primary table and performs an index lookup in +// the secondary table for each value +type rangeHeapJoinIter struct { + parentRow sql.Row + primary sql.RowIter + primaryRow sql.Row + secondary sql.RowIter + cond sql.Expression + joinType plan.JoinType + + foundMatch bool + rowSize int + 
scopeLen int + b sql.NodeExecBuilder + + rangeHeapPlan *plan.RangeHeap + childRowIter sql.RowIter + pendingRow sql.Row + + activeRanges []sql.Row + err error +} + +func (iter *rangeHeapJoinIter) loadPrimary(ctx *sql.Context) error { + if iter.primaryRow == nil { + r, err := iter.primary.Next(ctx) + if err != nil { + return err + } + + iter.primaryRow = iter.parentRow.Append(r) + iter.foundMatch = false + + iter.initializeHeap(ctx, iter.b, iter.primaryRow) + } + + return nil +} + +func (iter *rangeHeapJoinIter) loadSecondary(ctx *sql.Context) (sql.Row, error) { + if iter.secondary == nil { + rowIter, err := iter.getActiveRanges(ctx, iter.b, iter.primaryRow) + + if err != nil { + return nil, err + } + if plan.IsEmptyIter(rowIter) { + return nil, plan.ErrEmptyCachedResult + } + iter.secondary = rowIter + } + + secondaryRow, err := iter.secondary.Next(ctx) + if err != nil { + if err == io.EOF { + err = iter.secondary.Close(ctx) + iter.secondary = nil + if err != nil { + return nil, err + } + iter.primaryRow = nil + return nil, io.EOF + } + return nil, err + } + + return secondaryRow, nil +} + +func (iter *rangeHeapJoinIter) Next(ctx *sql.Context) (sql.Row, error) { + for { + if err := iter.loadPrimary(ctx); err != nil { + return nil, err + } + + primary := iter.primaryRow + secondary, err := iter.loadSecondary(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + if !iter.foundMatch && iter.joinType.IsLeftOuter() { + iter.primaryRow = nil + row := iter.buildRow(primary, nil) + return iter.removeParentRow(row), nil + } + continue + } else if errors.Is(err, plan.ErrEmptyCachedResult) { + if !iter.foundMatch && iter.joinType.IsLeftOuter() { + iter.primaryRow = nil + row := iter.buildRow(primary, nil) + return iter.removeParentRow(row), nil + } + + return nil, io.EOF + } + return nil, err + } + + row := iter.buildRow(primary, secondary) + res, err := iter.cond.Eval(ctx, row) + matches := res == true + if err != nil { + return nil, err + } + + if res == nil && 
iter.joinType.IsExcludeNulls() { + err = iter.secondary.Close(ctx) + iter.secondary = nil + if err != nil { + return nil, err + } + iter.primaryRow = nil + continue + } + + if !matches { + continue + } + + iter.foundMatch = true + return iter.removeParentRow(row), nil + } +} + +func (iter *rangeHeapJoinIter) removeParentRow(r sql.Row) sql.Row { + copy(r[iter.scopeLen:], r[len(iter.parentRow):]) + r = r[:len(r)-len(iter.parentRow)+iter.scopeLen] + return r +} + +// buildRow builds the result set row using the rows from the primary and secondary tables +func (iter *rangeHeapJoinIter) buildRow(primary, secondary sql.Row) sql.Row { + row := make(sql.Row, iter.rowSize) + + copy(row, primary) + copy(row[len(primary):], secondary) + + return row +} + +func (iter *rangeHeapJoinIter) Close(ctx *sql.Context) (err error) { + if iter.primary != nil { + if err = iter.primary.Close(ctx); err != nil { + if iter.secondary != nil { + _ = iter.secondary.Close(ctx) + } + return err + } + } + + if iter.secondary != nil { + err = iter.secondary.Close(ctx) + iter.secondary = nil + } + + return err +} + +type rangeHeapRowIterProvider struct { +} + +func (iter *rangeHeapJoinIter) initializeHeap(ctx *sql.Context, builder sql.NodeExecBuilder, primaryRow sql.Row) (err error) { + iter.childRowIter, err = builder.Build(ctx, iter.rangeHeapPlan.Child, primaryRow) + if err != nil { + return err + } + iter.activeRanges = nil + iter.rangeHeapPlan.ComparisonType = iter.rangeHeapPlan.Schema()[iter.rangeHeapPlan.MaxColumnIndex].Type + + iter.pendingRow, err = iter.childRowIter.Next(ctx) + return err +} + +func (iter *rangeHeapJoinIter) getActiveRanges(ctx *sql.Context, _ sql.NodeExecBuilder, row sql.Row) (sql.RowIter, error) { + + // Remove rows from the heap if we've advanced beyond their max value. 
+ for iter.Len() > 0 { + maxValue := iter.Peek() + compareResult, err := iter.rangeHeapPlan.ComparisonType.Compare(row[iter.rangeHeapPlan.ValueColumnIndex], maxValue) + if err != nil { + return nil, err + } + if (iter.rangeHeapPlan.RangeIsClosedAbove && compareResult > 0) || (!iter.rangeHeapPlan.RangeIsClosedAbove && compareResult >= 0) { + heap.Pop(iter) + if iter.err != nil { + err = iter.err + iter.err = nil + return nil, err + } + } else { + break + } + } + + // Advance the child iterator until we encounter a row whose min value is beyond the range. + for iter.pendingRow != nil { + minValue := iter.pendingRow[iter.rangeHeapPlan.MinColumnIndex] + compareResult, err := iter.rangeHeapPlan.ComparisonType.Compare(row[iter.rangeHeapPlan.ValueColumnIndex], minValue) + if err != nil { + return nil, err + } + + if (iter.rangeHeapPlan.RangeIsClosedBelow && compareResult < 0) || (!iter.rangeHeapPlan.RangeIsClosedBelow && compareResult <= 0) { + break + } else { + heap.Push(iter, iter.pendingRow) + if iter.err != nil { + err = iter.err + iter.err = nil + return nil, err + } + } + + iter.pendingRow, err = iter.childRowIter.Next(ctx) + if err != nil { + if errors.Is(err, io.EOF) { + // We've already imported every range into the priority queue. + iter.pendingRow = nil + break + } + return nil, err + } + } + + // Every active row must match the accepted row. + return sql.RowsToRowIter(iter.activeRanges...), nil +} + +func (iter rangeHeapJoinIter) Len() int { return len(iter.activeRanges) } + +func (iter *rangeHeapJoinIter) Less(i, j int) bool { + lhs := iter.activeRanges[i][iter.rangeHeapPlan.MaxColumnIndex] + rhs := iter.activeRanges[j][iter.rangeHeapPlan.MaxColumnIndex] + // compareResult will be 0 if lhs==rhs, -1 if lhs < rhs, and +1 if lhs > rhs. 
+ compareResult, err := iter.rangeHeapPlan.ComparisonType.Compare(lhs, rhs) + if iter.err == nil && err != nil { + iter.err = err + } + return compareResult < 0 +} + +func (iter *rangeHeapJoinIter) Swap(i, j int) { + iter.activeRanges[i], iter.activeRanges[j] = iter.activeRanges[j], iter.activeRanges[i] +} + +func (iter *rangeHeapJoinIter) Push(x any) { + item := x.(sql.Row) + iter.activeRanges = append(iter.activeRanges, item) +} + +func (iter *rangeHeapJoinIter) Pop() any { + n := len(iter.activeRanges) + x := iter.activeRanges[n-1] + iter.activeRanges = iter.activeRanges[0 : n-1] + return x +} + +func (iter *rangeHeapJoinIter) Peek() interface{} { + n := len(iter.activeRanges) + return iter.activeRanges[n-1][iter.rangeHeapPlan.MaxColumnIndex] +} diff --git a/sql/rowexec/rel.go b/sql/rowexec/rel.go index dd427ec97a..48b5859659 100644 --- a/sql/rowexec/rel.go +++ b/sql/rowexec/rel.go @@ -265,6 +265,8 @@ func (b *BaseBuilder) buildJoinNode(ctx *sql.Context, n *plan.JoinNode, row sql. return newMergeJoinIter(ctx, b, n, row) case n.Op.IsLateral(): return newLateralJoinIter(ctx, b, n, row) + case n.Op.IsRange(): + return newRangeHeapJoinIter(ctx, b, n, row) default: return newJoinIter(ctx, b, n, row) } @@ -734,20 +736,3 @@ func (b *BaseBuilder) buildResolvedTable(ctx *sql.Context, n *plan.ResolvedTable func (b *BaseBuilder) buildTableCount(_ *sql.Context, n *plan.TableCountLookup, _ sql.Row) (sql.RowIter, error) { return sql.RowsToRowIter(sql.Row{int64(n.Count())}), nil } - -func (b *BaseBuilder) buildRangeHeap(ctx *sql.Context, n *plan.RangeHeap, row sql.Row) (sql.RowIter, error) { - // The first time, initialize the child rowIter and the heap - span, ctx := ctx.Span("plan.RangeHeap") - if !n.IsInitialized() { - i, err := b.buildNodeExec(ctx, n.Child, row) - if err != nil { - span.End() - return nil, err - } - err = n.Initialize(ctx, i) - if err != nil { - return nil, err - } - } - return n.AcceptRow(ctx, row) -} From 52541e94164944980a8779cc4c4f3ac0e3ac509e Mon Sep 
17 00:00:00 2001 From: Nick Tobey Date: Thu, 27 Jul 2023 16:29:32 -0700 Subject: [PATCH 40/46] Use `Schema.IndexOf` instead of `Schema.IndexOfColName` when making RangeHeaps. This guarantees correct behavior even when multiple tables have columns with the same name. --- sql/memo/exec_builder.go | 6 +++--- sql/plan/range_heap.go | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/memo/exec_builder.go b/sql/memo/exec_builder.go index 0968f0350a..17e1fb8ead 100644 --- a/sql/memo/exec_builder.go +++ b/sql/memo/exec_builder.go @@ -171,9 +171,9 @@ func (b *ExecBuilder) buildRangeHeap(sr *RangeHeap, leftSch, rightSch sql.Schema childNode, leftSch, rightSch, - sr.ValueCol.Gf.Name(), - sr.MinColRef.Gf.Name(), - sr.MaxColRef.Gf.Name(), + sr.ValueCol.Gf, + sr.MinColRef.Gf, + sr.MaxColRef.Gf, sr.RangeClosedOnLowerBound, sr.RangeClosedOnUpperBound) } diff --git a/sql/plan/range_heap.go b/sql/plan/range_heap.go index 5ddef4d94c..8621b7d191 100644 --- a/sql/plan/range_heap.go +++ b/sql/plan/range_heap.go @@ -17,6 +17,7 @@ package plan import ( "fmt" "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/expression" ) // RangeHeap is a Node that wraps a table with min and max range columns. When used as a secondary provider in Join @@ -34,12 +35,11 @@ type RangeHeap struct { var _ sql.Node = (*RangeHeap)(nil) -func NewRangeHeap(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max string, rangeIsClosedBelow, rangeIsClosedAbove bool) (*RangeHeap, error) { - // TODO: IndexOfColName is Only safe for schemas corresponding to a single table, where the source of the column is irrelevant. 
- maxColumnIndex := rhsSchema.IndexOfColName(max) +func NewRangeHeap(child sql.Node, lhsSchema sql.Schema, rhsSchema sql.Schema, value, min, max *expression.GetField, rangeIsClosedBelow, rangeIsClosedAbove bool) (*RangeHeap, error) { + maxColumnIndex := rhsSchema.IndexOf(max.Name(), max.Table()) newSr := &RangeHeap{ - ValueColumnIndex: lhsSchema.IndexOfColName(value), - MinColumnIndex: rhsSchema.IndexOfColName(min), + ValueColumnIndex: lhsSchema.IndexOf(value.Name(), value.Table()), + MinColumnIndex: rhsSchema.IndexOf(min.Name(), min.Table()), MaxColumnIndex: maxColumnIndex, RangeIsClosedBelow: rangeIsClosedBelow, RangeIsClosedAbove: rangeIsClosedAbove, From 70260ff69d29397b17114448eeaa77c55c9260c0 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 27 Jul 2023 17:45:35 -0700 Subject: [PATCH 41/46] Add additional planning test. --- enginetest/join_planning_tests.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/enginetest/join_planning_tests.go b/enginetest/join_planning_tests.go index 08a0611e84..b80598cad5 100644 --- a/enginetest/join_planning_tests.go +++ b/enginetest/join_planning_tests.go @@ -1163,6 +1163,23 @@ join uv d on d.u = c.x`, {10, 8, 12}, }, }, + { + // This tests that the RangeHeapJoin node functions correctly even if its rows are iterated over multiple times. + q: "select * from (select 1 union select 2) as l left join (select * from vals join ranges on val > min and val < max) as r on max = max", + types: []plan.JoinType{plan.JoinTypeLeftOuter, plan.JoinTypeRangeHeap}, + exp: []sql.Row{ + {1, 1, 0, 2}, + {1, 2, 1, 3}, + {1, 3, 2, 4}, + {1, 4, 3, 5}, + {1, 5, 4, 6}, + {2, 1, 0, 2}, + {2, 2, 1, 3}, + {2, 3, 2, 4}, + {2, 4, 3, 5}, + {2, 5, 4, 6}, + }, + }, }, }, } From 038f099a6446657261bda6d2f9144827f7d62c1d Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Thu, 27 Jul 2023 17:48:29 -0700 Subject: [PATCH 42/46] Move constant to top of coster.go. 
--- sql/memo/coster.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/memo/coster.go b/sql/memo/coster.go index 66b94fd4f9..537d1b9cf0 100644 --- a/sql/memo/coster.go +++ b/sql/memo/coster.go @@ -32,6 +32,8 @@ const ( degeneratePenalty = 2.0 optimisticJoinSel = .10 biasFactor = 1e5 + + perKeyCostReductionFactor = 0.5 ) func NewDefaultCoster() Coster { @@ -240,8 +242,6 @@ func (c *coster) costDistinct(_ *sql.Context, n *Distinct, _ sql.StatsReader) (f return n.Child.Cost * (cpuCostFactor + .75*memCostFactor), nil } -const perKeyCostReductionFactor = 0.5 - // lookupJoinSelectivity estimates the selectivity of a join condition with n lhs rows and m rhs rows. // A join with a selectivity of k will return k*(n*m) rows. // Special case: A join with a selectivity of 0 will return n rows. From e766ae38784dc0383b78799b2b163a1ebf343a4d Mon Sep 17 00:00:00 2001 From: nicktobey Date: Fri, 28 Jul 2023 17:19:06 +0000 Subject: [PATCH 43/46] [ga-format-pr] Run ./format_repo.sh to fix formatting --- sql/analyzer/range_filter_test.go | 6 ++++-- sql/plan/range_heap.go | 1 + sql/rowexec/range_heap_iter.go | 10 ++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/sql/analyzer/range_filter_test.go b/sql/analyzer/range_filter_test.go index 739879b0e2..7c63c0d5f8 100644 --- a/sql/analyzer/range_filter_test.go +++ b/sql/analyzer/range_filter_test.go @@ -1,10 +1,12 @@ package analyzer import ( + "testing" + + "github.com/stretchr/testify/require" + "github.com/dolthub/go-mysql-server/sql/memo" "github.com/dolthub/go-mysql-server/sql/types" - "github.com/stretchr/testify/require" - "testing" ) func variable(name string) *memo.ExprGroup { diff --git a/sql/plan/range_heap.go b/sql/plan/range_heap.go index 8621b7d191..51c5408b68 100644 --- a/sql/plan/range_heap.go +++ b/sql/plan/range_heap.go @@ -16,6 +16,7 @@ package plan import ( "fmt" + "github.com/dolthub/go-mysql-server/sql" "github.com/dolthub/go-mysql-server/sql/expression" ) diff --git 
a/sql/rowexec/range_heap_iter.go b/sql/rowexec/range_heap_iter.go index 75fad9d83e..63dfb9b137 100644 --- a/sql/rowexec/range_heap_iter.go +++ b/sql/rowexec/range_heap_iter.go @@ -3,12 +3,14 @@ package rowexec import ( "container/heap" "errors" - "github.com/dolthub/go-mysql-server/sql" - "github.com/dolthub/go-mysql-server/sql/plan" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/trace" "io" "reflect" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + + "github.com/dolthub/go-mysql-server/sql" + "github.com/dolthub/go-mysql-server/sql/plan" ) func newRangeHeapJoinIter(ctx *sql.Context, b sql.NodeExecBuilder, j *plan.JoinNode, row sql.Row) (sql.RowIter, error) { From 1901ea646e021737c59780f3b06855e32e5a63e6 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 28 Jul 2023 12:17:24 -0700 Subject: [PATCH 44/46] Correctly handle an empty RHS in RangeHeapJoins. --- enginetest/join_planning_tests.go | 13 +++++++++++++ sql/rowexec/range_heap_iter.go | 9 ++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/enginetest/join_planning_tests.go b/enginetest/join_planning_tests.go index d450b0a6ca..8a84959e2f 100644 --- a/enginetest/join_planning_tests.go +++ b/enginetest/join_planning_tests.go @@ -1228,6 +1228,19 @@ join uv d on d.u = c.x`, {2, 5, 4, 6}, }, }, + { + q: "select * from vals left join (select * from ranges where 0) as newRanges on val > min and val < max;", + types: []plan.JoinType{plan.JoinTypeRangeHeap}, + exp: []sql.Row{ + {0, nil, nil}, + {1, nil, nil}, + {2, nil, nil}, + {3, nil, nil}, + {4, nil, nil}, + {5, nil, nil}, + {6, nil, nil}, + }, + }, }, }, } diff --git a/sql/rowexec/range_heap_iter.go b/sql/rowexec/range_heap_iter.go index 75fad9d83e..fe9dfac481 100644 --- a/sql/rowexec/range_heap_iter.go +++ b/sql/rowexec/range_heap_iter.go @@ -80,7 +80,10 @@ func (iter *rangeHeapJoinIter) loadPrimary(ctx *sql.Context) error { iter.primaryRow = iter.parentRow.Append(r) iter.foundMatch = false - 
iter.initializeHeap(ctx, iter.b, iter.primaryRow) + err = iter.initializeHeap(ctx, iter.b, iter.primaryRow) + if err != nil { + return err + } } return nil @@ -216,6 +219,10 @@ func (iter *rangeHeapJoinIter) initializeHeap(ctx *sql.Context, builder sql.Node iter.rangeHeapPlan.ComparisonType = iter.rangeHeapPlan.Schema()[iter.rangeHeapPlan.MaxColumnIndex].Type iter.pendingRow, err = iter.childRowIter.Next(ctx) + if err == io.EOF { + iter.pendingRow = nil + return nil + } return err } From a05358da5700c60c4d0bbf06713830de9a453427 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 28 Jul 2023 12:48:57 -0700 Subject: [PATCH 45/46] Fix typo in planning test. --- enginetest/join_planning_tests.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enginetest/join_planning_tests.go b/enginetest/join_planning_tests.go index 8a84959e2f..08e653eec3 100644 --- a/enginetest/join_planning_tests.go +++ b/enginetest/join_planning_tests.go @@ -1230,7 +1230,7 @@ join uv d on d.u = c.x`, }, { q: "select * from vals left join (select * from ranges where 0) as newRanges on val > min and val < max;", - types: []plan.JoinType{plan.JoinTypeRangeHeap}, + types: []plan.JoinType{plan.JoinTypeLeftOuterRangeHeap}, exp: []sql.Row{ {0, nil, nil}, {1, nil, nil}, From b9f3fc671cd1e082422a996d469e69004085d273 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Fri, 28 Jul 2023 12:51:20 -0700 Subject: [PATCH 46/46] Add op tests to match planning tests. --- enginetest/join_op_tests.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/enginetest/join_op_tests.go b/enginetest/join_op_tests.go index 3d20b6265d..466527a412 100644 --- a/enginetest/join_op_tests.go +++ b/enginetest/join_op_tests.go @@ -1538,4 +1538,32 @@ var rangeJoinOpTests = []JoinOpTests{ {10, 8, 12}, }, }, + { + // This tests that the RangeHeapJoin node functions correctly even if its rows are iterated over multiple times. 
+ Query: "select * from (select 1 union select 2) as l left join (select * from vals join ranges on val > min and val < max) as r on max = max", + Expected: []sql.Row{ + {1, 1, 0, 2}, + {1, 2, 1, 3}, + {1, 3, 2, 4}, + {1, 4, 3, 5}, + {1, 5, 4, 6}, + {2, 1, 0, 2}, + {2, 2, 1, 3}, + {2, 3, 2, 4}, + {2, 4, 3, 5}, + {2, 5, 4, 6}, + }, + }, + { + Query: "select * from vals left join (select * from ranges where 0) as newRanges on val > min and val < max;", + Expected: []sql.Row{ + {0, nil, nil}, + {1, nil, nil}, + {2, nil, nil}, + {3, nil, nil}, + {4, nil, nil}, + {5, nil, nil}, + {6, nil, nil}, + }, + }, }