Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan, statistics: maintain HistColl in DataSource's StatsInfo #7385

Merged
merged 19 commits into from
Aug 30, 2018
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion expression/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ type Column struct {
// We'll try to remove it in the future.
ID int64
// UniqueID is the unique id of this column.
UniqueID int
UniqueID int64
// IsAggOrSubq indicates whether this column refers to an aggregation column or a subquery column.
// If so, this column's name will be the plain sql text.
IsAggOrSubq bool
Expand Down Expand Up @@ -375,3 +375,22 @@ func IndexInfo2Cols(cols []*Column, index *model.IndexInfo) ([]*Column, []int) {
}
return retCols, lengths
}

// FindColumnsByUniqueIDs resolves ids against cols by UniqueID.
//
// The ids are processed in order; for each one the first column in cols with
// an equal UniqueID is appended to the result. The search stops at the first
// id that has no matching column, so the returned slice always corresponds to
// a leading prefix of ids (and is empty, but non-nil, when the first id is
// missing or ids is empty).
func FindColumnsByUniqueIDs(cols []*Column, ids []int64) []*Column {
	matched := make([]*Column, 0, len(ids))
	for _, want := range ids {
		var hit *Column
		for _, candidate := range cols {
			if candidate.UniqueID == want {
				hit = candidate
				break
			}
		}
		// An unmatched id ends the lookup: only a contiguous prefix is returned.
		if hit == nil {
			return matched
		}
		matched = append(matched, hit)
	}
	return matched
}
2 changes: 1 addition & 1 deletion expression/column_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (s *testEvaluatorSuite) TestColumn2Expr(c *C) {

cols := make([]*Column, 0, 5)
for i := 0; i < 5; i++ {
cols = append(cols, &Column{UniqueID: i})
cols = append(cols, &Column{UniqueID: int64(i)})
}

exprs := Column2Exprs(cols)
Expand Down
2 changes: 1 addition & 1 deletion expression/constant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type testExpressionSuite struct{}

func newColumn(id int) *Column {
return &Column{
UniqueID: id,
UniqueID: int64(id),
ColName: model.NewCIStr(fmt.Sprint(id)),
TblName: model.NewCIStr("t"),
DBName: model.NewCIStr("test"),
Expand Down
4 changes: 2 additions & 2 deletions expression/distsql_builtin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ import (
var _ = Suite(&testEvalSuite{})

type testEvalSuite struct {
colID int
colID int64
}

// SetUpSuite resets the suite's column ID counter (colID) to zero.
func (s *testEvalSuite) SetUpSuite(c *C) {
s.colID = 0
}

func (s *testEvalSuite) allocColID() int {
func (s *testEvalSuite) allocColID() int64 {
s.colID++
return s.colID
}
Expand Down
2 changes: 1 addition & 1 deletion expression/expression_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func tableInfoToSchemaForTest(tableInfo *model.TableInfo) *Schema {
schema := NewSchema(make([]*Column, 0, len(columns))...)
for i, col := range columns {
schema.Append(&Column{
UniqueID: i,
UniqueID: int64(i),
TblName: tableInfo.Name,
ColName: col.Name,
ID: col.ID,
Expand Down
8 changes: 4 additions & 4 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
if err != nil {
return false, errors.Trace(err)
}
path.countAfterAccess, err = ds.statisticTable.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
path.countAfterAccess, err = ds.stats.histColl.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges)
if err != nil {
return false, errors.Trace(err)
}
Expand All @@ -435,8 +435,8 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
}
path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.tableFilters, path.index.Columns, ds.tableInfo)
if corColInAccessConds {
idxHist, ok := ds.statisticTable.Indices[path.index.ID]
if ok && !ds.statisticTable.Pseudo {
idxHist, ok := ds.stats.histColl.Indices[path.index.ID]
if ok && !ds.stats.histColl.Pseudo {
path.countAfterAccess = idxHist.AvgCountPerValue(ds.statisticTable.Count)
} else {
path.countAfterAccess = ds.statisticTable.PseudoAvgCountPerValue()
Expand All @@ -448,7 +448,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath) (bool, error) {
path.countAfterAccess = math.Min(ds.stats.count/selectionFactor, float64(ds.statisticTable.Count))
}
if path.indexFilters != nil {
selectivity, err := ds.statisticTable.Selectivity(ds.ctx, path.indexFilters)
selectivity, err := ds.stats.histColl.Selectivity(ds.ctx, path.indexFilters)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
Expand Down
2 changes: 1 addition & 1 deletion plan/point_get_plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ func colInfoToColumn(db model.CIStr, tblName model.CIStr, asName model.CIStr, co
TblName: tblName,
RetType: &col.FieldType,
ID: col.ID,
UniqueID: col.Offset,
UniqueID: int64(col.Offset),
Index: idx,
}
}
6 changes: 5 additions & 1 deletion plan/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

"github.com/juju/errors"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/statistics"
log "github.com/sirupsen/logrus"
)

Expand All @@ -27,6 +28,7 @@ type statsInfo struct {
count float64
cardinality []float64

histColl statistics.HistColl
// usePseudoStats indicates whether the statsInfo is calculated using the
// pseudo statistics on a table.
usePseudoStats bool
Expand All @@ -45,6 +47,7 @@ func (s *statsInfo) scale(factor float64) *statsInfo {
profile := &statsInfo{
count: s.count * factor,
cardinality: make([]float64, len(s.cardinality)),
histColl: s.histColl,
usePseudoStats: s.usePseudoStats,
}
for i := range profile.cardinality {
Expand Down Expand Up @@ -111,6 +114,7 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo {
profile := &statsInfo{
count: float64(ds.statisticTable.Count),
cardinality: make([]float64, len(ds.Columns)),
histColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns),
usePseudoStats: ds.statisticTable.Pseudo,
}
for i, col := range ds.Columns {
Expand All @@ -123,7 +127,7 @@ func (ds *DataSource) getStatsByFilter(conds expression.CNFExprs) *statsInfo {
}
}
ds.stats = profile
selectivity, err := ds.statisticTable.Selectivity(ds.ctx, conds)
selectivity, err := profile.histColl.Selectivity(ds.ctx, conds)
if err != nil {
log.Warnf("An error happened: %v, we have to use the default selectivity", err.Error())
selectivity = selectionFactor
Expand Down
4 changes: 2 additions & 2 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ type SessionVars struct {
PlanID int

// PlanColumnID is the unique id for column when building plan.
PlanColumnID int
PlanColumnID int64

// User is the user identity with which the session login.
User *auth.UserIdentity
Expand Down Expand Up @@ -364,7 +364,7 @@ func (s *SessionVars) CleanBuffers() {
}

// AllocPlanColumnID allocates column id for planner.
func (s *SessionVars) AllocPlanColumnID() int {
func (s *SessionVars) AllocPlanColumnID() int64 {
s.PlanColumnID++
return s.PlanColumnID
}
Expand Down
3 changes: 0 additions & 3 deletions statistics/boostrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ func (h *Handle) initStatsMeta4Chunk(is infoschema.InfoSchema, tables statsCache
ModifyCount: row.GetInt64(2),
Columns: make(map[int64]*Column, len(tableInfo.Columns)),
Indices: make(map[int64]*Index, len(tableInfo.Indices)),
colName2Idx: make(map[string]int64, len(tableInfo.Columns)),
colName2ID: make(map[string]int64, len(tableInfo.Columns)),
}
tbl := &Table{
HistColl: newHistColl,
Expand Down Expand Up @@ -238,7 +236,6 @@ func (h *Handle) initStatsBuckets(tables statsCache) error {
}
col.PreCalculateScalar()
}
table.buildColNameMapper()
}
return nil
}
Expand Down
15 changes: 5 additions & 10 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -845,17 +845,12 @@ func logForIndex(prefix string, t *Table, idx *Index, ranges []*ranger.Range, ac
colName := idx.Info.Columns[rangePosition].Name.L
var rangeString string
// prefer index stats over column stats
if idx, ok := t.colName2Idx[colName]; ok {
if t.Indices[idx] == nil {
return
}
rangeString = logForIndexRange(t.Indices[idx], &rang, -1, factor)
if idxHist := t.indexStartWithColumnForDebugLog(colName); idxHist != nil {
rangeString = logForIndexRange(idxHist, &rang, -1, factor)
} else if colHist := t.columnByNameForDebugLog(colName); colHist != nil {
rangeString = colRangeToStr(colHist, &rang, -1, factor)
} else {
id := t.colName2ID[colName]
if t.Columns[id] == nil {
return
}
rangeString = colRangeToStr(t.Columns[t.colName2ID[colName]], &rang, -1, factor)
return
}
log.Debugf("%s index: %s, actual: %d, equality: %s, expected equality: %d, %s", prefix, idx.Info.Name.O,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this line be removed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems not?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The else branch on line 852 returns from this function, so this is dead code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, its meaning changed after I changed the code. This log should be in the if branch now.

actual[i], equalityString, equalityCount, rangeString)
Expand Down
18 changes: 11 additions & 7 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp

// Deal with the correlated column.
for _, expr := range exprs {
if c := isColEqCorCol(expr); c != nil && !coll.ColumnIsInvalid(sc, c.ID) {
colHist := coll.Columns[c.ID]
if c := isColEqCorCol(expr); c != nil && !coll.ColumnIsInvalid(sc, c.UniqueID) {
colHist := coll.Columns[c.UniqueID]
if colHist.NDV > 0 {
ret *= 1 / float64(colHist.NDV)
}
Expand All @@ -172,27 +172,31 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp

extractedCols := make([]*expression.Column, 0, len(coll.Columns))
extractedCols = expression.ExtractColumnsFromExpressions(extractedCols, remainedExprs, nil)
for _, colInfo := range coll.Columns {
for id, colInfo := range coll.Columns {
col := expression.ColInfo2Col(extractedCols, colInfo.Info)
if col != nil {
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, col)
if err != nil {
return 0, errors.Trace(err)
}
sets = append(sets, &exprSet{tp: colType, ID: col.ID, mask: maskCovered, ranges: ranges, numCols: 1})
sets = append(sets, &exprSet{tp: colType, ID: id, mask: maskCovered, ranges: ranges, numCols: 1})
if mysql.HasPriKeyFlag(colInfo.Info.Flag) {
sets[len(sets)-1].tp = pkType
}
}
}
for _, idxInfo := range coll.Indices {
idxCols, lengths := expression.IndexInfo2Cols(extractedCols, idxInfo.Info)
for id, idxInfo := range coll.Indices {
idxCols := expression.FindColumnsByUniqueIDs(extractedCols, coll.Idx2ColumnIDs[id])
if len(idxCols) > 0 {
lengths := make([]int, 0, len(idxCols))
for i := 0; i < len(idxCols); i++ {
lengths = append(lengths, idxInfo.Info.Columns[i].Length)
}
maskCovered, ranges, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, idxCols...)
if err != nil {
return 0, errors.Trace(err)
}
sets = append(sets, &exprSet{tp: indexType, ID: idxInfo.ID, mask: maskCovered, ranges: ranges, numCols: len(idxInfo.Info.Columns)})
sets = append(sets, &exprSet{tp: indexType, ID: id, mask: maskCovered, ranges: ranges, numCols: len(idxInfo.Info.Columns)})
}
}
sets = getUsableSetsByGreedy(sets)
Expand Down
14 changes: 10 additions & 4 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,19 +188,25 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) {
stmts, err := session.Parse(ctx, sql)
c.Assert(err, IsNil, Commentf("error %v, for expr %s", err, tt.exprs))
c.Assert(stmts, HasLen, 1)

err = plan.Preprocess(ctx, stmts[0], is, false)
c.Assert(err, IsNil, comment)
p, err := plan.BuildLogicalPlan(ctx, stmts[0], is)
c.Assert(err, IsNil, Commentf("error %v, for building plan, expr %s", err, tt.exprs))
ratio, err := statsTbl.Selectivity(ctx, p.(plan.LogicalPlan).Children()[0].(*plan.LogicalSelection).Conditions)

sel := p.(plan.LogicalPlan).Children()[0].(*plan.LogicalSelection)
ds := sel.Children()[0].(*plan.DataSource)

histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns)

ratio, err := histColl.Selectivity(ctx, sel.Conditions)
c.Assert(err, IsNil, comment)
c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))

statsTbl.Count *= 10
ratio, err = statsTbl.Selectivity(ctx, p.(plan.LogicalPlan).Children()[0].(*plan.LogicalSelection).Conditions)
histColl.Count *= 10
ratio, err = histColl.Selectivity(ctx, sel.Conditions)
c.Assert(err, IsNil, comment)
c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio))
statsTbl.Count /= 10
}
}

Expand Down
Loading