From e56c6db4d28ac5e18cefd8c8acb532a34e388722 Mon Sep 17 00:00:00 2001 From: Chengpeng Yan <41809508+Reminiscent@users.noreply.github.com> Date: Tue, 3 Sep 2019 19:21:58 +0800 Subject: [PATCH] Expression: Add test framework for vectorized expression evaluation functions (#11963) --- expression/bench_test.go | 470 +++++++++++++++++++++++++++++++++++---- 1 file changed, 422 insertions(+), 48 deletions(-) diff --git a/expression/bench_test.go b/expression/bench_test.go index de674bf1399ce..c8bd623f3b6e4 100644 --- a/expression/bench_test.go +++ b/expression/bench_test.go @@ -23,12 +23,14 @@ import ( "testing" "time" + . "github.com/pingcap/check" "github.com/pingcap/parser/ast" "github.com/pingcap/parser/charset" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/types/json" "github.com/pingcap/tidb/util/chunk" "github.com/pingcap/tidb/util/mock" ) @@ -194,13 +196,14 @@ func BenchmarkScalarFunctionClone(b *testing.B) { } type vecExprBenchCase struct { - builtinFuncName string - retEvalType types.EvalType - childrenTypes []types.EvalType + retEvalType types.EvalType + childrenTypes []types.EvalType } -var vecExprBenchCases = []vecExprBenchCase{ - {ast.Cast, types.ETInt, []types.EvalType{types.ETInt}}, +var vecExprBenchCases = map[string][]vecExprBenchCase{ + ast.Cast: { + {types.ETInt, []types.EvalType{types.ETInt}}, + }, } func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) { @@ -219,8 +222,71 @@ func fillColumn(eType types.EvalType, chk *chunk.Chunk, colIdx int) { } } } + case types.ETReal: + for i := 0; i < batchSize; i++ { + if rand.Float64() < nullRatio { + chk.AppendNull(colIdx) + } else { + if rand.Float64() < 0.5 { + chk.AppendFloat64(colIdx, -rand.Float64()) + } else { + chk.AppendFloat64(colIdx, rand.Float64()) + } + } + } + case types.ETDecimal: + for i := 0; i < batchSize; i++ { + if rand.Float64() < nullRatio { + chk.AppendNull(colIdx) + } else { + d := new(types.MyDecimal) + f := rand.Float64() * 100000 + if err := d.FromFloat64(f); err != nil { + panic(err) + } + chk.AppendMyDecimal(colIdx, d) + } + } + case types.ETDatetime, types.ETTimestamp: + for i := 0; i < batchSize; i++ { + if rand.Float64() < nullRatio { + chk.AppendNull(colIdx) + } else { + gt := types.FromDate(rand.Intn(2200), rand.Intn(10)+1, rand.Intn(20)+1, rand.Intn(12), rand.Intn(60), rand.Intn(60), rand.Intn(1000)) + t := types.Time{Time: gt, Type: convertETType(eType)} + chk.AppendTime(colIdx, t) + } + } + case types.ETDuration: + for i := 0; i < batchSize; i++ { + if rand.Float64() < nullRatio { + chk.AppendNull(colIdx) + } else { + d := types.Duration{Duration: time.Duration(rand.Int())} + chk.AppendDuration(colIdx, d) + } + } + case types.ETJson: + for i := 0; i < batchSize; i++ { + if rand.Float64() < nullRatio { + chk.AppendNull(colIdx) + } else { + j := new(json.BinaryJSON) + if err := j.UnmarshalJSON([]byte(fmt.Sprintf(`{"key":%v}`, rand.Int()))); err != nil { + panic(err) + } + chk.AppendJSON(colIdx, *j) + } + } + case types.ETString: + for i := 0; i < batchSize; i++ { + if rand.Float64() < nullRatio { + chk.AppendNull(colIdx) + } else { + chk.AppendString(colIdx, fmt.Sprintf("%v", rand.Int())) + } + } default: - // TODO: support all EvalTypes later. panic(fmt.Sprintf("EvalType=%v is not supported.", eType)) } } @@ -229,84 +295,392 @@ func eType2FieldType(eType types.EvalType) *types.FieldType { switch eType { case types.ETInt: return types.NewFieldType(mysql.TypeLonglong) + case types.ETReal: + return types.NewFieldType(mysql.TypeDouble) + case types.ETDecimal: + return types.NewFieldType(mysql.TypeNewDecimal) + case types.ETDatetime, types.ETTimestamp: + return types.NewFieldType(mysql.TypeDate) + case types.ETDuration: + return types.NewFieldType(mysql.TypeDuration) + case types.ETJson: + return types.NewFieldType(mysql.TypeJSON) + case types.ETString: + return types.NewFieldType(mysql.TypeVarString) + default: + panic(fmt.Sprintf("EvalType=%v is not supported.", eType)) } - // TODO: support all EvalTypes later. - panic(fmt.Sprintf("EvalType=%v is not supported.", eType)) } -func genVecExprBenchCase(ctx sessionctx.Context, testCase vecExprBenchCase) (Expression, *chunk.Chunk, *chunk.Chunk) { +func genVecExprBenchCase(ctx sessionctx.Context, funcName string, testCase vecExprBenchCase) (expr Expression, input *chunk.Chunk, output *chunk.Chunk) { fts := make([]*types.FieldType, len(testCase.childrenTypes)) for i, eType := range testCase.childrenTypes { fts[i] = eType2FieldType(eType) } cols := make([]Expression, len(testCase.childrenTypes)) - input := chunk.New(fts, 1024, 1024) + input = chunk.New(fts, 1024, 1024) for i, eType := range testCase.childrenTypes { fillColumn(eType, input, i) cols[i] = &Column{Index: i, RetType: fts[i]} } - expr, err := NewFunction(ctx, testCase.builtinFuncName, eType2FieldType(testCase.retEvalType), cols...) + expr, err := NewFunction(ctx, funcName, eType2FieldType(testCase.retEvalType), cols...) if err != nil { panic(err) } - output := chunk.New([]*types.FieldType{eType2FieldType(testCase.retEvalType)}, 1024, 1024) + output = chunk.New([]*types.FieldType{eType2FieldType(testCase.retEvalType)}, 1024, 1024) return expr, input, output } -func TestVectorizedExpression(t *testing.T) { +func (s *testEvaluatorSuite) TestVectorizedEvalOneVec(c *C) { ctx := mock.NewContext() - for _, testCase := range vecExprBenchCases { - expr, input, output := genVecExprBenchCase(ctx, testCase) - output2 := output.CopyConstruct() - if err := evalOneVec(ctx, expr, input, output, 0); err != nil { - t.Fatal(err) + for funcName, testCases := range vecExprBenchCases { + for _, testCase := range testCases { + expr, input, output := genVecExprBenchCase(ctx, funcName, testCase) + output2 := output.CopyConstruct() + c.Assert(evalOneVec(ctx, expr, input, output, 0), IsNil) + it := chunk.NewIterator4Chunk(input) + c.Assert(evalOneColumn(ctx, expr, it, output2, 0), IsNil) + + c1, c2 := output.Column(0), output2.Column(0) + switch testCase.retEvalType { + case types.ETInt: + for i := 0; i < input.NumRows(); i++ { + c.Assert(c1.IsNull(i) != c2.IsNull(i) || (!c1.IsNull(i) && c1.GetInt64(i) != c2.GetInt64(i)), IsFalse) + } + case types.ETReal: + for i := 0; i < input.NumRows(); i++ { + c.Assert(c1.IsNull(i) != c2.IsNull(i) || (!c1.IsNull(i) && c1.GetFloat64(i) != c2.GetFloat64(i)), IsFalse) + } + case types.ETDecimal: + for i := 0; i < input.NumRows(); i++ { + c.Assert(c1.IsNull(i) != c2.IsNull(i) || (!c1.IsNull(i) && c1.GetDecimal(i).Compare(c2.GetDecimal(i)) != 0), IsFalse) + } + case types.ETDatetime, types.ETTimestamp: + for i := 0; i < input.NumRows(); i++ { + c.Assert(c1.IsNull(i) != c2.IsNull(i) || (!c1.IsNull(i) && c1.GetTime(i).Compare(c2.GetTime(i)) != 0), IsFalse) + } + case types.ETDuration: + for i := 0; i < input.NumRows(); i++ { + c.Assert(c1.IsNull(i) != c2.IsNull(i) || (!c1.IsNull(i) && c1.GetDuration(i, 0) != c2.GetDuration(i, 0)), IsFalse) + } + case types.ETJson: + for i := 0; i < input.NumRows(); i++ { + c.Assert(c1.IsNull(i) != c2.IsNull(i) || (!c1.IsNull(i) && c1.GetJSON(i).String() != c2.GetJSON(i).String()), IsFalse) + } + case types.ETString: + for i := 0; i < input.NumRows(); i++ { + c.Assert(c1.IsNull(i) != c2.IsNull(i) || (!c1.IsNull(i) && c1.GetString(i) != c2.GetString(i)), IsFalse) + } + } } + } +} + +func BenchmarkVectorizedEvalOneVec(b *testing.B) { + ctx := mock.NewContext() + for funcName, testCases := range vecExprBenchCases { + for _, testCase := range testCases { + expr, input, output := genVecExprBenchCase(ctx, funcName, testCase) + exprName := expr.String() + if sf, ok := expr.(*ScalarFunction); ok { + exprName = fmt.Sprintf("%v", reflect.TypeOf(sf.Function)) + tmp := strings.Split(exprName, ".") + exprName = tmp[len(tmp)-1] + } - it := chunk.NewIterator4Chunk(input) - if err := evalOneColumn(ctx, expr, it, output2, 0); err != nil { - t.Fatal(err) + b.Run(exprName+"-EvalOneVec", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := evalOneVec(ctx, expr, input, output, 0); err != nil { + b.Fatal(err) + } + } + }) + b.Run(exprName+"-EvalOneCol", func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + it := chunk.NewIterator4Chunk(input) + if err := evalOneColumn(ctx, expr, it, output, 0); err != nil { + b.Fatal(err) + } + } + }) } + } +} + +func genVecBuiltinFuncBenchCase(ctx sessionctx.Context, funcName string, testCase vecExprBenchCase) (baseFunc builtinFunc, input *chunk.Chunk, result *chunk.Column) { + childrenNumber := len(testCase.childrenTypes) + fts := make([]*types.FieldType, childrenNumber) + for i, eType := range testCase.childrenTypes { + fts[i] = eType2FieldType(eType) + } + cols := make([]Expression, childrenNumber) + input = chunk.New(fts, 1024, 1024) + for i, eType := range testCase.childrenTypes { + fillColumn(eType, input, i) + cols[i] = &Column{Index: i, RetType: fts[i]} + } + var err error + if funcName == ast.Cast { + var fc functionClass + tp := eType2FieldType(testCase.retEvalType) switch testCase.retEvalType { case types.ETInt: - if !reflect.DeepEqual(output.Column(0).Int64s(), output2.Column(0).Int64s()) { - t.Fatal(fmt.Sprintf("error testCase %v", testCase)) - } - default: - t.Fatal(fmt.Sprintf("evalType=%v is not supported", testCase.retEvalType)) + fc = &castAsIntFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + case types.ETDecimal: + fc = &castAsDecimalFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + case types.ETReal: + fc = &castAsRealFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + case types.ETDatetime, types.ETTimestamp: + fc = &castAsTimeFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + case types.ETDuration: + fc = &castAsDurationFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + case types.ETJson: + fc = &castAsJSONFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} + case types.ETString: + fc = &castAsStringFunctionClass{baseFunctionClass{ast.Cast, 1, 1}, tp} } + baseFunc, err = fc.getFunction(ctx, cols) + } else { + baseFunc, err = funcs[funcName].getFunction(ctx, cols) + } + if err != nil { + panic(err) } + result = chunk.NewColumn(eType2FieldType(testCase.retEvalType), 1024) + return baseFunc, input, result } -func BenchmarkVectorizedExpression(b *testing.B) { +func (s *testEvaluatorSuite) TestVectorizedBuiltinFunc(c *C) { ctx := mock.NewContext() - for _, testCase := range vecExprBenchCases { - expr, input, output := genVecExprBenchCase(ctx, testCase) - exprName := expr.String() - if sf, ok := expr.(*ScalarFunction); ok { - exprName = fmt.Sprintf("%v", reflect.TypeOf(sf.Function)) - tmp := strings.Split(exprName, ".") - exprName = tmp[len(tmp)-1] + for funcName, testCases := range vecExprBenchCases { + for _, testCase := range testCases { + baseFunc, input, output := genVecBuiltinFuncBenchCase(ctx, funcName, testCase) + it := chunk.NewIterator4Chunk(input) + i := 0 + switch testCase.retEvalType { + case types.ETInt: + err := baseFunc.vecEvalInt(input, output) + c.Assert(err, IsNil) + i64s := output.Int64s() + for row := it.Begin(); row != it.End(); row = it.Next() { + val, isNull, err := baseFunc.evalInt(row) + c.Assert(err, IsNil) + c.Assert(isNull, Equals, output.IsNull(i)) + if !isNull { + c.Assert(val, Equals, i64s[i]) + } + i++ + } + case types.ETReal: + err := baseFunc.vecEvalReal(input, output) + c.Assert(err, IsNil) + f64s := output.Float64s() + for row := it.Begin(); row != it.End(); row = it.Next() { + val, isNull, err := baseFunc.evalReal(row) + c.Assert(err, IsNil) + c.Assert(isNull, Equals, output.IsNull(i)) + if !isNull { + c.Assert(val, Equals, f64s[i]) + } + i++ + } + case types.ETDecimal: + err := baseFunc.vecEvalDecimal(input, output) + c.Assert(err, IsNil) + d64s := output.Decimals() + for row := it.Begin(); row != it.End(); row = it.Next() { + val, isNull, err := baseFunc.evalDecimal(row) + c.Assert(err, IsNil) + c.Assert(isNull, Equals, output.IsNull(i)) + if !isNull { + c.Assert(val, Equals, d64s[i]) + } + i++ + } + case types.ETDatetime, types.ETTimestamp: + err := baseFunc.vecEvalTime(input, output) + c.Assert(err, IsNil) + t64s := output.Times() + for row := it.Begin(); row != it.End(); row = it.Next() { + val, isNull, err := baseFunc.evalTime(row) + c.Assert(err, IsNil) + c.Assert(isNull, Equals, output.IsNull(i)) + if !isNull { + c.Assert(val, Equals, t64s[i]) + } + i++ + } + case types.ETDuration: + err := baseFunc.vecEvalDuration(input, output) + c.Assert(err, IsNil) + d64s := output.GoDurations() + for row := it.Begin(); row != it.End(); row = it.Next() { + val, isNull, err := baseFunc.evalDuration(row) + c.Assert(err, IsNil) + c.Assert(isNull, Equals, output.IsNull(i)) + if !isNull { + c.Assert(val, Equals, d64s[i]) + } + i++ + } + case types.ETJson: + err := baseFunc.vecEvalJSON(input, output) + c.Assert(err, IsNil) + for row := it.Begin(); row != it.End(); row = it.Next() { + val, isNull, err := baseFunc.evalDuration(row) + c.Assert(err, IsNil) + c.Assert(isNull, Equals, output.IsNull(i)) + if !isNull { + c.Assert(val, Equals, output.GetJSON(i)) + } + i++ + } + case types.ETString: + err := baseFunc.vecEvalString(input, output) + c.Assert(err, IsNil) + for row := it.Begin(); row != it.End(); row = it.Next() { + val, isNull, err := baseFunc.evalDuration(row) + c.Assert(err, IsNil) + c.Assert(isNull, Equals, output.IsNull(i)) + if !isNull { + c.Assert(val, Equals, output.GetString(i)) + } + i++ + } + default: + c.Fatal(fmt.Sprintf("evalType=%v is not supported", testCase.retEvalType)) + } } + } +} - b.Run(exprName+"-VecExpr", func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { - if err := evalOneVec(ctx, expr, input, output, 0); err != nil { - b.Fatal(err) +func BenchmarkVectorizedBuiltinFunc(b *testing.B) { + ctx := mock.NewContext() + for funcName, testCases := range vecExprBenchCases { + for _, testCase := range testCases { + baseFunc, input, output := genVecBuiltinFuncBenchCase(ctx, funcName, testCase) + baseFuncName := fmt.Sprintf("%v", reflect.TypeOf(baseFunc)) + tmp := strings.Split(baseFuncName, ".") + baseFuncName = tmp[len(tmp)-1] + + b.Run(baseFuncName+"-VecBuiltinFunc", func(b *testing.B) { + b.ResetTimer() + switch testCase.retEvalType { + case types.ETInt: + for i := 0; i < b.N; i++ { + if err := baseFunc.vecEvalInt(input, output); err != nil { + b.Fatal(err) + } + } + case types.ETReal: + for i := 0; i < b.N; i++ { + if err := baseFunc.vecEvalReal(input, output); err != nil { + b.Fatal(err) + } + } + case types.ETDecimal: + for i := 0; i < b.N; i++ { + if err := baseFunc.vecEvalDecimal(input, output); err != nil { + b.Fatal(err) + } + } + case types.ETDatetime, types.ETTimestamp: + for i := 0; i < b.N; i++ { + if err := baseFunc.vecEvalTime(input, output); err != nil { + b.Fatal(err) + } + } + case types.ETDuration: + for i := 0; i < b.N; i++ { + if err := baseFunc.vecEvalDuration(input, output); err != nil { + b.Fatal(err) + } + } + case types.ETJson: + for i := 0; i < b.N; i++ { + if err := baseFunc.vecEvalJSON(input, output); err != nil { + b.Fatal(err) + } + } + case types.ETString: + for i := 0; i < b.N; i++ { + if err := baseFunc.vecEvalString(input, output); err != nil { + b.Fatal(err) + } + } + default: + b.Fatal(fmt.Sprintf("evalType=%v is not supported", testCase.retEvalType)) } - } - }) - b.Run(exprName+"-NonVecExpr", func(b *testing.B) { - b.ResetTimer() - for i := 0; i < b.N; i++ { + }) + b.Run(baseFuncName+"-NonVecBuiltinFunc", func(b *testing.B) { + b.ResetTimer() it := chunk.NewIterator4Chunk(input) - if err := evalOneColumn(ctx, expr, it, output, 0); err != nil { - b.Fatal(err) + switch testCase.retEvalType { + case types.ETInt: + for i := 0; i < b.N; i++ { + for row := it.Begin(); row != it.End(); row = it.Next() { + if _, _, err := baseFunc.evalInt(row); err != nil { + b.Fatal(err) + } + } + } + case types.ETReal: + for i := 0; i < b.N; i++ { + for row := it.Begin(); row != it.End(); row = it.Next() { + if _, _, err := baseFunc.evalReal(row); err != nil { + b.Fatal(err) + } + } + } + case types.ETDecimal: + for i := 0; i < b.N; i++ { + for row := it.Begin(); row != it.End(); row = it.Next() { + if _, _, err := baseFunc.evalDecimal(row); err != nil { + b.Fatal(err) + } + } + } + case types.ETDatetime, types.ETTimestamp: + for i := 0; i < b.N; i++ { + for row := it.Begin(); row != it.End(); row = it.Next() { + if _, _, err := baseFunc.evalTime(row); err != nil { + b.Fatal(err) + } + } + } + case types.ETDuration: + for i := 0; i < b.N; i++ { + for row := it.Begin(); row != it.End(); row = it.Next() { + if _, _, err := baseFunc.evalDuration(row); err != nil { + b.Fatal(err) + } + } + } + case types.ETJson: + for i := 0; i < b.N; i++ { + for row := it.Begin(); row != it.End(); row = it.Next() { + if _, _, err := baseFunc.evalJSON(row); err != nil { + b.Fatal(err) + } + } + } + case types.ETString: + for i := 0; i < b.N; i++ { + for row := it.Begin(); row != it.End(); row = it.Next() { + if _, _, err := baseFunc.evalString(row); err != nil { + b.Fatal(err) + } + } + } + default: + b.Fatal(fmt.Sprintf("evalType=%v is not supported", testCase.retEvalType)) } - } - }) + }) + } } }