Skip to content

Commit

Permalink
[FLASH-386] DeltaMerge DDL support (pingcap#190)
Browse files Browse the repository at this point in the history
* cast DataType while reading from PageStorage

* add isLossyCast function

* alter for StorageDeltaMerge, WIP

* add Alter for StorageDeltaMerge

* add Alter for StorageDeltaMerge

* add TableInfo in StorageDeltaMerge

* rename table for StorageDeltaMerge

* fix bug: the TableInfo from TiDB

* add comments for DeltaMerge flush && cache

* faster(?) way to cast mismatched data types

* support cast for numeric type null/not null

* for other data types, only support changing null / not null

* isLossyCast -> isSupportedDataTypeCast

* isSupportedDataTypeCast add decimal detect

* rename function

* fix compile errors in gtests

* small fix

* fix broken tests

* support new column with non-zero default value

* remove unused code

* fix compile error in CI

* fix bug in table rename

* small fix

* minor fix

* refine cast function in chunk

* update DeltaMergeStore's segments within lock

* [WIP]Add test cases for default value ddl. Still has bugs

* fix bugs after rebasing to latest master

* add some TODO marks

* fix compile error in gtests && remove unused comments

* fix broken gtests

* flush cached chunks in delta instead of doing DeltaMerge when ddl-changes apply

* use TypeIndex instead of typeid_cast

* clean up data after tests

* address comment
  • Loading branch information
JaySon-Huang authored and zanmato1984 committed Nov 1, 2019
1 parent 271d3b3 commit e6a12be
Show file tree
Hide file tree
Showing 50 changed files with 2,610 additions and 357 deletions.
4 changes: 2 additions & 2 deletions dbms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,8 @@ if (TEST_COVERAGE AND CMAKE_BUILD_TYPE STREQUAL "Debug")
)
SETUP_TARGET_FOR_COVERAGE_LCOV(
NAME tiflash_lcov_coverage
DEPENDENCIES unit_tests_dbms
EXECUTABLE unit_tests_dbms
DEPENDENCIES gtests_dbms
EXECUTABLE gtests_dbms
)
set(COVERAGE_GCOVR_EXCLUDES
'contrib'
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/ColumnVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class ColumnVector final : public COWPtrHelper<ColumnVectorHelper, ColumnVector<

private:
ColumnVector() {}
ColumnVector(const size_t n) : data(n) {}
explicit ColumnVector(const size_t n) : data(n) {}
ColumnVector(const size_t n, const value_type x) : data(n, x) {}
ColumnVector(const ColumnVector & src) : data(src.data.begin(), src.data.end()) {};

Expand Down
4 changes: 3 additions & 1 deletion dbms/src/Core/Block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ Block Block::cloneWithColumns(MutableColumns && columns) const

size_t num_columns = data.size();
for (size_t i = 0; i < num_columns; ++i)
res.insert({ std::move(columns[i]), data[i].type, data[i].name });
res.insert({ std::move(columns[i]), data[i].type, data[i].name, data[i].column_id });

return res;
}
Expand Down Expand Up @@ -410,6 +410,8 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons
return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n"
+ lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE);

// TODO should we check column_id here?

if (actual.column->isColumnConst() && expected.column->isColumnConst())
{
Field actual_value = static_cast<const ColumnConst &>(*actual.column).getField();
Expand Down
2 changes: 2 additions & 0 deletions dbms/src/Core/ColumnWithTypeAndName.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ ColumnWithTypeAndName ColumnWithTypeAndName::cloneEmpty() const

res.name = name;
res.type = type;
res.column_id = column_id;
if (column)
res.column = column->cloneEmpty();

Expand All @@ -22,6 +23,7 @@ ColumnWithTypeAndName ColumnWithTypeAndName::cloneEmpty() const

bool ColumnWithTypeAndName::operator==(const ColumnWithTypeAndName & other) const
{
// TODO should we check column_id here?
return name == other.name
&& ((!type && !other.type) || (type && other.type && type->equals(*other.type)))
&& ((!column && !other.column) || (column && other.column && column->getName() == other.column->getName()));
Expand Down
8 changes: 5 additions & 3 deletions dbms/src/Core/ColumnWithTypeAndName.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include <utility>

#pragma once

#include <Columns/IColumn.h>
Expand All @@ -24,9 +26,9 @@ struct ColumnWithTypeAndName
/// TODO Handle column_id properly after we support DDL.
Int64 column_id;

ColumnWithTypeAndName() {}
ColumnWithTypeAndName(const ColumnPtr & column_, const DataTypePtr & type_, const String & name_)
: column(column_), type(type_), name(name_) {}
ColumnWithTypeAndName(): ColumnWithTypeAndName(nullptr, nullptr, "") {}
ColumnWithTypeAndName(ColumnPtr column_, const DataTypePtr & type_, const String & name_, Int64 column_id_ = 0)
: column(std::move(column_)), type(type_), name(name_), column_id(column_id_) {}

/// Uses type->createColumn() to create column
ColumnWithTypeAndName(const DataTypePtr & type_, const String & name_)
Expand Down
2 changes: 2 additions & 0 deletions dbms/src/Core/NamesAndTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ using NamesAndTypes = std::vector<NameAndTypePair>;
class NamesAndTypesList : public std::list<NameAndTypePair>
{
public:
using Iterator = std::list<NameAndTypePair>::iterator;

NamesAndTypesList() {}

NamesAndTypesList(std::initializer_list<NameAndTypePair> init) : std::list<NameAndTypePair>(init) {}
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/DataTypes/DataTypeDecimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ bool DataTypeDecimal<T>::equals(const IDataType & rhs) const
// make sure rhs has same underlying type with this type.
if (auto ptr = checkDecimal<T>(rhs))
{
return ptr->getScale() == scale;
return ptr->getScale() == scale && ptr->getPrec() == precision;
}
return false;
}
Expand Down
134 changes: 134 additions & 0 deletions dbms/src/DataTypes/isSupportedDataTypeCast.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include <DataTypes/isSupportedDataTypeCast.h>

#include <cassert>
#include <limits>

#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>

namespace DB
{

bool isSupportedDataTypeCast(const DataTypePtr &from, const DataTypePtr &to)
{
assert(from != nullptr && to != nullptr);
/// `to` is equal to `from`
if (to->equals(*from))
{
return true;
}

/// For Nullable, unwrap DataTypeNullable
{
bool has_nullable = false;
DataTypePtr from_not_null;
if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(from.get()))
{
has_nullable = true;
from_not_null = type_nullable->getNestedType();
}
else
{
from_not_null = from;
}

DataTypePtr to_not_null;
if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(to.get()))
{
has_nullable = true;
to_not_null = type_nullable->getNestedType();
}
else
{
to_not_null = to;
}

if (has_nullable)
return isSupportedDataTypeCast(from_not_null, to_not_null);
}

/// For numeric types (integer, floats)
if (from->isNumber() && to->isNumber())
{
/// int <-> float, or float32 <-> float64, is not supported
if (!from->isInteger() || !to->isInteger())
{
return false;
}
/// Change from signed to unsigned, or vice versa, is not supported
// use xor(^)
if ((from->isUnsignedInteger()) ^ (to->isUnsignedInteger()))
{
return false;
}

/// Both signed or unsigned, compare the sizeof(Type)
size_t from_sz = from->getSizeOfValueInMemory();
size_t to_sz = to->getSizeOfValueInMemory();
return from_sz <= to_sz;
}

/// For String / FixedString
if (from->isStringOrFixedString() && to->isStringOrFixedString())
{
size_t from_sz = std::numeric_limits<size_t>::max();
if (const DataTypeFixedString * type_fixed_str = typeid_cast<const DataTypeFixedString *>(from.get()))
from_sz = type_fixed_str->getN();
size_t to_sz = std::numeric_limits<size_t>::max();
if (const DataTypeFixedString * type_fixed_str = typeid_cast<const DataTypeFixedString *>(to.get()))
to_sz = type_fixed_str->getN();
return from_sz <= to_sz;
}

/// For Date and DateTime, not supported
if (from->isDateOrDateTime() || to->isDateOrDateTime())
{
return false;
}

{
bool from_is_decimal = IsDecimalDataType(from);
bool to_is_decimal = IsDecimalDataType(to);
if (from_is_decimal || to_is_decimal)
{
if (from_is_decimal && to_is_decimal)
{
// not support change Decimal to other type, neither other type to Decimal
return false;
}

return from->equals(*to);
}
}

// TODO enums, set?

/// some DataTypes that support in ClickHouse but not in TiDB

// Cast to Nothing / from Nothing is lossy
if (typeid_cast<const DataTypeNothing *>(from.get()) || typeid_cast<const DataTypeNothing *>(to.get()))
{
return true;
}

// Cast to Array / from Array is not supported
if (typeid_cast<const DataTypeArray *>(from.get()) || typeid_cast<const DataTypeArray *>(to.get()))
{
return false;
}

// Cast to Tuple / from Tuple is not supported
if (typeid_cast<const DataTypeTuple *>(from.get()) || typeid_cast<const DataTypeTuple *>(to.get()))
{
return false;
}

return false;
}

} // namespace DB
11 changes: 11 additions & 0 deletions dbms/src/DataTypes/isSupportedDataTypeCast.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

#include <DataTypes/IDataType.h>

namespace DB
{

/// Returns true if TiDB / TiFlash supports casting DataType `from` to `to` in DDL.
/// Both `from` and `to` must be non-null.
bool isSupportedDataTypeCast(const DataTypePtr &from, const DataTypePtr &to);

} // namespace DB
2 changes: 1 addition & 1 deletion dbms/src/DataTypes/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ target_link_libraries (data_types_number_fixed dbms)
add_executable (data_type_string data_type_string.cpp ${SRCS})
target_link_libraries (data_type_string dbms)

add_executable (data_type_get_common_type data_type_get_common_type.cpp ${SRCS})
add_executable (data_type_get_common_type gtest_data_type_get_common_type.cpp ${SRCS})
target_link_libraries (data_type_get_common_type dbms gtest_main)
Loading

0 comments on commit e6a12be

Please sign in to comment.