Skip to content

Commit

Permalink
Validate Dimension Datatype upon Dimension construction
Browse files Browse the repository at this point in the history
Now that Dimension is C.41 compliant, the constructor must take on the responsibility of validation. A new API has been added that validates the incoming Datatype for Dimensions, disallowing certain types.

---

SC-17968

---
TYPE: IMPROVEMENT
DESC: Validate Dimension datatype
  • Loading branch information
ihnorton committed Aug 2, 2022
1 parent d48f874 commit ed40669
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 1 deletion.
52 changes: 52 additions & 0 deletions test/src/unit-capi-array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2168,3 +2168,55 @@ TEST_CASE_METHOD(
remove_temp_dir(local_fs.file_prefix() + local_fs.temp_dir());
#endif
}

TEST_CASE_METHOD(
ArrayFx, "Test dimension datatypes", "[array][dimension][datatypes]") {
SupportedFsLocal local_fs;
std::string array_name =
local_fs.file_prefix() + local_fs.temp_dir() + "array_dim_types";
create_temp_dir(local_fs.file_prefix() + local_fs.temp_dir());

uint64_t dim_domain[] = {1, 10, 1, 10};
uint64_t tile_extent = 2;
tiledb_dimension_t* dim;

SECTION("- valid and supported Datatypes") {
std::vector<tiledb_datatype_t> valid_supported_types = {TILEDB_UINT64,
TILEDB_INT64};

for (auto dim_type : valid_supported_types) {
int rc = tiledb_dimension_alloc(
ctx_, "dim", dim_type, dim_domain, &tile_extent, &dim);
REQUIRE(rc == TILEDB_OK);
}
}

SECTION("- valid and unsupported Datatypes") {
std::vector<tiledb_datatype_t> valid_unsupported_types = {TILEDB_CHAR,
TILEDB_BOOL};

for (auto dim_type : valid_unsupported_types) {
int rc = tiledb_dimension_alloc(
ctx_, "dim", dim_type, dim_domain, &tile_extent, &dim);
REQUIRE(rc == TILEDB_ERR);
}
}

SECTION("- invalid Datatypes") {
std::vector<std::underlying_type_t<tiledb_datatype_t>> invalid_datatypes = {
42, 100};

for (auto dim_type : invalid_datatypes) {
int rc = tiledb_dimension_alloc(
ctx_,
"dim",
tiledb_datatype_t(dim_type),
dim_domain,
&tile_extent,
&dim);
REQUIRE(rc == TILEDB_ERR);
}
}

tiledb_dimension_free(&dim);
}
32 changes: 32 additions & 0 deletions tiledb/sm/array_schema/dimension.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ namespace sm {
Dimension::Dimension(const std::string& name, Datatype type)
: name_(name)
, type_(type) {
ensure_datatype_is_supported(type_);
cell_val_num_ = (datatype_is_string(type)) ? constants::var_num : 1;
set_ceil_to_tile_func();
set_coincides_with_tiles_func();
Expand Down Expand Up @@ -95,6 +96,7 @@ Dimension::Dimension(
, name_(name)
, tile_extent_(tile_extent)
, type_(type) {
ensure_datatype_is_supported(type_);
set_ceil_to_tile_func();
set_coincides_with_tiles_func();
set_compute_mbr_func();
Expand Down Expand Up @@ -127,6 +129,9 @@ Dimension::Dimension(const Dimension* dim) {
tile_extent_ = dim->tile_extent();
type_ = dim->type_;

// Validate type
ensure_datatype_is_supported(type_);

// Set fuctions
ceil_to_tile_func_ = dim->ceil_to_tile_func_;
coincides_with_tiles_func_ = dim->coincides_with_tiles_func_;
Expand Down Expand Up @@ -1842,6 +1847,33 @@ std::string Dimension::domain_str() const {
return "";
}

void Dimension::ensure_datatype_is_supported(Datatype type) const {
try {
ensure_datatype_is_valid(type);
} catch (...) {
std::throw_with_nested(
std::logic_error("[Dimension::ensure_datatype_is_supported] "));
return;
}

switch (type) {
case Datatype::CHAR:
case Datatype::BLOB:
case Datatype::BOOL:
case Datatype::STRING_UTF8:
case Datatype::STRING_UTF16:
case Datatype::STRING_UTF32:
case Datatype::STRING_UCS2:
case Datatype::STRING_UCS4:
case Datatype::ANY:
throw std::logic_error(
"Datatype::" + datatype_str(type) +
" is not a valid Dimension Datatype");
default:
return;
}
}

std::string Dimension::tile_extent_str() const {
std::stringstream ss;

Expand Down
12 changes: 11 additions & 1 deletion tiledb/sm/array_schema/dimension.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,14 @@ class FilterPipeline;
enum class Compressor : uint8_t;
enum class Datatype : uint8_t;

/** Manipulates a TileDB dimension. */
/** Manipulates a TileDB dimension.
*
* Note: as laid out in the Storage Format,
* the following Datatypes are not valid for Dimension:
* TILEDB_CHAR, TILEDB_BLOB, TILEDB_BOOL, TILEDB_STRING_UTF8,
* TILEDB_STRING_UTF16, TILEDB_STRING_UTF32, TILEDB_STRING_UCS2,
* TILEDB_STRING_UCS4, TILEDB_ANY
*/
class Dimension {
public:
/* ********************************* */
Expand Down Expand Up @@ -1000,6 +1007,9 @@ class Dimension {
/** Returns the domain in string format. */
std::string domain_str() const;

/** Throws error if the input type is not a supported Dimension Datatype. */
void ensure_datatype_is_supported(Datatype type) const;

/** Returns the tile extent in string format. */
std::string tile_extent_str() const;

Expand Down
69 changes: 69 additions & 0 deletions tiledb/sm/array_schema/test/unit_dimension.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,75 @@ TEST_CASE("Dimension: Test deserialize,string", "[dimension][deserialize]") {
CHECK(dim.value()->var_size() == true);
}

TEST_CASE("Dimension: Test datatypes", "[dimension][datatypes]") {
std::string dim_name = "dim";

SECTION("- valid and supported Datatypes") {
std::vector<Datatype> valid_supported_datatypes = {
Datatype::INT32, Datatype::INT64,
Datatype::FLOAT32, Datatype::FLOAT64,
Datatype::INT8, Datatype::UINT8,
Datatype::INT16, Datatype::UINT16,
Datatype::UINT32, Datatype::UINT64,
Datatype::STRING_ASCII, Datatype::DATETIME_YEAR,
Datatype::DATETIME_MONTH, Datatype::DATETIME_WEEK,
Datatype::DATETIME_DAY, Datatype::DATETIME_HR,
Datatype::DATETIME_MIN, Datatype::DATETIME_SEC,
Datatype::DATETIME_MS, Datatype::DATETIME_US,
Datatype::DATETIME_NS, Datatype::DATETIME_PS,
Datatype::DATETIME_FS, Datatype::DATETIME_AS,
Datatype::TIME_HR, Datatype::TIME_MIN,
Datatype::TIME_SEC, Datatype::TIME_MS,
Datatype::TIME_US, Datatype::TIME_NS,
Datatype::TIME_PS, Datatype::TIME_FS,
Datatype::TIME_AS};

for (Datatype type : valid_supported_datatypes) {
try {
Dimension dim{dim_name, type};
} catch (...) {
throw std::logic_error("Uncaught exception in Dimension constructor");
}
}
}

SECTION("- valid and unsupported Datatypes") {
std::vector<Datatype> valid_unsupported_datatypes = {Datatype::CHAR,
Datatype::BLOB,
Datatype::BOOL,
Datatype::STRING_UTF8,
Datatype::STRING_UTF16,
Datatype::STRING_UTF32,
Datatype::STRING_UCS2,
Datatype::STRING_UCS4,
Datatype::ANY};

for (Datatype type : valid_unsupported_datatypes) {
try {
Dimension dim{dim_name, type};
} catch (std::exception& e) {
CHECK(
e.what() == "Datatype::" + datatype_str(type) +
" is not a valid Dimension Datatype");
}
}
}

SECTION("- invalid Datatypes") {
std::vector<std::underlying_type_t<Datatype>> invalid_datatypes = {42, 100};

for (auto type : invalid_datatypes) {
try {
Dimension dim{dim_name, Datatype(type)};
} catch (std::exception& e) {
CHECK(
std::string(e.what()) ==
"[Dimension::ensure_datatype_is_supported] ");
}
}
}
}

typedef tuple<
int8_t,
int16_t,
Expand Down
1 change: 1 addition & 0 deletions tiledb/sm/c_api/tiledb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,7 @@ int32_t tiledb_dimension_alloc(
// Create a new Dimension object
(*dim)->dim_ = new (std::nothrow)
tiledb::sm::Dimension(name, static_cast<tiledb::sm::Datatype>(type));

if ((*dim)->dim_ == nullptr) {
delete *dim;
*dim = nullptr;
Expand Down
6 changes: 6 additions & 0 deletions tiledb/sm/c_api/tiledb.h
Original file line number Diff line number Diff line change
Expand Up @@ -2604,6 +2604,12 @@ TILEDB_EXPORT int32_t tiledb_domain_dump(
* ctx, "dim_0", TILEDB_INT64, dim_domain, &tile_extent, &dim);
* @endcode
*
* Note: as laid out in the Storage Format,
* the following Datatypes are not valid for Dimension:
* TILEDB_CHAR, TILEDB_BLOB, TILEDB_BOOL, TILEDB_STRING_UTF8,
* TILEDB_STRING_UTF16, TILEDB_STRING_UTF32, TILEDB_STRING_UCS2,
* TILEDB_STRING_UCS4, TILEDB_ANY
*
* @param ctx The TileDB context.
* @param name The dimension name.
* @param type The dimension type.
Expand Down
6 changes: 6 additions & 0 deletions tiledb/sm/cpp_api/dimension.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ namespace tiledb {
* // Create a dimension with inclusive domain [0,1000] and tile extent 100.
* domain.add_dimension(Dimension::create<int32_t>(ctx, "d", {{0, 1000}}, 100));
* @endcode
*
* Note: as laid out in the Storage Format,
* the following Datatypes are not valid for Dimension:
* TILEDB_CHAR, TILEDB_BLOB, TILEDB_BOOL, TILEDB_STRING_UTF8,
* TILEDB_STRING_UTF16, TILEDB_STRING_UTF32, TILEDB_STRING_UCS2,
* TILEDB_STRING_UCS4, TILEDB_ANY
**/
class Dimension {
public:
Expand Down
14 changes: 14 additions & 0 deletions tiledb/storage_format/DIRECTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,17 @@ Here's a partial list of concerns that are out of scope:

* **Storage access.** Nothing in this directory has direct access to an object store or a file system. A user of these classes must provide the results of a storage operation to this code for parsing and interpretation.
* **State maintenance.** This code has the notion of an "array in a directory", but it does not provide any notion of "the current state" of an array in storage. The concept here is that of an array a data type, not as a variable of that data type.

## Useful Information
The following is the list of Datatypes that are **not** supported by the Dimension class.
Please note that this information is repeated in the C API, C++ API, and the Dimension class.

* TILEDB_CHAR
* TILEDB_BLOB
* TILEDB_BOOL
* TILEDB_STRING_UTF8
* TILEDB_STRING_UTF16
* TILEDB_STRING_UTF32
* TILEDB_STRING_UCS2
* TILEDB_STRING_UCS4
* TILEDB_ANY

0 comments on commit ed40669

Please sign in to comment.