Skip to content

Commit

Permalink
Merge Metadata and GroupMetadata into one Python class (#2099)
Browse files — browse the repository at this point in the history
* Delete Cython
* Add new pybind11 Adapter class
* Move Python class + fixes
* Fix tests
  • Loading branch information
kounelisagis committed Dec 20, 2024
1 parent 69cdf4c commit 66c401d
Show file tree
Hide file tree
Showing 13 changed files with 488 additions and 894 deletions.
12 changes: 10 additions & 2 deletions tiledb/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .ctx import Config, Ctx, default_ctx
from .domain_indexer import DomainIndexer
from .enumeration import Enumeration
from .libtiledb import Metadata
from .metadata import Metadata

# Integer types supported by Python / System
_inttypes = (int, np.integer)
Expand Down Expand Up @@ -340,7 +340,7 @@ def __init__(
self.__buffers = None

self.last_fragment_info = dict()
self.meta = Metadata(self)
self._meta = Metadata(self.array)

def __capsule__(self):
    """Return the capsule produced by the wrapped ``self.array`` object.

    This is a pure delegation: the call is forwarded to
    ``self.array.__capsule__()`` and its result is returned unchanged.
    """
    capsule = self.array.__capsule__()
    return capsule
Expand Down Expand Up @@ -533,6 +533,14 @@ def timestamp_range(self):

return (timestamp_start, timestamp_end)

@property
def meta(self) -> Metadata:
    """
    Read-only accessor for the array's metadata, a key-value structure.

    :return: the ``Metadata`` object stored in ``self._meta``
    :rtype: Metadata
    """
    return self._meta

def subarray(self, selection, attrs=None, coords=False, order=None):
    """Placeholder: this implementation always raises.

    :raises NotImplementedError: unconditionally; none of the arguments
        are inspected.
    """
    raise NotImplementedError()

Expand Down
1 change: 1 addition & 0 deletions tiledb/cc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pybind11_add_module(
filestore.cc
filter.cc
group.cc
metadata.h
object.cc
query.cc
schema.cc
Expand Down
56 changes: 26 additions & 30 deletions tiledb/cc/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#include <tiledb/tiledb> // C++
#include <tiledb/tiledb_experimental>

#include "common.h"
#include "metadata.h"

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
Expand Down Expand Up @@ -235,32 +235,24 @@ void init_array(py::module &m) {
py::overload_cast<const Context &, const std::string &,
Config *const>(&Array::consolidate_metadata))
.def("_put_metadata",
[](Array &self, std::string &key, tiledb_datatype_t tdb_type,
const py::buffer &b) {
py::buffer_info info = b.request();

size_t size = 1;
for (auto s : info.shape) {
size *= s;
}
self.put_metadata(key, tdb_type, size, info.ptr);
[](Array &array, const std::string &key, py::array value) {
MetadataAdapter<Array> a;
a.put_metadata_numpy(array, key, value);
})
.def("_get_metadata",
[](Array &self, std::string &key) -> py::buffer {
tiledb_datatype_t tdb_type;
uint32_t value_num = 0;
const void *data_ptr = nullptr;

self.get_metadata(key, &tdb_type, &value_num, &data_ptr);

if (data_ptr == nullptr && value_num != 1) {
throw py::key_error();
}

assert(data_ptr != nullptr);
return py::memoryview::from_memory(
data_ptr, value_num * tiledb_datatype_size(tdb_type));
.def("_put_metadata",
[](Array &array, const std::string &key,
tiledb_datatype_t value_type, uint32_t value_num,
py::buffer value) {
MetadataAdapter<Array> a;
a.put_metadata(array, key, value_type, value_num, value);
})
.def(
"_get_metadata",
[](Array &array, const std::string &key, bool is_ndarray) {
MetadataAdapter<Array> a;
return a.get_metadata(array, key, is_ndarray);
},
py::arg("key"), py::arg("is_ndarray") = false)
.def("_get_metadata_from_index",
[](Array &self, uint64_t index) -> py::tuple {
tiledb_datatype_t tdb_type;
Expand All @@ -282,14 +274,18 @@ void init_array(py::module &m) {

return py::make_tuple(tdb_type, buf);
})
.def("_get_key_from_index",
[](Array &array, uint64_t index) {
MetadataAdapter<Array> a;
return a.get_key_from_index(array, index);
})
.def("_delete_metadata", &Array::delete_metadata)
.def("_has_metadata",
[](Array &self, std::string &key) -> py::tuple {
tiledb_datatype_t has_type;
bool has_it = self.has_metadata(key, &has_type);
return py::make_tuple(has_it, has_type);
[](Array &array, const std::string &key) {
MetadataAdapter<Array> a;
return a.has_metadata(array, key);
})
.def("metadata_num", &Array::metadata_num)
.def("_metadata_num", &Array::metadata_num)
.def("_delete_array",
py::overload_cast<const Context &, const std::string &>(
&Array::delete_array))
Expand Down
196 changes: 33 additions & 163 deletions tiledb/cc/group.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,169 +6,14 @@
#include <pybind11/pytypes.h>
#include <pybind11/stl.h>

#include "common.h"
#include "metadata.h"

namespace libtiledbcpp {

using namespace tiledb;
using namespace tiledbpy::common;
namespace py = pybind11;

/**
 * Store a NumPy array as the metadata value for `key` on `group`.
 *
 * @param group Group that receives the metadata entry.
 * @param key   Metadata key.
 * @param value NumPy array holding the value; must be one-dimensional.
 *
 * @throws py::type_error if the dtype cannot be mapped to a TileDB datatype,
 *         if the array is not 1D, or if the dtype spans more than one cell
 *         while the array holds more than one element.
 */
void put_metadata_numpy(Group &group, const std::string &key, py::array value) {
  // Map the NumPy dtype first; a mapping failure is re-raised as a Python
  // TypeError carrying the original message.
  const tiledb_datatype_t tdb_type = [&value]() -> tiledb_datatype_t {
    try {
      return np_to_tdb_dtype(value.dtype());
    } catch (const TileDBPyError &err) {
      throw py::type_error(err.what());
    }
  }();

  if (value.ndim() != 1) {
    throw py::type_error("Only 1D Numpy arrays can be stored as metadata");
  }

  // Multi-cell arrays cannot be stored as metadata, e.g. an array of strings
  // whose items are longer than one character.
  const py::size_t cells_per_item = get_ncells(value.dtype());
  const bool is_multi_cell_array = cells_per_item != 1 && value.size() > 1;
  if (is_multi_cell_array) {
    throw py::type_error("Unsupported dtype '" +
                         std::string(py::str(value.dtype())) +
                         "' for metadata");
  }

  // String types are counted in bytes, everything else in elements.
  const auto count = is_tdb_str(tdb_type) ? value.nbytes() : value.size();
  // An empty value is written with a null data pointer.
  const void *payload = count > 0 ? value.data() : nullptr;
  group.put_metadata(key, tdb_type, count, payload);
}

/**
 * Store a raw buffer as the metadata value for `key` on `group`.
 *
 * @param group      Group that receives the metadata entry.
 * @param key        Metadata key.
 * @param value_type TileDB datatype of the stored value.
 * @param value_num  Number of values, forwarded as-is to the group.
 * @param value      Python buffer whose data pointer is handed to TileDB.
 *
 * NOTE(review): the buffer's size is not compared against `value_num` here;
 * the caller is presumably responsible for keeping them consistent.
 */
void put_metadata(Group &group, const std::string &key,
                  tiledb_datatype_t value_type, uint32_t value_num,
                  py::buffer &value) {
  const py::buffer_info view = value.request();
  group.put_metadata(key, value_type, value_num, view.ptr);
}

/**
 * Report whether `group` has a metadata entry named `key`.
 *
 * The datatype that the underlying call also reports is discarded.
 */
bool has_metadata(Group &group, const std::string &key) {
  tiledb_datatype_t ignored_type;
  const bool present = group.has_metadata(key, &ignored_type);
  return present;
}

/**
 * Return the key of the metadata entry at position `index` in `group`.
 *
 * Only the key is kept; the datatype, value count and value pointer that
 * `get_metadata_from_index` also fills in are ignored.
 */
std::string get_key_from_index(Group &group, uint64_t index) {
  // Out-parameters required by the TileDB call but unused here.
  tiledb_datatype_t ignored_type;
  uint32_t ignored_count;
  const void *ignored_data;

  std::string name;
  group.get_metadata_from_index(index, &name, &ignored_type, &ignored_count,
                                &ignored_data);
  return name;
}

/**
 * Convert a raw TileDB metadata value into a Python object.
 *
 * @param value_type TileDB datatype of the stored value.
 * @param value_num  Number of values (bytes for the string/blob types).
 * @param value_ptr  Pointer to the raw value data; may be null.
 *
 * @return - `str` for TILEDB_STRING_UTF8 (empty `str` when the pointer is
 *           null);
 *         - `bytes` for TILEDB_BLOB, TILEDB_CHAR and TILEDB_STRING_ASCII
 *           (empty `bytes` when the pointer is null);
 *         - for the supported numeric types: an empty tuple when the pointer
 *           is null, the bare value when `value_num` is 1, otherwise a tuple
 *           of `value_num` values.
 *
 * @throws TileDBError if `value_num` is 0 or the datatype is not one of the
 *         handled types.
 */
py::object unpack_metadata_val(tiledb_datatype_t value_type, uint32_t value_num,
                               const char *value_ptr) {
  if (value_num == 0) {
    throw TileDBError("internal error: unexpected value_num==0");
  }

  const bool has_data = value_ptr != nullptr;

  // UTF-8 strings become Python str.
  if (value_type == TILEDB_STRING_UTF8) {
    if (!has_data) {
      return py::str();
    }
    return py::str(value_ptr, value_num);
  }

  // Byte-like types become Python bytes.
  const bool is_byte_like = value_type == TILEDB_BLOB ||
                            value_type == TILEDB_CHAR ||
                            value_type == TILEDB_STRING_ASCII;
  if (is_byte_like) {
    if (!has_data) {
      return py::bytes();
    }
    return py::bytes(value_ptr, value_num);
  }

  if (!has_data) {
    return py::tuple();
  }

  // Numeric types: decode each element in turn, stepping the cursor by the
  // datatype size after every element.
  py::tuple unpacked(value_num);
  const char *cursor = value_ptr;
  for (uint32_t idx = 0; idx < value_num;
       ++idx, cursor += tiledb_datatype_size(value_type)) {
    switch (value_type) {
    case TILEDB_INT64:
      unpacked[idx] = *reinterpret_cast<const int64_t *>(cursor);
      break;
    case TILEDB_FLOAT64:
      unpacked[idx] = *reinterpret_cast<const double *>(cursor);
      break;
    case TILEDB_FLOAT32:
      unpacked[idx] = *reinterpret_cast<const float *>(cursor);
      break;
    case TILEDB_INT32:
      unpacked[idx] = *reinterpret_cast<const int32_t *>(cursor);
      break;
    case TILEDB_UINT32:
      unpacked[idx] = *reinterpret_cast<const uint32_t *>(cursor);
      break;
    case TILEDB_UINT64:
      unpacked[idx] = *reinterpret_cast<const uint64_t *>(cursor);
      break;
    case TILEDB_INT8:
      unpacked[idx] = *reinterpret_cast<const int8_t *>(cursor);
      break;
    case TILEDB_UINT8:
      unpacked[idx] = *reinterpret_cast<const uint8_t *>(cursor);
      break;
    case TILEDB_INT16:
      unpacked[idx] = *reinterpret_cast<const int16_t *>(cursor);
      break;
    case TILEDB_UINT16:
      unpacked[idx] = *reinterpret_cast<const uint16_t *>(cursor);
      break;
    default:
      throw TileDBError("TileDB datatype not supported");
    }
  }

  // A single value is returned directly rather than as a 1-tuple.
  if (value_num == 1) {
    return unpacked[0];
  }
  return unpacked;
}

/**
 * Convert a raw TileDB metadata value into a NumPy array.
 *
 * @param value_type TileDB datatype of the stored value.
 * @param value_num  Number of values reported by TileDB.
 * @param value_ptr  Pointer to the raw value data; may be null.
 *
 * @return The result of `numpy.empty((0,), dtype)` when `value_ptr` is null,
 *         otherwise the result of `numpy.frombuffer` over a memoryview of the
 *         value bytes.
 */
py::array unpack_metadata_ndarray(tiledb_datatype_t value_type,
                                  uint32_t value_num, const char *value_ptr) {
  const py::dtype np_dtype = tdb_to_np_dtype(value_type, 1);
  const auto numpy = py::module::import("numpy");

  if (value_ptr == nullptr) {
    return numpy.attr("empty")(py::make_tuple(0), np_dtype);
  }

  // Special case for TILEDB_STRING_UTF8: TileDB assumes size=1, so the value
  // count already is the byte count. Every other type is scaled by its
  // element size (the result is kept in 32 bits).
  const uint32_t byte_count =
      value_type == TILEDB_STRING_UTF8
          ? value_num
          : static_cast<uint32_t>(value_num * tiledb_datatype_size(value_type));

  const auto view = py::memoryview::from_memory(value_ptr, byte_count);
  return numpy.attr("frombuffer")(view, np_dtype);
}

/**
 * Fetch the metadata value stored under `key` on `group`.
 *
 * @param group      Group to read from.
 * @param key        Metadata key.
 * @param is_ndarray When true the value is unpacked with
 *                   `unpack_metadata_ndarray`, otherwise with
 *                   `unpack_metadata_val`.
 *
 * @return A 2-tuple `(value, tiledb_datatype)`.
 */
py::tuple get_metadata(Group &group, const py::str &key, bool is_ndarray) {
  tiledb_datatype_t stored_type;
  uint32_t stored_count;
  const char *stored_data;

  group.get_metadata(key, &stored_type, &stored_count,
                     reinterpret_cast<const void **>(&stored_data));

  if (!is_ndarray) {
    auto unpacked = unpack_metadata_val(stored_type, stored_count, stored_data);
    return py::make_tuple(unpacked, stored_type);
  }

  auto unpacked =
      unpack_metadata_ndarray(stored_type, stored_count, stored_data);
  return py::make_tuple(unpacked, stored_type);
}

/**
 * Report whether `group` has a member identified by `obj`.
 *
 * The lookup is attempted with `Group::member`; a `TileDBError` raised by it
 * is treated as "not a member". Other exception types propagate.
 */
bool has_member(Group &group, std::string obj) {
  try {
    group.member(obj);
    return true;
  } catch (const TileDBError &) {
    return false;
  }
}

void init_group(py::module &m) {
py::class_<Group>(m, "Group")
.def(
Expand All @@ -186,14 +31,35 @@ void init_group(py::module &m) {
.def_property_readonly("_uri", &Group::uri)
.def_property_readonly("_query_type", &Group::query_type)

.def("_put_metadata", put_metadata_numpy)
.def("_put_metadata", put_metadata)

.def("_put_metadata",
[](Group &group, const std::string &key, py::array value) {
MetadataAdapter<Group> a;
a.put_metadata_numpy(group, key, value);
})
.def("_put_metadata",
[](Group &group, const std::string &key,
tiledb_datatype_t value_type, uint32_t value_num,
py::buffer value) {
MetadataAdapter<Group> a;
a.put_metadata(group, key, value_type, value_num, value);
})
.def("_delete_metadata", &Group::delete_metadata)
.def("_has_metadata", has_metadata)
.def("_has_metadata",
[](Group &group, const std::string &key) {
MetadataAdapter<Group> a;
return a.has_metadata(group, key);
})
.def("_metadata_num", &Group::metadata_num)
.def("_get_metadata", get_metadata)
.def("_get_key_from_index", get_key_from_index)
.def("_get_metadata",
[](Group &group, const std::string &key, bool is_ndarray) {
MetadataAdapter<Group> a;
return a.get_metadata(group, key, is_ndarray);
})
.def("_get_key_from_index",
[](Group &group, uint64_t index) {
MetadataAdapter<Group> a;
return a.get_key_from_index(group, index);
})

.def("_add", &Group::add_member, py::arg("uri"),
py::arg("relative") = false, py::arg("name") = std::nullopt
Expand All @@ -209,7 +75,11 @@ void init_group(py::module &m) {
static_cast<Object (Group::*)(uint64_t) const>(&Group::member))
.def("_member",
static_cast<Object (Group::*)(std::string) const>(&Group::member))
.def("_has_member", has_member)
.def("_has_member",
[](Group &group, std::string obj) {
MetadataAdapter<Group> a;
return a.has_member(group, obj);
})
.def("_is_relative", &Group::is_relative)
.def("_dump", &Group::dump)

Expand Down
Loading

0 comments on commit 66c401d

Please sign in to comment.