From 92fb0e254d499e467f0e61f7998a8747d23f2907 Mon Sep 17 00:00:00 2001 From: Agisilaos Kounelis <36283973+kounelisagis@users.noreply.github.com> Date: Mon, 27 Jan 2025 19:56:55 +0700 Subject: [PATCH] Add support for `BLOB` attribute type override in `from_pandas` (#2143) --- tiledb/dataframe_.py | 3 +++ tiledb/tests/test_pandas_dataframe.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tiledb/dataframe_.py b/tiledb/dataframe_.py index d030c4d8f0..e3c3a0f9d9 100644 --- a/tiledb/dataframe_.py +++ b/tiledb/dataframe_.py @@ -203,6 +203,9 @@ def from_dtype(cls, dtype, column_name, varlen_types=()): if isinstance(dtype, str) and dtype == "ascii": return cls("ascii", var=True) + if isinstance(dtype, str) and dtype == "blob": + return cls(np.bytes_, var=True) + dtype = pd_types.pandas_dtype(dtype) # Note: be careful if you rearrange the order of the following checks diff --git a/tiledb/tests/test_pandas_dataframe.py b/tiledb/tests/test_pandas_dataframe.py index f7374252c0..0b390b4805 100644 --- a/tiledb/tests/test_pandas_dataframe.py +++ b/tiledb/tests/test_pandas_dataframe.py @@ -271,15 +271,17 @@ def test_apply_dtype_index_ingest(self): { "a": np.random.random_sample(20), "b": [str(uuid.uuid4()) for _ in range(20)], + "c": [np.random.bytes(10) for _ in range(20)], } ), sparse=True, - index_dims=["a", "b"], - column_types={"a": np.float32, "b": np.bytes_}, + index_dims=["a", "b", "c"], + column_types={"a": np.float32, "b": np.bytes_, "c": "blob"}, ) with tiledb.open(uri) as A: assert A.schema.domain.dim(0).dtype == np.float32 assert A.schema.domain.dim(1).dtype == np.bytes_ + assert A.schema.domain.dim(2).dtype == np.bytes_ def test_apply_dtype_index_schema_only(self): uri = self.path("index_dtype_casted_dtype")