Skip to content

Commit

Permalink
[python] Fix enum regression in PR #3647
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv committed Feb 4, 2025
1 parent ce0dc82 commit 557d143
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 13 deletions.
7 changes: 3 additions & 4 deletions apis/python/src/tiledbsoma/_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,10 +778,9 @@ def write(
write_options = TileDBWriteOptions.from_platform_config(platform_config)
sort_coords = write_options.sort_coords

for batch in values.to_batches():
mq = ManagedQuery(self)
mq._handle.set_array_data(batch)
mq._handle.submit_write(sort_coords or False)
mq = ManagedQuery(self)
mq._handle.set_array_data(values.combine_chunks().to_batches()[0])
mq._handle.submit_write(sort_coords or False)

if write_options.consolidate_and_vacuum:
self._handle._handle.consolidate_and_vacuum()
Expand Down
7 changes: 3 additions & 4 deletions apis/python/src/tiledbsoma/_point_cloud_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,10 +483,9 @@ def write(

clib_dataframe = self._handle._handle

for batch in values.to_batches():
mq = ManagedQuery(self, None)
mq._handle.set_array_data(batch)
mq._handle.submit_write(sort_coords or False)
mq = ManagedQuery(self, None)
mq._handle.set_array_data(values.combine_chunks().to_batches()[0])
mq._handle.submit_write(sort_coords or False)

if write_options.consolidate_and_vacuum:
clib_dataframe.consolidate_and_vacuum()
Expand Down
8 changes: 3 additions & 5 deletions apis/python/src/tiledbsoma/_sparse_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,11 +391,9 @@ def write(

if isinstance(values, pa.Table):
# Write bulk data
for batch in values.to_batches():
# clib_sparse_array.write(batch, sort_coords or False)
mq = ManagedQuery(self, None)
mq._handle.set_array_data(batch)
mq._handle.submit_write(sort_coords or False)
mq = ManagedQuery(self, None)
mq._handle.set_array_data(values.combine_chunks().to_batches()[0])
mq._handle.submit_write(sort_coords or False)

# Write bounding-box metadata
maxes = []
Expand Down
41 changes: 41 additions & 0 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2124,3 +2124,44 @@ def test_arrow_table_validity_with_slicing(tmp_path):
assert_array_equal(pdf["mybool"], table["mybool"])
assert_array_equal(pdf["mydatetime"], table["mydatetime"])
assert_array_equal(pdf["myenum"], table["myenum"])


def test_enum_regression_62887(tmp_path):
    """Round-trip a dictionary-typed column supplied as a multi-chunk table.

    The table is built from two chunks (8 rows and 2 rows). Column ``A`` is a
    dictionary array whose indices are all zero and whose dictionary holds
    the int8 values 0 through 8. After writing the table to a newly created
    ``soma.DataFrame`` and reading it back, column ``A`` must equal the
    column that was written.
    """
    uri = tmp_path.as_posix()

    enum_type = pa.dictionary(pa.int8(), pa.int8())
    schema = pa.schema(
        [
            pa.field("soma_joinid", pa.int64(), nullable=False),
            pa.field("A", enum_type),
        ]
    )

    # One dictionary array per chunk; every chunk carries the same dictionary
    # values and points each row at index 0.
    chunk_lengths = (8, 2)
    enum_chunks = [
        pa.DictionaryArray.from_arrays(
            indices=pa.array([0] * length, type=pa.int8()),
            dictionary=pa.array([0, 1, 2, 3, 4, 5, 6, 7, 8], type=pa.int8()),
        )
        for length in chunk_lengths
    ]
    joinid_chunks = [[0, 1, 2, 3, 4, 5, 6, 7], [8, 9]]

    tbl = pa.Table.from_pydict(
        {
            "soma_joinid": pa.chunked_array(joinid_chunks),
            "A": pa.chunked_array(enum_chunks),
        }
    )

    with soma.DataFrame.create(
        uri,
        schema=schema,
        index_column_names=["soma_joinid"],
        domain=[(0, 10000000)],
    ) as sdf:
        sdf.write(tbl)

    with soma.open(uri) as sdf:
        readback = sdf.read().concat()
        assert_array_equal(readback["A"], tbl["A"])

0 comments on commit 557d143

Please sign in to comment.