Skip to content

Commit

Permalink
[WIP] Add Consolidation and Vacuuming For Group Metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenv committed Sep 19, 2023
1 parent 3070f4b commit bc68541
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 72 deletions.
9 changes: 7 additions & 2 deletions tiledb/cc/group.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ void init_group(py::module &m) {
.def("_add", &Group::add_member, py::arg("uri"),
py::arg("relative") = false, py::arg("name") = std::nullopt)
.def("_remove", &Group::remove_member)
.def("_delete_group", &Group::delete_group)
.def("_member_count", &Group::member_count)
.def("_member",
static_cast<Object (Group::*)(uint64_t) const>(&Group::member))
Expand All @@ -136,8 +137,12 @@ void init_group(py::module &m) {
.def("_dump", &Group::dump)

/* static methods */
.def("_create", &Group::create)
.def("_delete_group", &Group::delete_group);
.def_static("_create", &Group::create)
.def_static("_consolidate_metadata", &Group::consolidate_metadata,
py::arg("ctx"), py::arg("uri"), py::arg("config") = std::shared_ptr<Config>(nullptr))
.def_static("_vacuum_metadata", &Group::vacuum_metadata,
py::arg("ctx"), py::arg("uri"), py::arg("config") = std::shared_ptr<Config>(nullptr));

}

} // namespace libtiledbcpp
16 changes: 16 additions & 0 deletions tiledb/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,19 @@ def set_config(self, cfg: Config):
"Use `group.cl0se()` or Group(.., closed=True)"
)
self._set_config(cfg)

@staticmethod
def consolidate_metadata(
    uri: str, config: Config = None, ctx: Optional[Ctx] = None
):
    """
    Consolidate the metadata of the group located at ``uri``.

    :param uri: URI of the group whose metadata should be consolidated
    :param config: Optional configuration forwarded to the underlying
        consolidation call; ``None`` lets the binding use its default
    :param ctx: Optional context; ``default_ctx()`` is used when ``None``
    """
    if ctx is None:
        ctx = default_ctx()

    # Forward `config` as well: it used to be accepted by this method but
    # was never handed to the binding, so caller settings were ignored.
    lt.Group._consolidate_metadata(ctx, uri, config)

@staticmethod
def vacuum_metadata(uri: str, config: Config = None, ctx: Optional[Ctx] = None):
    """
    Vacuum the metadata of the group located at ``uri``.

    :param uri: URI of the group whose metadata should be vacuumed
    :param config: Optional configuration forwarded to the underlying
        vacuum call; ``None`` lets the binding use its default
    :param ctx: Optional context; ``default_ctx()`` is used when ``None``
    """
    if ctx is None:
        ctx = default_ctx()

    # Forward `config` as well: it used to be accepted by this method but
    # was never handed to the binding, so caller settings were ignored.
    lt.Group._vacuum_metadata(ctx, uri, config)
166 changes: 96 additions & 70 deletions tiledb/tests/test_group.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import pathlib

import numpy as np
import pytest
Expand Down Expand Up @@ -265,6 +266,82 @@ def test_group_named_members(self):
assert len(grp) == 0
grp.close()

def test_pass_context(self):
    """A member added through a Group opened with an explicit Ctx is
    visible when the group is reopened for reading with that same Ctx."""
    parent_uri = self.path("foo")
    child_uri = self.path("foo/bar")

    for uri in (parent_uri, child_uri):
        tiledb.group_create(uri)

    shared_ctx = tiledb.Ctx()

    with tiledb.Group(parent_uri, mode="w", ctx=shared_ctx) as writer:
        writer.add(child_uri, name="bar")

    with tiledb.Group(parent_uri, mode="r", ctx=shared_ctx) as reader:
        assert "bar" in reader

def test_relative(self):
    """Members added with ``relative=False`` / ``relative=True`` are
    reported accordingly by ``Group.is_relative``."""
    parent_uri = self.path("group1")
    absolute_child = self.path("group1/group2_1")
    relative_child = self.path("group1/group2_2")

    for child in (absolute_child, relative_child):
        tiledb.group_create(child)

    with tiledb.Group(parent_uri, mode="w") as writer:
        # One member is registered by its full path, the other by a
        # name given relative to the parent group.
        writer.add(absolute_child, name="group2_1", relative=False)
        writer.add("group2_2", name="group2_2", relative=True)

    with tiledb.Group(parent_uri, mode="r") as reader:
        assert reader.is_relative("group2_1") is False
        assert reader.is_relative("group2_2") is True

def test_set_config(self):
    """``set_config`` is rejected on an open group and takes effect on a
    closed one.

    Members "a" and "b" are written with ``sm.group.timestamp_end`` set to
    2000 and 3000; the group is then read back with the same key set to
    1000, 2000 and 3000, expecting 0, 1 and 2 members respectively.
    """
    ts_key = "sm.group.timestamp_end"
    root = self.path("foo")
    member_uris = [self.path("foo/a"), self.path("foo/b")]

    tiledb.group_create(root)

    schema = tiledb.ArraySchema(
        domain=tiledb.Domain(tiledb.Dim("id", dtype="ascii")),
        attrs=(tiledb.Attr("value", dtype=np.int64),),
        sparse=True,
    )
    for member_uri in member_uris:
        tiledb.Array.create(member_uri, schema)

    # Register each array under its own end timestamp.
    for stamp, member in ((2000, "a"), (3000, "b")):
        with tiledb.Group(root, "w", tiledb.Config({ts_key: stamp})) as writer:
            writer.add(name=member, uri=member, relative=True)

    timestamps = np.arange(1000, 4000, 1000, dtype=np.int64)

    # Path 1: apply the config through set_config on a closed group.
    for expected, stamp in enumerate(timestamps):
        cfg = tiledb.Config({ts_key: stamp})
        grp = tiledb.Group(root)

        # Cannot set config on open group
        with self.assertRaises(ValueError):
            grp.set_config(cfg)

        grp.close()
        grp.set_config(cfg)

        grp.open()
        assert len(grp) == expected
        grp.close()

    # Path 2: hand the config to the constructor directly.
    for expected, stamp in enumerate(timestamps):
        with tiledb.Group(root, config=tiledb.Config({ts_key: stamp})) as grp:
            assert len(grp) == expected


class GroupMetadataTest(GroupTestCase):
@pytest.mark.parametrize(
Expand Down Expand Up @@ -489,78 +566,27 @@ def test_basic(self, test_vals):
self.assert_metadata_roundtrip(grp.meta, test_vals)
grp.close()

def test_pass_context(self):
    """A member added through a Group opened with an explicit Ctx is
    visible when the group is reopened for reading with that same Ctx."""
    parent_uri = self.path("foo")
    child_uri = self.path("foo/bar")

    for uri in (parent_uri, child_uri):
        tiledb.group_create(uri)

    shared_ctx = tiledb.Ctx()

    with tiledb.Group(parent_uri, mode="w", ctx=shared_ctx) as writer:
        writer.add(child_uri, name="bar")

    with tiledb.Group(parent_uri, mode="r", ctx=shared_ctx) as reader:
        assert "bar" in reader

def test_relative(self):
    """Members added with ``relative=False`` / ``relative=True`` are
    reported accordingly by ``Group.is_relative``."""
    parent_uri = self.path("group1")
    absolute_child = self.path("group1/group2_1")
    relative_child = self.path("group1/group2_2")

    for child in (absolute_child, relative_child):
        tiledb.group_create(child)

    with tiledb.Group(parent_uri, mode="w") as writer:
        # One member is registered by its full path, the other by a
        # name given relative to the parent group.
        writer.add(absolute_child, name="group2_1", relative=False)
        writer.add("group2_2", name="group2_2", relative=True)

    with tiledb.Group(parent_uri, mode="r") as reader:
        assert reader.is_relative("group2_1") is False
        assert reader.is_relative("group2_2") is True

def test_set_config(self):
group_uri = self.path("foo")
array_uri_1 = self.path("foo/a")
array_uri_2 = self.path("foo/b")

tiledb.group_create(group_uri)

dom = tiledb.Domain(tiledb.Dim("id", dtype="ascii"))
attr = tiledb.Attr("value", dtype=np.int64)
sch = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True)

tiledb.Array.create(array_uri_1, sch)
tiledb.Array.create(array_uri_2, sch)

cfg = tiledb.Config({"sm.group.timestamp_end": 2000})
with tiledb.Group(group_uri, "w", cfg) as G:
G.add(name="a", uri="a", relative=True)

cfg = tiledb.Config({"sm.group.timestamp_end": 3000})
with tiledb.Group(group_uri, "w", cfg) as G:
G.add(name="b", uri="b", relative=True)

ms = np.arange(1000, 4000, 1000, dtype=np.int64)

for sz, m in enumerate(ms):
cfg = tiledb.Config({"sm.group.timestamp_end": m})

G = tiledb.Group(group_uri)
def test_consolidation_and_vac(self):
vfs = tiledb.VFS()
path = self.path("test_consolidation_and_vac")
tiledb.Group.create(path)

# Cannot set config on open group
with self.assertRaises(ValueError):
G.set_config(cfg)
cfg = tiledb.Config({"sm.group.timestamp_end": 1})
with tiledb.Group(path, "w", cfg) as grp:
grp.meta["meta"] = 1

G.close()
G.set_config(cfg)
cfg = tiledb.Config({"sm.group.timestamp_end": 2})
with tiledb.Group(path, "w", cfg) as grp:
grp.meta["meta"] = 2

G.open()
assert len(G) == sz
G.close()
cfg = tiledb.Config({"sm.group.timestamp_end": 3})
with tiledb.Group(path, "w", cfg) as grp:
grp.meta["meta"] = 3

for sz, m in enumerate(ms):
cfg = tiledb.Config({"sm.group.timestamp_end": m})
meta_path = pathlib.Path(path) / "__meta"
assert len(vfs.ls(meta_path)) == 3

with tiledb.Group(group_uri, config=cfg) as G:
assert len(G) == sz
tiledb.Group.consolidate_metadata(path)
tiledb.Group.vacuum_metadata(path)

assert len(vfs.ls(meta_path)) == 1

0 comments on commit bc68541

Please sign in to comment.