def vacuum(uri, config=None, ctx=None, timestamp=None):
    """
    Vacuum underlying array fragments after consolidation.

    :param str uri: URI of array to be vacuumed
    :param config: Override the context configuration for vacuuming.
        Defaults to None, inheriting the context parameters. When
        `timestamp` is given, the corresponding ``sm.vacuum.timestamp_*``
        keys are written into this object.
    :param (ctx: tiledb.Ctx, optional): Context. Defaults to
        `tiledb.default_ctx()`.
    :param timestamp: (deprecated) ``(start, end)`` pair restricting the
        vacuum to a time range. Either element may be None to leave that
        bound unset. Passing this argument emits a `DeprecationWarning`.
    :raises TypeError: cannot convert `uri` to unicode string, or
        `timestamp` is not a 2-element tuple
    :raises: :py:exc:`tiledb.TileDBError`

    The operation of this function is controlled by
    the `"sm.vacuum.mode"` parameter, which accepts the values ``fragments``,
    ``fragment_meta``, and ``array_meta``. Rather than passing the timestamp
    into this function, it may be set by using `"sm.vacuum.timestamp_start"` and
    `"sm.vacuum.timestamp_end"` which takes in a time in UNIX seconds. If both
    are set then this function's `timestamp` argument will be used.

    **Example:**

    >>> import tiledb, numpy as np
    >>> import tempfile
    >>> path = tempfile.mkdtemp()
    >>> with tiledb.from_numpy(path, np.random.rand(4)) as A:
    ...     pass # make sure to close
    >>> with tiledb.open(path, 'w') as A:
    ...     for i in range(4):
    ...         A[:] = np.ones(4, dtype=np.int64) * i
    >>> paths = tiledb.VFS().ls(path)
    >>> # should be 12 (2 base files + 2*5 fragment+ok files)
    >>> (); len(paths); () # doctest:+ELLIPSIS
    (...)
    >>> () ; tiledb.consolidate(path) ; () # doctest:+ELLIPSIS
    (...)
    >>> tiledb.vacuum(path)
    >>> paths = tiledb.VFS().ls(path)
    >>> # should now be 4 (2 base files + 2 fragment+ok files)
    >>> (); len(paths); () # doctest:+ELLIPSIS
    (...)

    """
    ctx = _get_ctx(ctx)
    if config is None:
        config = tiledb.Config()

    if timestamp is not None:
        warnings.warn(
            "Partial vacuuming via timestamp will be deprecrated in "
            "a future release and replaced by passing in fragment URIs.",
            DeprecationWarning,
        )

        # The check must reject a value that is EITHER not a sequence of the
        # expected kind OR not exactly two elements long. The previous
        # `not isinstance(...) and len(...) != 2` let wrong-length tuples
        # through and crashed in len() for non-sequences. Two-element lists
        # are still accepted because the old check happened to allow them.
        if not isinstance(timestamp, (tuple, list)) or len(timestamp) != 2:
            raise TypeError("'timestamp' argument expects tuple(start: int, end: int)")

        start, end = timestamp
        # Explicit timestamps take precedence over any values already present
        # in the configuration; a None bound leaves the config untouched.
        if start is not None:
            config["sm.vacuum.timestamp_start"] = start
        if end is not None:
            config["sm.vacuum.timestamp_end"] = end

    lt.Array.vacuum(ctx, uri, config)
- :raises TypeError: cannot convert `uri` to unicode string - :raises: :py:exc:`tiledb.TileDBError` - - This operation of this function is controlled by - the `"sm.vacuum.mode"` parameter, which accepts the values ``fragments``, - ``fragment_meta``, and ``array_meta``. Rather than passing the timestamp - into this function, it may be set by using `"sm.vacuum.timestamp_start"`and - `"sm.vacuum.timestamp_end"` which takes in a time in UNIX seconds. If both - are set then this function's `timestamp` argument will be used. - - **Example:** - - >>> import tiledb, numpy as np - >>> import tempfile - >>> path = tempfile.mkdtemp() - >>> with tiledb.from_numpy(path, np.random.rand(4)) as A: - ... pass # make sure to close - >>> with tiledb.open(path, 'w') as A: - ... for i in range(4): - ... A[:] = np.ones(4, dtype=np.int64) * i - >>> paths = tiledb.VFS().ls(path) - >>> # should be 12 (2 base files + 2*5 fragment+ok files) - >>> (); len(paths); () # doctest:+ELLIPSIS - (...) - >>> () ; tiledb.consolidate(path) ; () # doctest:+ELLIPSIS - (...) - >>> tiledb.vacuum(path) - >>> paths = tiledb.VFS().ls(path) - >>> # should now be 4 ( base files + 2 fragment+ok files) - >>> (); len(paths); () # doctest:+ELLIPSIS - (...) 
- - """ - cdef tiledb_ctx_t* ctx_ptr = NULL - cdef tiledb_config_t* config_ptr = NULL - - if not ctx: - ctx = default_ctx() - - if timestamp is not None: - warnings.warn("Partial vacuuming via timestamp will be deprecrated in " - "a future release and replaced by passing in fragment URIs.", - DeprecationWarning) - - if config is None: - config = Config() - - if not isinstance(timestamp, tuple) and len(timestamp) != 2: - raise TypeError("'timestamp' argument expects tuple(start: int, end: int)") - - if timestamp[0] is not None: - config["sm.vacuum.timestamp_start"] = timestamp[0] - if timestamp[1] is not None: - config["sm.vacuum.timestamp_end"] = timestamp[1] - - ctx_ptr = safe_ctx_ptr(ctx) - config_ptr = PyCapsule_GetPointer( - config.__capsule__(), "config") if config is not None else NULL - cdef bytes buri = unicode_path(uri) - cdef const char* uri_ptr = PyBytes_AS_STRING(buri) - - check_error(ctx, tiledb_array_vacuum(ctx_ptr, uri_ptr, config_ptr))