Skip to content

Commit

Permalink
Merge pull request #4307 from Xarthisius/h5_ewah
Browse files Browse the repository at this point in the history
ENH: Allow to store multiple bitmap indices in the ewah-sidecar
  • Loading branch information
neutrinoceros authored Jan 24, 2023
2 parents 6a65a07 + 89d635a commit e7878a1
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 113 deletions.
1 change: 0 additions & 1 deletion tests/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ answer_tests:
local_arepo_010: # PR 3386
- yt/frontends/arepo/tests/test_outputs.py:test_arepo_bullet
- yt/frontends/arepo/tests/test_outputs.py:test_arepo_tng59
- yt/frontends/arepo/tests/test_outputs.py:test_index_override
- yt/frontends/arepo/tests/test_outputs.py:test_arepo_cr

local_artio_004:
Expand Down
17 changes: 0 additions & 17 deletions yt/frontends/arepo/tests/test_outputs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os
import tempfile
from collections import OrderedDict

from yt.frontends.arepo.api import ArepoHDF5Dataset
Expand Down Expand Up @@ -79,21 +77,6 @@ def test_arepo_tng59_periodicity():
assert ds2.periodicity == (False, False, False)


@requires_module("h5py")
@requires_ds(tng59_h5)
def test_index_override():
# This tests that we can supply an index_filename, and that when we do, it
# doesn't get written if our bounding_box is overwritten.
tmpfd, tmpname = tempfile.mkstemp(suffix=".index6_4.ewah")
os.close(tmpfd)
ds = data_dir_load(
tng59_h5, kwargs={"index_filename": tmpname, "bounding_box": _tng59_bbox}
)
assert isinstance(ds, ArepoHDF5Dataset)
ds.index
assert len(open(tmpname).read()) == 0


@requires_module("h5py")
@requires_file(tng59_h5)
def test_nh_density():
Expand Down
1 change: 1 addition & 0 deletions yt/frontends/gadget/tests/test_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
iso_kwargs = dict(bounding_box=[[-3, 3], [-3, 3], [-3, 3]])


@requires_module("h5py")
def test_gadget_binary():
header_specs = ["default", "default+pad32", ["default", "pad32"]]
curdir = os.getcwd()
Expand Down
28 changes: 3 additions & 25 deletions yt/geometry/particle_geometry_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import struct
import weakref
from glob import glob

import numpy as np

Expand Down Expand Up @@ -154,32 +153,11 @@ def _initialize_index(self):
else:
dont_cache = False

# If we have applied a bounding box then we can't cache the
# ParticleBitmap because it is domain dependent
if getattr(ds, "_domain_override", False):
dont_cache = True

if not hasattr(self.ds, "_file_hash"):
self.ds._file_hash = self._generate_hash()

# Load Morton index from file if provided
def _current_fname(order1, order2):
if getattr(ds, "index_filename", None) is None:
irange = f"[{order2}-{order2+2}]"
fn_prefix = f"{ds.parameter_filename}.index{order1}"
fn_glob = f"{fn_prefix}_{irange}.ewah"
fns = glob(fn_glob)
fname = f"{fn_prefix}_{order2}.ewah" if len(fns) == 0 else fns[-1]
else:
fname = ds.index_filename
lfname = os.path.split(fname)[-1]
new_order2 = int(lfname.split("_")[-1].split(".")[0])
return fname, new_order2

# We set order2 in case we loaded an index filename
# where the order2 is +2 the one which is assumed at
# first
fname, order2 = _current_fname(order1, order2)
fname = getattr(ds, "index_filename", None) or f"{ds.parameter_filename}.ewah"

self.regions = ParticleBitmap(
ds.domain_left_edge,
Expand All @@ -205,8 +183,8 @@ def _current_fname(order1, order2):
self._initialize_coarse_index()
self._initialize_refined_index()
# We now update fname since index_order2 may have changed
fname, _ = _current_fname(
self.regions.index_order1, self.regions.index_order2
fname = (
getattr(ds, "index_filename", None) or f"{ds.parameter_filename}.ewah"
)
wdir = os.path.dirname(fname)
if not dont_cache and os.access(wdir, os.W_OK):
Expand Down
130 changes: 61 additions & 69 deletions yt/geometry/particle_oct_container.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ Oct container tuned for Particles
"""


Expand All @@ -32,6 +31,7 @@ from cython cimport floating
from cython.operator cimport dereference, preincrement

from yt.geometry cimport oct_visitors
from yt.utilities.lib.fnv_hash cimport c_fnv_hash as fnv_hash
from yt.utilities.lib.fp_utils cimport *
from yt.utilities.lib.geometry_utils cimport (
bounded_morton,
Expand Down Expand Up @@ -59,7 +59,6 @@ from yt.funcs import get_pbar
from ..utilities.lib.ewah_bool_wrap cimport BoolArrayCollection

import os
import struct

# If set to 1, ghost cells are added at the refined level regardless of if the
# coarse cell containing it is refined in the selector.
Expand Down Expand Up @@ -431,6 +430,7 @@ cdef class ParticleBitmap:
cdef np.int32_t dims[3]
cdef np.int64_t file_hash
cdef np.uint64_t directional_max2[3]
cdef np.int64_t hash_value
cdef public np.uint64_t nfiles
cdef public np.int32_t index_order1
cdef public np.int32_t index_order2
Expand Down Expand Up @@ -484,6 +484,15 @@ cdef class ParticleBitmap:
self.particle_counts = np.zeros(1 << (index_order1 * 3), dtype="uint64")
self.bitmasks = FileBitmasks(self.nfiles)
self.collisions = BoolArrayCollection()
hash_data = bytearray()
hash_data.extend(self.file_hash.to_bytes(8, "little", signed=True))
hash_data.extend(np.array(self.left_edge).tobytes())
hash_data.extend(np.array(self.right_edge).tobytes())
hash_data.extend(np.array(self.periodicity).tobytes())
hash_data.extend(self.nfiles.to_bytes(2, "little", signed=False))
hash_data.extend(self.index_order1.to_bytes(2, "little", signed=True))
hash_data.extend(self.index_order2.to_bytes(2, "little", signed=True))
self.hash_value = fnv_hash(hash_data)

def _bitmask_logicaland(self, ifile, bcoll, out):
self.bitmasks._logicaland(ifile, bcoll, out)
Expand Down Expand Up @@ -985,47 +994,38 @@ cdef class ParticleBitmap:
nc, nm = self.bitmasks._find_collisions_refined(self.collisions,verbose)
return nc, nm

def calcsize_bitmasks(self):
# TODO: All cython
cdef bytes serial_BAC
cdef np.uint64_t ifile
cdef int out = 0
out += struct.calcsize('Q')
# Bitmaps for each file
for ifile in range(self.nfiles):
serial_BAC = self.bitmasks._dumps(ifile)
out += struct.calcsize('Q')
out += len(serial_BAC)
# Bitmap for collisions
serial_BAC = self.collisions._dumps()
out += struct.calcsize('Q')
out += len(serial_BAC)
return out

def get_bitmasks(self):
return self.bitmasks

def iseq_bitmask(self, solf):
return self.bitmasks._iseq(solf.get_bitmasks())

def save_bitmasks(self, fname):
import h5py
cdef bytes serial_BAC
cdef np.uint64_t ifile
f = open(fname,'wb')
# Header
f.write(struct.pack('Q', _bitmask_version))
f.write(struct.pack('q', self.file_hash))
f.write(struct.pack('Q', self.nfiles))
# Bitmap for each file
for ifile in range(self.nfiles):
serial_BAC = self.bitmasks._dumps(ifile)
f.write(struct.pack('Q', len(serial_BAC)))
f.write(serial_BAC)
# Collisions
serial_BAC = self.collisions._dumps()
f.write(struct.pack('Q', len(serial_BAC)))
f.write(serial_BAC)
f.close()
with h5py.File(fname, mode="a") as fp:
try:
grp = fp[str(self.hash_value)]
grp.clear()
except KeyError:
grp = fp.create_group(str(self.hash_value))

grp.attrs["bitmask_version"] = _bitmask_version
grp.attrs["nfiles"] = self.nfiles
# Add some attrs for convenience. They're not read back.
grp.attrs["file_hash"] = self.file_hash
grp.attrs["left_edge"] = self.left_edge
grp.attrs["right_edge"] = self.right_edge
grp.attrs["periodicity"] = self.periodicity
grp.attrs["index_order1"] = self.index_order1
grp.attrs["index_order2"] = self.index_order2

for ifile in range(self.nfiles):
serial_BAC = self.bitmasks._dumps(ifile)
grp.create_dataset(f"nfile_{ifile:05}", data=np.void(serial_BAC))
serial_BAC = self.collisions._dumps()
grp.create_dataset("collisions", data=np.void(serial_BAC))

def check_bitmasks(self):
return self.bitmasks._check()
Expand All @@ -1034,50 +1034,42 @@ cdef class ParticleBitmap:
self.bitmasks._reset()

def load_bitmasks(self, fname):
import h5py
cdef bint read_flag = 1
cdef bint irflag
cdef np.uint64_t ver
cdef np.uint64_t nfiles = 0
cdef np.int64_t file_hash
cdef np.uint64_t size_serial
cdef bint overwrite = 0
# Verify that file is correct version
if not os.path.isfile(fname):
raise OSError
f = open(fname,'rb')
ver, = struct.unpack('Q',f.read(struct.calcsize('Q')))
if ver == self.nfiles and ver != _bitmask_version:
overwrite = 1
nfiles = ver
ver = 0 # Original bitmaps had number of files first
if ver != _bitmask_version:
raise OSError("The file format of the index has changed since "
"this file was created. It will be replaced with an "
"updated version.")
# Read file hash
file_hash, = struct.unpack('q', f.read(struct.calcsize('q')))
if file_hash != self.file_hash:
raise OSError
# Read number of bitmaps
if nfiles == 0:
nfiles, = struct.unpack('Q', f.read(struct.calcsize('Q')))
if nfiles != self.nfiles:
raise OSError(
"Number of bitmasks ({}) conflicts with number of files "
"({})".format(nfiles, self.nfiles))
# Read bitmap for each file
pb = get_pbar("Loading particle index", nfiles)
for ifile in range(nfiles):
pb.update(ifile+1)
size_serial, = struct.unpack('Q', f.read(struct.calcsize('Q')))
irflag = self.bitmasks._loads(ifile, f.read(size_serial))
with h5py.File(fname, mode="r") as fp:
try:
grp = fp[str(self.hash_value)]
except KeyError:
raise OSError(f"Index not found in the {fname}")

ver = grp.attrs["bitmask_version"]
if ver == self.nfiles and ver != _bitmask_version:
overwrite = 1
ver = 0 # Original bitmaps had number of files first
if ver != _bitmask_version:
raise OSError("The file format of the index has changed since "
"this file was created. It will be replaced with an "
"updated version.")

# Read bitmap for each file
pb = get_pbar("Loading particle index", self.nfiles)
for ifile in range(self.nfiles):
pb.update(ifile+1)
irflag = self.bitmasks._loads(ifile, grp[f"nfile_{ifile:05}"][...].tobytes())
if irflag == 0:
read_flag = 0
pb.finish()
# Collisions
irflag = self.collisions._loads(grp["collisions"][...].tobytes())
if irflag == 0:
read_flag = 0
pb.finish()
# Collisions
size_serial, = struct.unpack('Q',f.read(struct.calcsize('Q')))
irflag = self.collisions._loads(f.read(size_serial))
f.close()

# Save in correct format
if overwrite == 1:
self.save_bitmasks(fname)
Expand Down
3 changes: 2 additions & 1 deletion yt/geometry/tests/test_particle_octree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from yt.geometry.oct_container import _ORDER_MAX
from yt.geometry.particle_oct_container import ParticleBitmap, ParticleOctreeContainer
from yt.geometry.selection_routines import RegionSelector
from yt.testing import assert_array_equal, assert_equal, assert_true
from yt.testing import assert_array_equal, assert_equal, assert_true, requires_module
from yt.units.unit_registry import UnitRegistry
from yt.units.yt_array import YTArray
from yt.utilities.lib.geometry_utils import (
Expand Down Expand Up @@ -276,6 +276,7 @@ def test_bitmap_collisions():
assert_equal(nr, 2 ** (3 * (order1 + order2)), "%d collisions" % nr)


@requires_module("h5py")
def test_bitmap_save_load():
# Test init for slabs of points in x
left_edge = np.array([0.0, 0.0, 0.0])
Expand Down

0 comments on commit e7878a1

Please sign in to comment.