Skip to content

Commit

Permalink
Add larger bit depth for displaying flag_masks (#492)
Browse files Browse the repository at this point in the history
* Add flag_mask formatting support for bitdepth greater than uint8

The underlying cf flag_mask code (e.g. == and .isin) works with
flag_masks that have more than 8 independent flags (e.g. uint32). However,
the formatting code that prints messages to the screen is limited
to uint8. Add/modify functions to unpack bits from larger integer
dtypes.

* Edit fixed-width formatting for larger bit depth flag_masks

Original formatting widths work well for 8-bit flag_masks, but need
some modification for larger integer values.

* Fix bit_length being calculated multiple times

bit_length is calculated a second time inside _unpackbits,
which can lead to an incorrect bit_length. Pass this value as an
argument instead, since it was already calculated correctly earlier in
the process.

* Remove unsigned integer check

Flags do not have to be unsigned. For example, "basin" test dataset
will fail the unsigned integer test even though it contains valid
int64 flags.

* Add test for 16bit version of flag_masks

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add flag_indep_uint16 example dataset

* One more test

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
3 people authored Jan 13, 2024
1 parent c2f2540 commit f6c8a1f
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 9 deletions.
10 changes: 10 additions & 0 deletions cf_xarray/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,16 @@ def _create_inexact_bounds():
name="flag_var",
)

# Example flag variable with 16 independent single-bit flags stored as uint16.
# The masks are the powers of two 2**0 .. 2**15 and each meaning is named
# after its mask value ("flag_1", "flag_2", ..., "flag_32768").
flag_indep_uint16 = xr.DataArray(
# 65535 == 2**16 - 1, so the last element has every one of the 16 bits set.
np.array([1, 10, 100, 1000, 10000, 65535], dtype=np.uint16),
dims=("time",),
attrs={
"flag_masks": [2**i for i in range(16)],
"flag_meanings": " ".join([f"flag_{2**i}" for i in range(16)]),
"standard_name": "flag_independent",
},
name="flag_var",
)

flag_mix = xr.DataArray(
np.array([4, 8, 13, 5, 10, 14, 7, 3], np.uint8),
Expand Down
64 changes: 55 additions & 9 deletions cf_xarray/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,47 @@ def _maybe_panel(textgen, title: str, rich: bool):
return title + ":\n" + text


def find_set_bits(mask, value, repeated_masks):
bitpos = np.arange(8)[::-1]
def _get_bit_length(dtype):
# Check if dtype is a numpy dtype, if not, convert it
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)

# Calculate the bit length
bit_length = 8 * dtype.itemsize

return bit_length


def _unpackbits(mask, bit_length):
# Ensure the array is a numpy array
arr = np.asarray(mask)

# Create an output array of the appropriate shape
output_shape = arr.shape + (bit_length,)
output = np.zeros(output_shape, dtype=np.uint8)

# Unpack bits
for i in range(bit_length):
output[..., i] = (arr >> i) & 1

return output[..., ::-1]


def _max_chars_for_bit_length(bit_length):
"""
Find the maximum characters needed for a fixed-width display
for integer values of a certain bit_length. Use calculation
for signed integers, since it conservatively will always have
enough characters for signed or unsigned.
"""
# Maximum value for signed integers of this bit length
max_val = 2 ** (bit_length - 1) - 1
# Add 1 for the negative sign
return len(str(max_val)) + 1


def find_set_bits(mask, value, repeated_masks, bit_length):
bitpos = np.arange(bit_length)[::-1]
if mask not in repeated_masks:
if value == 0:
return [-1]
Expand All @@ -161,8 +200,8 @@ def find_set_bits(mask, value, repeated_masks):
else:
return [int(np.log2(mask))]
else:
allset = bitpos[np.unpackbits(np.uint8(mask)) == 1]
setbits = bitpos[np.unpackbits(np.uint8(mask & value)) == 1]
allset = bitpos[_unpackbits(mask, bit_length) == 1]
setbits = bitpos[_unpackbits(mask & value, bit_length) == 1]
return [b if abs(b) in setbits else -b for b in allset]


Expand All @@ -184,25 +223,30 @@ def _format_flags(accessor, rich):
# for f, (m, _) in flag_dict.items()
# if m is not None and m not in repeated_masks
# ]

bit_length = _get_bit_length(accessor._obj.dtype)
mask_width = _max_chars_for_bit_length(bit_length)
key_width = max(len(key) for key in flag_dict)

bit_text = []
value_text = []
for key, (mask, value) in flag_dict.items():
if mask is None:
bit_text.append("✗" if rich else "")
value_text.append(str(value))
continue
bits = find_set_bits(mask, value, repeated_masks)
bitstring = ["."] * 8
bits = find_set_bits(mask, value, repeated_masks, bit_length)
bitstring = ["."] * bit_length
if bits == [-1]:
continue
else:
for b in bits:
bitstring[abs(b)] = _format_cf_name("1" if b >= 0 else "0", rich)
text = "".join(bitstring[::-1])
value_text.append(
f"{mask} & {value}"
f"{mask:{mask_width}} & {value}"
if key in excl_flags and value is not None
else str(mask)
else f"{mask:{mask_width}}"
)
bit_text.append(text if rich else f" / Bit: {text}")

Expand Down Expand Up @@ -230,7 +274,9 @@ def _format_flags(accessor, rich):
else:
rows = []
for val, bit, key in zip(value_text, bit_text, flag_dict):
rows.append(f"{TAB}{_format_cf_name(key, rich)}: {TAB} {val} {bit}")
rows.append(
f"{TAB}{_format_cf_name(key, rich):>{key_width}}: {TAB} {val} {bit}"
)
return _print_rows("Flag Meanings", rows, rich)


Expand Down
27 changes: 27 additions & 0 deletions cf_xarray/tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
dsg,
flag_excl,
flag_indep,
flag_indep_uint16,
flag_mix,
forecast,
mollwds,
Expand Down Expand Up @@ -164,6 +165,7 @@ def test_repr() -> None:
# Flag DataArray
assert "Flag Variable" in repr(flag_excl.cf)
assert "Flag Variable" in repr(flag_indep.cf)
assert "Flag Variable" in repr(flag_indep_uint16.cf)
assert "Flag Variable" in repr(flag_mix.cf)
assert "Flag Variable" in repr(basin.cf)

Expand Down Expand Up @@ -1837,6 +1839,30 @@ def test_flag_indep(self) -> None:
res = flag_indep.cf.flags[name]
np.testing.assert_equal(res.to_numpy(), expected[i])

def test_flag_indep_uint16(self) -> None:
    # Row k is the expected boolean result for the flag named
    # f"flag_{2**k}"; each row has one entry per element of the variable.
    expected_by_flag = [
        [True, False, False, False, False, True],  # flag_1
        [False, True, False, False, False, True],  # flag_2
        [False, False, True, False, False, True],  # flag_4
        [False, True, False, True, False, True],  # flag_8
        [False, False, False, False, True, True],  # flag_16
        [False, False, True, True, False, True],  # flag_32
        [False, False, True, True, False, True],  # flag_64
        [False, False, False, True, False, True],  # flag_128
        [False, False, False, True, True, True],  # flag_256
        [False, False, False, True, True, True],  # flag_512
        [False, False, False, False, True, True],  # flag_1024
        [False, False, False, False, False, True],  # flag_2048
        [False, False, False, False, False, True],  # flag_4096
        [False, False, False, False, True, True],  # flag_8192
        [False, False, False, False, False, True],  # flag_16384
        [False, False, False, False, False, True],  # flag_32768
    ]
    for bit, want in enumerate(expected_by_flag):
        flag_name = f"flag_{2**bit}"
        got = flag_indep_uint16.cf.flags[flag_name]
        np.testing.assert_equal(got.to_numpy(), want)

def test_flag_mix(self) -> None:
expected = [
[False, False, True, True, False, False, True, True], # flag 1
Expand Down Expand Up @@ -1983,6 +2009,7 @@ def plane(coords, slopex, slopey):
[basin, "Flag Variable"],
[flag_mix, "Flag Variable"],
[flag_indep, "Flag Variable"],
[flag_indep_uint16, "Flag Variable"],
[flag_excl, "Flag Variable"],
[dsg, "Discrete Sampling Geometry"],
),
Expand Down

0 comments on commit f6c8a1f

Please sign in to comment.