Add larger bit depth for displaying flag_masks (#492)

* Add flag_mask formatting support for bitdepth greater than uint8

  Underlying cf flag_mask codes (e.g. == and .isin) work with flag_masks with more than 8 independent flags (e.g. uint32); however, the formatting codes that print messages to the screen are limited to uint8. Add/modify functions to unpack bits from larger integer dtypes.

* Edit fixed-width formatting for larger bit depth flag_masks

  Original formatting widths work well for 8-bit flag_masks, but need some modification for larger integer values.

* Fix bit_length calculating multiple times

  bit_length is re-calculated a second time inside _unpacked_bits, which can lead to an incorrect bit_length. Pass this value as an argument instead, since it was properly calculated earlier in the process.

* Remove unsigned integer check

  Flags do not have to be unsigned. For example, the "basin" test dataset will fail the unsigned integer test even though it contains valid int64 flags.

* Add test for 16bit version of flag_masks

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  For more information, see https://pre-commit.ci

* Add flag_indep_uint16 example dataset

* One more test

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <[email protected]>
xarray-contrib · Jan 13, 2024 · f6c8a1f · f6c8a1f
1 parent c2f2540
commit f6c8a1f
Show file tree

Hide file tree

Showing 3 changed files with 92 additions and 9 deletions.
diff --git a/cf_xarray/datasets.py b/cf_xarray/datasets.py
@@ -503,6 +503,16 @@ def _create_inexact_bounds():
     name="flag_var",
 )
 
+flag_indep_uint16 = xr.DataArray(
+    np.array([1, 10, 100, 1000, 10000, 65535], dtype=np.uint16),  # 65535 has all 16 bits set
+    dims=("time",),
+    attrs={
+        "flag_masks": [2**i for i in range(16)],  # one mask per bit: 1, 2, 4, ..., 32768
+        "flag_meanings": " ".join([f"flag_{2**i}" for i in range(16)]),  # names are flag_<mask value>
+        "standard_name": "flag_independent",
+    },
+    name="flag_var",
+)
 
 flag_mix = xr.DataArray(
     np.array([4, 8, 13, 5, 10, 14, 7, 3], np.uint8),

diff --git a/cf_xarray/formatting.py b/cf_xarray/formatting.py
@@ -151,8 +151,47 @@ def _maybe_panel(textgen, title: str, rich: bool):
         return title + ":\n" + text
 
 
-def find_set_bits(mask, value, repeated_masks):
-    bitpos = np.arange(8)[::-1]
+def _get_bit_length(dtype):
+    # Return the width of `dtype` in bits. Inputs that are not already an np.dtype are converted with np.dtype() first.
+    if not isinstance(dtype, np.dtype):
+        dtype = np.dtype(dtype)
+
+    # itemsize is the element size in bytes, so multiply by 8 to get bits
+    bit_length = 8 * dtype.itemsize
+
+    return bit_length
+
+
+def _unpackbits(mask, bit_length):
+    # Expand every integer in `mask` into `bit_length` 0/1 entries along a new last axis.
+    arr = np.asarray(mask)
+
+    # The output keeps the input's shape and appends one axis of length bit_length
+    output_shape = arr.shape + (bit_length,)
+    output = np.zeros(output_shape, dtype=np.uint8)
+
+    # Slot i receives bit i of each element, i.e. least-significant bit first
+    for i in range(bit_length):
+        output[..., i] = (arr >> i) & 1
+
+    return output[..., ::-1]  # flip the last axis so the most-significant bit comes first
+
+
+def _max_chars_for_bit_length(bit_length):
+    """
+    Find the maximum characters needed for a fixed-width display
+    for integer values of a certain bit_length. Use calculation
+    for signed integers, since it conservatively will always have
+    enough characters for signed or unsigned.
+    """
+    # Largest value representable by a signed integer of this bit length
+    max_val = 2 ** (bit_length - 1) - 1
+    # Digits of that value plus one extra character for a leading minus sign
+    return len(str(max_val)) + 1
+
+
+def find_set_bits(mask, value, repeated_masks, bit_length):
+    bitpos = np.arange(bit_length)[::-1]
     if mask not in repeated_masks:
         if value == 0:
             return [-1]
@@ -161,8 +200,8 @@ def find_set_bits(mask, value, repeated_masks):
         else:
             return [int(np.log2(mask))]
     else:
-        allset = bitpos[np.unpackbits(np.uint8(mask)) == 1]
-        setbits = bitpos[np.unpackbits(np.uint8(mask & value)) == 1]
+        allset = bitpos[_unpackbits(mask, bit_length) == 1]
+        setbits = bitpos[_unpackbits(mask & value, bit_length) == 1]
         return [b if abs(b) in setbits else -b for b in allset]
 
 
@@ -184,25 +223,30 @@ def _format_flags(accessor, rich):
     #     for f, (m, _) in flag_dict.items()
     #     if m is not None and m not in repeated_masks
     # ]
+
+    bit_length = _get_bit_length(accessor._obj.dtype)
+    mask_width = _max_chars_for_bit_length(bit_length)
+    key_width = max(len(key) for key in flag_dict)
+
     bit_text = []
     value_text = []
     for key, (mask, value) in flag_dict.items():
         if mask is None:
             bit_text.append("✗" if rich else "")
             value_text.append(str(value))
             continue
-        bits = find_set_bits(mask, value, repeated_masks)
-        bitstring = ["."] * 8
+        bits = find_set_bits(mask, value, repeated_masks, bit_length)
+        bitstring = ["."] * bit_length
         if bits == [-1]:
             continue
         else:
             for b in bits:
                 bitstring[abs(b)] = _format_cf_name("1" if b >= 0 else "0", rich)
         text = "".join(bitstring[::-1])
         value_text.append(
-            f"{mask} & {value}"
+            f"{mask:{mask_width}} & {value}"
             if key in excl_flags and value is not None
-            else str(mask)
+            else f"{mask:{mask_width}}"
         )
         bit_text.append(text if rich else f" / Bit: {text}")
 
@@ -230,7 +274,9 @@ def _format_flags(accessor, rich):
     else:
         rows = []
         for val, bit, key in zip(value_text, bit_text, flag_dict):
-            rows.append(f"{TAB}{_format_cf_name(key, rich)}: {TAB} {val} {bit}")
+            rows.append(
+                f"{TAB}{_format_cf_name(key, rich):>{key_width}}: {TAB} {val} {bit}"
+            )
         return _print_rows("Flag Meanings", rows, rich)
 
 

diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py
@@ -26,6 +26,7 @@
     dsg,
     flag_excl,
     flag_indep,
+    flag_indep_uint16,
     flag_mix,
     forecast,
     mollwds,
@@ -164,6 +165,7 @@ def test_repr() -> None:
     # Flag DataArray
     assert "Flag Variable" in repr(flag_excl.cf)
     assert "Flag Variable" in repr(flag_indep.cf)
+    assert "Flag Variable" in repr(flag_indep_uint16.cf)
     assert "Flag Variable" in repr(flag_mix.cf)
     assert "Flag Variable" in repr(basin.cf)
 
@@ -1837,6 +1839,30 @@ def test_flag_indep(self) -> None:
             res = flag_indep.cf.flags[name]
             np.testing.assert_equal(res.to_numpy(), expected[i])
 
+    def test_flag_indep_uint16(self) -> None:
+        expected = [  # one row per flag; columns follow the data values 1, 10, 100, 1000, 10000, 65535
+            [True, False, False, False, False, True],  # flag_1
+            [False, True, False, False, False, True],  # flag_2
+            [False, False, True, False, False, True],  # flag_4
+            [False, True, False, True, False, True],  # flag_8
+            [False, False, False, False, True, True],  # flag_16
+            [False, False, True, True, False, True],  # flag_32
+            [False, False, True, True, False, True],  # flag_64
+            [False, False, False, True, False, True],  # flag_128
+            [False, False, False, True, True, True],  # flag_256
+            [False, False, False, True, True, True],  # flag_512
+            [False, False, False, False, True, True],  # flag_1024
+            [False, False, False, False, False, True],  # flag_2048
+            [False, False, False, False, False, True],  # flag_4096
+            [False, False, False, False, True, True],  # flag_8192
+            [False, False, False, False, False, True],  # flag_16384
+            [False, False, False, False, False, True],  # flag_32768
+        ]
+        for i in range(16):  # flags are named after their mask value, flag_<2**i>
+            name = f"flag_{2**i}"
+            res = flag_indep_uint16.cf.flags[name]
+            np.testing.assert_equal(res.to_numpy(), expected[i])
+
     def test_flag_mix(self) -> None:
         expected = [
             [False, False, True, True, False, False, True, True],  # flag 1
@@ -1983,6 +2009,7 @@ def plane(coords, slopex, slopey):
         [basin, "Flag Variable"],
         [flag_mix, "Flag Variable"],
         [flag_indep, "Flag Variable"],
+        [flag_indep_uint16, "Flag Variable"],
         [flag_excl, "Flag Variable"],
         [dsg, "Discrete Sampling Geometry"],
     ),