From aef44351215803b7421d32f06e8859097361f4fa Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Mon, 5 Jul 2021 19:41:51 +0200 Subject: [PATCH 1/9] Delegate `max_rows` from dataset `__repr__` As discussed in https://github.com/pydata/xarray/issues/5545 the default setting of `display_max_rows` is sometimes less useful to inspect datasets for completeness, and changing the option is not backwards compatible. In addition, a concise and short output dataset seems to be preferred by most people. The compromise is to keep the dataset's `__repr__` short and tunable via `xr.set_options(display_max_rows=...)`, and at the same time to enable the output of all items by explicitly requesting `ds.coords`, `ds.data_vars`, and `ds.attrs`. These explicit `__repr__`s also restore backwards compatibility in these cases. Slightly changes the internal implementation of `_mapping_repr()`: Setting (leaving) `max_rows` to `None` means "no limits". --- xarray/core/formatting.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 07864e81bb6..ab30facf49f 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -377,14 +377,12 @@ def _mapping_repr( ): if col_width is None: col_width = _calculate_col_width(mapping) - if max_rows is None: - max_rows = OPTIONS["display_max_rows"] summary = [f"{title}:"] if mapping: len_mapping = len(mapping) if not _get_boolean_with_default(expand_option_name, default=True): summary = [f"{summary[0]} ({len_mapping})"] - elif len_mapping > max_rows: + elif max_rows is not None and len_mapping > max_rows: summary = [f"{summary[0]} ({max_rows}/{len_mapping})"] first_rows = max_rows // 2 + max_rows % 2 items = list(mapping.items()) @@ -416,7 +414,7 @@ def _mapping_repr( ) -def coords_repr(coords, col_width=None): +def coords_repr(coords, col_width=None, max_rows=None): if col_width is None: col_width = _calculate_col_width(_get_col_items(coords)) return _mapping_repr( @@ -425,6 +423,7 @@ def coords_repr(coords, col_width=None): summarizer=summarize_coord, expand_option_name="display_expand_coords", col_width=col_width, + max_rows=max_rows, ) @@ -542,21 +541,22 @@ def dataset_repr(ds): summary = ["".format(type(ds).__name__)] col_width = _calculate_col_width(_get_col_items(ds.variables)) + max_rows = OPTIONS["display_max_rows"] dims_start = pretty_print("Dimensions:", col_width) summary.append("{}({})".format(dims_start, dim_summary(ds))) if ds.coords: - summary.append(coords_repr(ds.coords, col_width=col_width)) + summary.append(coords_repr(ds.coords, col_width=col_width, max_rows=max_rows)) unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords) if unindexed_dims_str: summary.append(unindexed_dims_str) - summary.append(data_vars_repr(ds.data_vars, col_width=col_width)) + summary.append(data_vars_repr(ds.data_vars, col_width=col_width, max_rows=max_rows)) if ds.attrs: - summary.append(attrs_repr(ds.attrs)) + summary.append(attrs_repr(ds.attrs, max_rows=max_rows)) return "\n".join(summary) From 6a8ca1920db533b39ffccebb456dac6bcba73a55 Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Mon, 5 Jul 2021 19:41:51 +0200 Subject: [PATCH 2/9] tests: Update dataset `__repr__` tests [1/2] Updates the dataset `__repr__` test to assure that the dataset output honours the `display_max_rows` setting, not the `data_vars` output. Discussed in https://github.com/pydata/xarray/issues/5545 --- xarray/tests/test_formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index b9ba57f99dc..8e6f2f588ef 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -525,7 +525,7 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr): with xr.set_options(display_max_rows=display_max_rows): # Parse the data_vars print and show only data_vars rows: - summary = formatting.data_vars_repr(ds.data_vars).split("\n") + summary = formatting.dataset_repr(ds).split("\n") summary = [v for v in summary if long_name in v] # The length should be less than or equal to display_max_rows: From f171cd997a466bdebd77e128f1787e806f0d45d6 Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Mon, 5 Jul 2021 19:41:51 +0200 Subject: [PATCH 3/9] tests: Extend dataset `__repr__` tests [2/2] Extends the dataset `__repr__` test to ensure that the output of `ds.coords`, `ds.data_vars`, and `ds.attrs` is of full length as desired. Includes more dimensions and coordinates to cover more cases. Discussed in https://github.com/pydata/xarray/issues/5545 --- xarray/tests/test_formatting.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 8e6f2f588ef..2c20682812b 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -509,15 +509,16 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr): long_name = "long_name" a = np.core.defchararray.add(long_name, np.arange(0, n_vars).astype(str)) b = np.core.defchararray.add("attr_", np.arange(0, n_attr).astype(str)) + c = np.core.defchararray.add("coord", np.arange(0, n_vars).astype(str)) attrs = {k: 2 for k in b} - coords = dict(time=np.array([0, 1])) + coords = {_c: np.array([0, 1]) for _c in c} data_vars = dict() - for v in a: + for (v, _c) in zip(a, coords.items()): data_vars[v] = xr.DataArray( name=v, data=np.array([3, 4]), - dims=["time"], - coords=coords, + dims=[_c[0]], + coords=dict([_c]), ) ds = xr.Dataset(data_vars) ds.attrs = attrs @@ -527,23 +528,41 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr): # Parse the data_vars print and show only data_vars rows: summary = formatting.dataset_repr(ds).split("\n") summary = [v for v in summary if long_name in v] - # The length should be less than or equal to display_max_rows: len_summary = len(summary) data_vars_print_size = min(display_max_rows, len_summary) assert len_summary == data_vars_print_size + summary = formatting.data_vars_repr(ds.data_vars).split("\n") + summary = [v for v in summary if long_name in v] + # The length should be equal to the number of data variables + len_summary = len(summary) + assert len_summary == n_vars + + summary = formatting.coords_repr(ds.coords).split("\n") + summary = [v for v in summary if "coord" in v] + # The length should be equal to the number of data variables + len_summary = len(summary) + assert len_summary == n_vars + + summary = formatting.attrs_repr(ds.attrs).split("\n") + summary = [v for v in summary if "attr_" in v] + # The length should be equal to the number of attributes + len_summary = len(summary) + assert len_summary == n_attr + with xr.set_options( display_expand_coords=False, display_expand_data_vars=False, display_expand_attrs=False, ): actual = formatting.dataset_repr(ds) + coord_s = ", ".join([f"{c}: {len(v)}" for c, v in coords.items()]) expected = dedent( f"""\ - Dimensions: (time: 2) - Coordinates: (1) + Dimensions: ({coord_s}) + Coordinates: (40) Data variables: ({n_vars}) Attributes: ({n_attr})""" ) From 3dd645b3141cdb97567cc245aa836b22fab2b3da Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Thu, 15 Jul 2021 11:13:53 +0200 Subject: [PATCH 4/9] doc: Add what's new entry for `__repr__` changes Sorted as a "breaking change" for 0.18.3 for now. --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cd65e0dbe35..614afb6311d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -62,6 +62,11 @@ Breaking changes pre-existing array values. This is a safer default than the prior ``mode="a"``, and allows for higher performance writes (:pull:`5252`). By `Stephan Hoyer `_. +- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``attrs``, ``coords``, + and ``data_vars`` ignore ``xarray.set_option(display_max_rows=...)`` and + show the full output when called directly as, e.g., ``ds.data_vars`` or + ``print(ds.data_vars)`` (:issue:`5545`, :pull:`5580`). + By `Stefan Bender `_. Deprecations ~~~~~~~~~~~~ From 3896e8e86b6c73b807517fcca1ae21e1913332c9 Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Tue, 10 Aug 2021 21:02:13 +0200 Subject: [PATCH 5/9] Revert "doc: Add what's new entry for `__repr__` changes" This reverts commit 3dd645b3141cdb97567cc245aa836b22fab2b3da. --- doc/whats-new.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 614afb6311d..cd65e0dbe35 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -62,11 +62,6 @@ Breaking changes pre-existing array values. This is a safer default than the prior ``mode="a"``, and allows for higher performance writes (:pull:`5252`). By `Stephan Hoyer `_. -- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``attrs``, ``coords``, - and ``data_vars`` ignore ``xarray.set_option(display_max_rows=...)`` and - show the full output when called directly as, e.g., ``ds.data_vars`` or - ``print(ds.data_vars)`` (:issue:`5545`, :pull:`5580`). - By `Stefan Bender `_. Deprecations ~~~~~~~~~~~~ From 888850c417a3e5e827b44eadfc62e46940c94ba6 Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Thu, 15 Jul 2021 11:13:53 +0200 Subject: [PATCH 6/9] doc: Add what's new entry for `__repr__` changes Sorted as a "breaking change", for 0.19.1 for now. --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3dad685aaf7..c7656dc2200 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,6 +27,11 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``attrs``, ``coords``, + and ``data_vars`` ignore ``xarray.set_option(display_max_rows=...)`` and + show the full output when called directly as, e.g., ``ds.data_vars`` or + ``print(ds.data_vars)`` (:issue:`5545`, :pull:`5580`). + By `Stefan Bender `_. Deprecations ~~~~~~~~~~~~ From 4239b90aadac248b2fefddc5b936a4fb06ad8469 Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Tue, 10 Aug 2021 21:15:18 +0200 Subject: [PATCH 7/9] doc: Remove `attrs` from `__repr__` changes Address comment from @keewis: `.attrs` is a standard python dict, so there's no custom repr. --- doc/whats-new.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c7656dc2200..3845999f513 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,10 +27,10 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ -- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``attrs``, ``coords``, - and ``data_vars`` ignore ``xarray.set_option(display_max_rows=...)`` and - show the full output when called directly as, e.g., ``ds.data_vars`` or - ``print(ds.data_vars)`` (:issue:`5545`, :pull:`5580`). +- The ``__repr__`` of a :py:class:`xarray.Dataset`'s ``coords`` and ``data_vars`` + ignore ``xarray.set_option(display_max_rows=...)`` and show the full output + when called directly as, e.g., ``ds.data_vars`` or ``print(ds.data_vars)`` + (:issue:`5545`, :pull:`5580`). By `Stefan Bender `_. Deprecations From 68b6e159afa4ce2ea166147588838142df7f2ccb Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Tue, 10 Aug 2021 21:51:41 +0200 Subject: [PATCH 8/9] tests: Remove `ds.attrs` formatting test According to @keewis: I don't think we need to test this because `attrs_repr` will only ever be called by `dataset_repr` / `array_repr`: on its own, the standard python `dict`'s `repr` will be used. --- xarray/tests/test_formatting.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 2c20682812b..b657aadebbe 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -545,12 +545,6 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr): len_summary = len(summary) assert len_summary == n_vars - summary = formatting.attrs_repr(ds.attrs).split("\n") - summary = [v for v in summary if "attr_" in v] - # The length should be equal to the number of attributes - len_summary = len(summary) - assert len_summary == n_attr - with xr.set_options( display_expand_coords=False, display_expand_data_vars=False, From 71cf034820f98096856156be71bed8d35a037be8 Mon Sep 17 00:00:00 2001 From: Stefan Bender Date: Tue, 10 Aug 2021 21:59:49 +0200 Subject: [PATCH 9/9] tests: Fix no. of coordinates in formatting_repr The number of coordinates changed to be the same as the number of variables, which only incidentally was fixed to 40. Updates the to-be-tested format string to use the same number of variables instead of the hard-coded one, which might be subject to change. --- xarray/tests/test_formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index b657aadebbe..d5e7d2ee232 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -556,7 +556,7 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr): f"""\ Dimensions: ({coord_s}) - Coordinates: (40) + Coordinates: ({n_vars}) Data variables: ({n_vars}) Attributes: ({n_attr})""" )