From 391435b433e7ab0330a36dc96c84949908d1e0be Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 14 Aug 2020 16:04:46 +0200 Subject: [PATCH 1/7] add testst --- xarray/tests/test_accessor_str.py | 35 +++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py index a987d302202..e0cbdb7377a 100644 --- a/xarray/tests/test_accessor_str.py +++ b/xarray/tests/test_accessor_str.py @@ -596,7 +596,7 @@ def test_wrap(): ) # expected values - xp = xr.DataArray( + expected = xr.DataArray( [ "hello world", "hello world!", @@ -610,15 +610,29 @@ def test_wrap(): ] ) - rs = values.str.wrap(12, break_long_words=True) - assert_equal(rs, xp) + result = values.str.wrap(12, break_long_words=True) + assert_equal(result, expected) # test with pre and post whitespace (non-unicode), NaN, and non-ascii # Unicode values = xr.DataArray([" pre ", "\xac\u20ac\U00008000 abadcafe"]) - xp = xr.DataArray([" pre", "\xac\u20ac\U00008000 ab\nadcafe"]) - rs = values.str.wrap(6) - assert_equal(rs, xp) + expected = xr.DataArray([" pre", "\xac\u20ac\U00008000 ab\nadcafe"]) + result = values.str.wrap(6) + assert_equal(result, expected) + + +def test_wrap_kwargs_passed(): + # GH4334 + + values = xr.DataArray(" hello world ") + + result = values.str.wrap(7) + expected = xr.DataArray(" hello\nworld") + assert_equal(result, expected) + + result = values.str.wrap(7, drop_whitespace=False) + expected = xr.DataArray(" hello\n world\n ") + assert_equal(result, expected) def test_get(dtype): @@ -642,6 +656,15 @@ def test_get(dtype): assert_equal(result, expected) +def test_get_default(dtype): + # GH4334 + values = xr.DataArray(["a_b", "c", ""]).astype(dtype) + + result = values.str.get(2, "default") + expected = xr.DataArray(["b", "default", "default"]).astype(dtype) + assert_equal(result, expected) + + def test_encode_decode(): data = xr.DataArray(["a", "b", "a\xe4"]) encoded = data.str.encode("utf-8") From 9b7cc36df35fce947811a9f27ba348963ba8eb8a Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 14 Aug 2020 16:05:37 +0200 Subject: [PATCH 2/7] update code --- xarray/core/accessor_str.py | 39 +++++++++++++------------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 5502ba72855..69610da6681 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -39,7 +39,6 @@ import codecs import re -import textwrap import numpy as np @@ -104,7 +103,7 @@ def __getitem__(self, key): else: return self.get(key) - def get(self, i): + def get(self, i, default=""): """ Extract element from indexable in each element in the array. @@ -120,8 +119,14 @@ def get(self, i): ------- items : array of objects """ - obj = slice(-1, None) if i == -1 else slice(i, i + 1) - return self._apply(lambda x: x[obj]) + s = slice(-1, None) if i == -1 else slice(i, i + 1) + + def f(x): + item = x[s] + + return item if item else default + + return self._apply(f) def slice(self, start=None, stop=None, step=None): """ @@ -663,32 +668,16 @@ def wrap(self, width, **kwargs): ---------- width : int Maximum line-width - expand_tabs : bool, optional - If true, tab characters will be expanded to spaces (default: True) - replace_whitespace : bool, optional - If true, each whitespace character (as defined by - string.whitespace) remaining after tab expansion will be replaced - by a single space (default: True) - drop_whitespace : bool, optional - If true, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True) - break_long_words : bool, optional - If true, then words longer than width will be broken in order to - ensure that no lines are longer than width. If it is false, long - words will not be broken, and some lines may be longer than width. - (default: True) - break_on_hyphens : bool, optional - If true, wrapping will occur preferably on whitespace and right - after hyphens in compound words, as it is customary in English. If - false, only whitespaces will be considered as potentially good - places for line breaks, but you need to set break_long_words to - false if you want truly insecable words. (default: True) + kwargs + keyword arguments passed into :class:`textwrap.TextWrapper. Returns ------- wrapped : same type as values """ - tw = textwrap.TextWrapper(width=width) + import textwrap + + tw = textwrap.TextWrapper(width=width, **kwargs) f = lambda x: "\n".join(tw.wrap(x)) return self._apply(f) From a62840a92a8ba6bd35127a55050ba49407291d90 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 14 Aug 2020 16:06:58 +0200 Subject: [PATCH 3/7] whats new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2023b79dbb5..1f49a874136 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -51,6 +51,8 @@ Bug fixes ~~~~~~~~~ - Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`) `Sam Morley `_. +- Fixed inconsistencies between docstring and functionality for :py.meth:`DataArray.str.get` + and :py.meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser `_. Documentation From d3ea8ab6f4a54d10f2b086b70b162201e15ef5a7 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 14 Aug 2020 17:50:25 +0200 Subject: [PATCH 4/7] adapt some fnc descriptions --- xarray/core/accessor_str.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 69610da6681..7d4795ae851 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -89,7 +89,7 @@ def _apply(self, f, dtype=None): def len(self): """ - Compute the length of each element in the array. + Compute the length of each string in the array. Returns ------- @@ -105,7 +105,7 @@ def __getitem__(self, key): def get(self, i, default=""): """ - Extract element from indexable in each element in the array. + Extract character number `i` from each string in the array. Parameters ---------- @@ -130,7 +130,7 @@ def f(x): def slice(self, start=None, stop=None, step=None): """ - Slice substrings from each element in the array. + Slice substrings from each string in the array. Parameters ---------- @@ -364,7 +364,7 @@ def count(self, pat, flags=0): def startswith(self, pat): """ - Test if the start of each string element matches a pattern. + Test if the start of each string in the array matches a pattern. Parameters ---------- @@ -383,7 +383,7 @@ def startswith(self, pat): def endswith(self, pat): """ - Test if the end of each string element matches a pattern. + Test if the end of each string in the array matches a pattern. Parameters ---------- @@ -437,8 +437,7 @@ def pad(self, width, side="left", fillchar=" "): def center(self, width, fillchar=" "): """ - Filling left and right side of strings in the array with an - additional character. + Pad left and right side of each string in the array. Parameters ---------- @@ -456,8 +455,7 @@ def center(self, width, fillchar=" "): def ljust(self, width, fillchar=" "): """ - Filling right side of strings in the array with an additional - character. + Pad right side of each string in the array. Parameters ---------- @@ -475,7 +473,7 @@ def ljust(self, width, fillchar=" "): def rjust(self, width, fillchar=" "): """ - Filling left side of strings in the array with an additional character. + Pad left side of each string in the array. Parameters ---------- @@ -493,7 +491,7 @@ def rjust(self, width, fillchar=" "): def zfill(self, width): """ - Pad strings in the array by prepending '0' characters. + Pad each string in the array by prepending '0' characters. Strings in the array are padded with '0' characters on the left of the string to reach a total string length `width`. Strings @@ -513,7 +511,7 @@ def zfill(self, width): def contains(self, pat, case=True, flags=0, regex=True): """ - Test if pattern or regex is contained within a string of the array. + Test if pattern or regex is contained within each string of the array. Return boolean array based on whether a given pattern or regex is contained within a string of the array. @@ -559,7 +557,7 @@ def contains(self, pat, case=True, flags=0, regex=True): def match(self, pat, case=True, flags=0): """ - Determine if each string matches a regular expression. + Determine if each string in the array matches a regular expression. Parameters ---------- @@ -618,7 +616,7 @@ def strip(self, to_strip=None, side="both"): def lstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove leading characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the left side. @@ -638,7 +636,7 @@ def lstrip(self, to_strip=None): def rstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove trailing characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the right side. @@ -658,8 +656,7 @@ def rstrip(self, to_strip=None): def wrap(self, width, **kwargs): """ - Wrap long strings in the array to be formatted in paragraphs with - length less than a given width. + Wrap long strings in the array in paragraphs with length less than `width`. This method has the same keyword parameters and defaults as :class:`textwrap.TextWrapper`. @@ -683,7 +680,7 @@ def wrap(self, width, **kwargs): def translate(self, table): """ - Map all characters in the string through the given mapping table. + Map characters of each string through the given mapping table. Parameters ---------- From 6168aef8bf977ab2281d8c416cc5f0b9a3dcb21f Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 14 Aug 2020 23:44:22 +0200 Subject: [PATCH 5/7] Apply suggestions from code review Co-authored-by: keewis --- xarray/core/accessor_str.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 7d4795ae851..2a44c803d5a 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -665,8 +665,8 @@ def wrap(self, width, **kwargs): ---------- width : int Maximum line-width - kwargs - keyword arguments passed into :class:`textwrap.TextWrapper. + **kwargs + keyword arguments passed into :class:`textwrap.TextWrapper`. Returns ------- From 15616b0c85e19881064735666f7e265953715c23 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 14 Aug 2020 23:53:53 +0200 Subject: [PATCH 6/7] revert import switch --- xarray/core/accessor_str.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 2a44c803d5a..1f0c95af71e 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -39,6 +39,7 @@ import codecs import re +import textwrap import numpy as np @@ -672,8 +673,6 @@ def wrap(self, width, **kwargs): ------- wrapped : same type as values """ - import textwrap - tw = textwrap.TextWrapper(width=width, **kwargs) f = lambda x: "\n".join(tw.wrap(x)) return self._apply(f) From 28c133a8996170713e2e1a00be8e9ee6f058c736 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 14 Aug 2020 23:58:27 +0200 Subject: [PATCH 7/7] fix whats new --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 257aacbf0e0..b48e5a4041f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,8 +55,8 @@ Bug fixes ~~~~~~~~~ - Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`) `Sam Morley `_. -- Fixed inconsistencies between docstring and functionality for :py.meth:`DataArray.str.get` - and :py.meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser `_. +- Fixed inconsistencies between docstring and functionality for :py:meth:`DataArray.str.get` + and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser `_. Documentation