diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ee26842ba95..b48e5a4041f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,8 @@ Bug fixes ~~~~~~~~~ - Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`) `Sam Morley <https://github.com/inakleinbottle>`_. +- Fixed inconsistencies between docstring and functionality for :py:meth:`DataArray.str.get` + and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser <https://github.com/mathause>`_. Documentation diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 5502ba72855..1f0c95af71e 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -90,7 +90,7 @@ def _apply(self, f, dtype=None): def len(self): """ - Compute the length of each element in the array. + Compute the length of each string in the array. Returns ------- @@ -104,9 +104,9 @@ def __getitem__(self, key): else: return self.get(key) - def get(self, i): + def get(self, i, default=""): """ - Extract element from indexable in each element in the array. + Extract character number `i` from each string in the array. Parameters ---------- @@ -120,12 +120,18 @@ def get(self, i): ------- items : array of objects """ - obj = slice(-1, None) if i == -1 else slice(i, i + 1) - return self._apply(lambda x: x[obj]) + s = slice(-1, None) if i == -1 else slice(i, i + 1) + + def f(x): + item = x[s] + + return item if item else default + + return self._apply(f) def slice(self, start=None, stop=None, step=None): """ - Slice substrings from each element in the array. + Slice substrings from each string in the array. Parameters ---------- @@ -359,7 +365,7 @@ def count(self, pat, flags=0): def startswith(self, pat): """ - Test if the start of each string element matches a pattern. + Test if the start of each string in the array matches a pattern. Parameters ---------- @@ -378,7 +384,7 @@ def startswith(self, pat): def endswith(self, pat): """ - Test if the end of each string element matches a pattern. 
+ Test if the end of each string in the array matches a pattern. Parameters ---------- @@ -432,8 +438,7 @@ def pad(self, width, side="left", fillchar=" "): def center(self, width, fillchar=" "): """ - Filling left and right side of strings in the array with an - additional character. + Pad left and right side of each string in the array. Parameters ---------- @@ -451,8 +456,7 @@ def center(self, width, fillchar=" "): def ljust(self, width, fillchar=" "): """ - Filling right side of strings in the array with an additional - character. + Pad right side of each string in the array. Parameters ---------- @@ -470,7 +474,7 @@ def ljust(self, width, fillchar=" "): def rjust(self, width, fillchar=" "): """ - Filling left side of strings in the array with an additional character. + Pad left side of each string in the array. Parameters ---------- @@ -488,7 +492,7 @@ def rjust(self, width, fillchar=" "): def zfill(self, width): """ - Pad strings in the array by prepending '0' characters. + Pad each string in the array by prepending '0' characters. Strings in the array are padded with '0' characters on the left of the string to reach a total string length `width`. Strings @@ -508,7 +512,7 @@ def zfill(self, width): def contains(self, pat, case=True, flags=0, regex=True): """ - Test if pattern or regex is contained within a string of the array. + Test if pattern or regex is contained within each string of the array. Return boolean array based on whether a given pattern or regex is contained within a string of the array. @@ -554,7 +558,7 @@ def contains(self, pat, case=True, flags=0, regex=True): def match(self, pat, case=True, flags=0): """ - Determine if each string matches a regular expression. + Determine if each string in the array matches a regular expression. Parameters ---------- @@ -613,7 +617,7 @@ def strip(self, to_strip=None, side="both"): def lstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove leading characters. 
Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the left side. @@ -633,7 +637,7 @@ def lstrip(self, to_strip=None): def rstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove trailing characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the right side. @@ -653,8 +657,7 @@ def rstrip(self, to_strip=None): def wrap(self, width, **kwargs): """ - Wrap long strings in the array to be formatted in paragraphs with - length less than a given width. + Wrap long strings in the array in paragraphs with length less than `width`. This method has the same keyword parameters and defaults as :class:`textwrap.TextWrapper`. @@ -663,38 +666,20 @@ def wrap(self, width, **kwargs): ---------- width : int Maximum line-width - expand_tabs : bool, optional - If true, tab characters will be expanded to spaces (default: True) - replace_whitespace : bool, optional - If true, each whitespace character (as defined by - string.whitespace) remaining after tab expansion will be replaced - by a single space (default: True) - drop_whitespace : bool, optional - If true, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True) - break_long_words : bool, optional - If true, then words longer than width will be broken in order to - ensure that no lines are longer than width. If it is false, long - words will not be broken, and some lines may be longer than width. - (default: True) - break_on_hyphens : bool, optional - If true, wrapping will occur preferably on whitespace and right - after hyphens in compound words, as it is customary in English. If - false, only whitespaces will be considered as potentially good - places for line breaks, but you need to set break_long_words to - false if you want truly insecable words. 
(default: True) + **kwargs + keyword arguments passed into :class:`textwrap.TextWrapper`. Returns ------- wrapped : same type as values """ - tw = textwrap.TextWrapper(width=width) + tw = textwrap.TextWrapper(width=width, **kwargs) f = lambda x: "\n".join(tw.wrap(x)) return self._apply(f) def translate(self, table): """ - Map all characters in the string through the given mapping table. + Map characters of each string through the given mapping table. Parameters ---------- diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py index a987d302202..e0cbdb7377a 100644 --- a/xarray/tests/test_accessor_str.py +++ b/xarray/tests/test_accessor_str.py @@ -596,7 +596,7 @@ def test_wrap(): ) # expected values - xp = xr.DataArray( + expected = xr.DataArray( [ "hello world", "hello world!", @@ -610,15 +610,29 @@ def test_wrap(): ] ) - rs = values.str.wrap(12, break_long_words=True) - assert_equal(rs, xp) + result = values.str.wrap(12, break_long_words=True) + assert_equal(result, expected) # test with pre and post whitespace (non-unicode), NaN, and non-ascii # Unicode values = xr.DataArray([" pre ", "\xac\u20ac\U00008000 abadcafe"]) - xp = xr.DataArray([" pre", "\xac\u20ac\U00008000 ab\nadcafe"]) - rs = values.str.wrap(6) - assert_equal(rs, xp) + expected = xr.DataArray([" pre", "\xac\u20ac\U00008000 ab\nadcafe"]) + result = values.str.wrap(6) + assert_equal(result, expected) + + +def test_wrap_kwargs_passed(): + # GH4334 + + values = xr.DataArray(" hello world ") + + result = values.str.wrap(7) + expected = xr.DataArray(" hello\nworld") + assert_equal(result, expected) + + result = values.str.wrap(7, drop_whitespace=False) + expected = xr.DataArray(" hello\n world\n ") + assert_equal(result, expected) def test_get(dtype): @@ -642,6 +656,15 @@ def test_get(dtype): assert_equal(result, expected) +def test_get_default(dtype): + # GH4334 + values = xr.DataArray(["a_b", "c", ""]).astype(dtype) + + result = values.str.get(2, "default") + expected = 
xr.DataArray(["b", "default", "default"]).astype(dtype) + assert_equal(result, expected) + + def test_encode_decode(): data = xr.DataArray(["a", "b", "a\xe4"]) encoded = data.str.encode("utf-8")