fix some str accessor inconsistencies (#4339)

mathause · keewis · web-flow · commit e6c111355137 · 2020-08-15T12:28:04.000+02:00
* add tests

* update code

* whats new

* adapt some function descriptions

* Apply suggestions from code review

Co-authored-by: keewis <keewis@users.noreply.github.com>

* revert import switch

* fix whats new

Co-authored-by: keewis <keewis@users.noreply.github.com>
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -55,6 +55,8 @@ Bug fixes
 ~~~~~~~~~
 - Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`)
   `Sam Morley <https://github.com/inakleinbottle>`_.
+- Fixed inconsistencies between docstring and functionality for :py:meth:`DataArray.str.get`
+  and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser <https://github.com/mathause>`_.
 
 
 Documentation
diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py
@@ -90,7 +90,7 @@ def _apply(self, f, dtype=None):
 
     def len(self):
         """
-        Compute the length of each element in the array.
+        Compute the length of each string in the array.
 
         Returns
         -------
@@ -104,9 +104,9 @@ def __getitem__(self, key):
         else:
             return self.get(key)
 
-    def get(self, i):
+    def get(self, i, default=""):
         """
-        Extract element from indexable in each element in the array.
+        Extract character number `i` from each string in the array.
 
         Parameters
         ----------
@@ -120,12 +120,18 @@ def get(self, i):
         -------
         items : array of objects
         """
-        obj = slice(-1, None) if i == -1 else slice(i, i + 1)
-        return self._apply(lambda x: x[obj])
+        s = slice(-1, None) if i == -1 else slice(i, i + 1)
+
+        def f(x):
+            item = x[s]
+
+            return item if item else default
+
+        return self._apply(f)
 
     def slice(self, start=None, stop=None, step=None):
         """
-        Slice substrings from each element in the array.
+        Slice substrings from each string in the array.
 
         Parameters
         ----------
@@ -359,7 +365,7 @@ def count(self, pat, flags=0):
 
     def startswith(self, pat):
         """
-        Test if the start of each string element matches a pattern.
+        Test if the start of each string in the array matches a pattern.
 
         Parameters
         ----------
@@ -378,7 +384,7 @@ def startswith(self, pat):
 
     def endswith(self, pat):
         """
-        Test if the end of each string element matches a pattern.
+        Test if the end of each string in the array matches a pattern.
 
         Parameters
         ----------
@@ -432,8 +438,7 @@ def pad(self, width, side="left", fillchar=" "):
 
     def center(self, width, fillchar=" "):
         """
-        Filling left and right side of strings in the array with an
-        additional character.
+        Pad left and right side of each string in the array.
 
         Parameters
         ----------
@@ -451,8 +456,7 @@ def center(self, width, fillchar=" "):
 
     def ljust(self, width, fillchar=" "):
         """
-        Filling right side of strings in the array with an additional
-        character.
+        Pad right side of each string in the array.
 
         Parameters
         ----------
@@ -470,7 +474,7 @@ def ljust(self, width, fillchar=" "):
 
     def rjust(self, width, fillchar=" "):
         """
-        Filling left side of strings in the array with an additional character.
+        Pad left side of each string in the array.
 
         Parameters
         ----------
@@ -488,7 +492,7 @@ def rjust(self, width, fillchar=" "):
 
     def zfill(self, width):
         """
-        Pad strings in the array by prepending '0' characters.
+        Pad each string in the array by prepending '0' characters.
 
         Strings in the array are padded with '0' characters on the
         left of the string to reach a total string length  `width`. Strings
@@ -508,7 +512,7 @@ def zfill(self, width):
 
     def contains(self, pat, case=True, flags=0, regex=True):
         """
-        Test if pattern or regex is contained within a string of the array.
+        Test if pattern or regex is contained within each string of the array.
 
         Return boolean array based on whether a given pattern or regex is
         contained within a string of the array.
@@ -554,7 +558,7 @@ def contains(self, pat, case=True, flags=0, regex=True):
 
     def match(self, pat, case=True, flags=0):
         """
-        Determine if each string matches a regular expression.
+        Determine if each string in the array matches a regular expression.
 
         Parameters
         ----------
@@ -613,7 +617,7 @@ def strip(self, to_strip=None, side="both"):
 
     def lstrip(self, to_strip=None):
         """
-        Remove leading and trailing characters.
+        Remove leading characters.
 
         Strip whitespaces (including newlines) or a set of specified characters
         from each string in the array from the left side.
@@ -633,7 +637,7 @@ def lstrip(self, to_strip=None):
 
     def rstrip(self, to_strip=None):
         """
-        Remove leading and trailing characters.
+        Remove trailing characters.
 
         Strip whitespaces (including newlines) or a set of specified characters
         from each string in the array from the right side.
@@ -653,8 +657,7 @@ def rstrip(self, to_strip=None):
 
     def wrap(self, width, **kwargs):
         """
-        Wrap long strings in the array to be formatted in paragraphs with
-        length less than a given width.
+        Wrap long strings in the array in paragraphs with length less than `width`.
 
         This method has the same keyword parameters and defaults as
         :class:`textwrap.TextWrapper`.
@@ -663,38 +666,20 @@ def wrap(self, width, **kwargs):
         ----------
         width : int
             Maximum line-width
-        expand_tabs : bool, optional
-            If true, tab characters will be expanded to spaces (default: True)
-        replace_whitespace : bool, optional
-            If true, each whitespace character (as defined by
-            string.whitespace) remaining after tab expansion will be replaced
-            by a single space (default: True)
-        drop_whitespace : bool, optional
-            If true, whitespace that, after wrapping, happens to end up at the
-            beginning or end of a line is dropped (default: True)
-        break_long_words : bool, optional
-            If true, then words longer than width will be broken in order to
-            ensure that no lines are longer than width. If it is false, long
-            words will not be broken, and some lines may be longer than width.
-            (default: True)
-        break_on_hyphens : bool, optional
-            If true, wrapping will occur preferably on whitespace and right
-            after hyphens in compound words, as it is customary in English. If
-            false, only whitespaces will be considered as potentially good
-            places for line breaks, but you need to set break_long_words to
-            false if you want truly insecable words. (default: True)
+        **kwargs
+            keyword arguments passed into :class:`textwrap.TextWrapper`.
 
         Returns
         -------
         wrapped : same type as values
         """
-        tw = textwrap.TextWrapper(width=width)
+        tw = textwrap.TextWrapper(width=width, **kwargs)
         f = lambda x: "\n".join(tw.wrap(x))
         return self._apply(f)
 
     def translate(self, table):
         """
-        Map all characters in the string through the given mapping table.
+        Map characters of each string through the given mapping table.
 
         Parameters
         ----------
diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py
@@ -596,7 +596,7 @@ def test_wrap():
     )
 
     # expected values
-    xp = xr.DataArray(
+    expected = xr.DataArray(
         [
             "hello world",
             "hello world!",
@@ -610,15 +610,29 @@ def test_wrap():
         ]
     )
 
-    rs = values.str.wrap(12, break_long_words=True)
-    assert_equal(rs, xp)
+    result = values.str.wrap(12, break_long_words=True)
+    assert_equal(result, expected)
 
     # test with pre and post whitespace (non-unicode), NaN, and non-ascii
     # Unicode
     values = xr.DataArray(["  pre  ", "\xac\u20ac\U00008000 abadcafe"])
-    xp = xr.DataArray(["  pre", "\xac\u20ac\U00008000 ab\nadcafe"])
-    rs = values.str.wrap(6)
-    assert_equal(rs, xp)
+    expected = xr.DataArray(["  pre", "\xac\u20ac\U00008000 ab\nadcafe"])
+    result = values.str.wrap(6)
+    assert_equal(result, expected)
+
+
+def test_wrap_kwargs_passed():
+    # GH4334
+
+    values = xr.DataArray("  hello world  ")
+
+    result = values.str.wrap(7)
+    expected = xr.DataArray("  hello\nworld")
+    assert_equal(result, expected)
+
+    result = values.str.wrap(7, drop_whitespace=False)
+    expected = xr.DataArray("  hello\n world\n  ")
+    assert_equal(result, expected)
 
 
 def test_get(dtype):
@@ -642,6 +656,15 @@ def test_get(dtype):
     assert_equal(result, expected)
 
 
+def test_get_default(dtype):
+    # GH4334
+    values = xr.DataArray(["a_b", "c", ""]).astype(dtype)
+
+    result = values.str.get(2, "default")
+    expected = xr.DataArray(["b", "default", "default"]).astype(dtype)
+    assert_equal(result, expected)
+
+
 def test_encode_decode():
     data = xr.DataArray(["a", "b", "a\xe4"])
     encoded = data.str.encode("utf-8")