Skip to content

Commit e6c1113

Browse files
mathause and keewis
authored
fix some str accessor inconsistencies (#4339)
* add tests * update code * what's new * adapt some fnc descriptions * Apply suggestions from code review Co-authored-by: keewis <[email protected]> * revert import switch * fix what's new Co-authored-by: keewis <[email protected]>
1 parent 3b5a8ee commit e6c1113

File tree

3 files changed

+58
-48
lines changed

3 files changed

+58
-48
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ Bug fixes
5555
~~~~~~~~~
5656
- Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`)
5757
`Sam Morley <https://github.com/inakleinbottle>`_.
58+
- Fixed inconsistencies between docstring and functionality for :py:meth:`DataArray.str.get`
59+
and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser <https://github.com/mathause>`_.
5860

5961

6062
Documentation

xarray/core/accessor_str.py

Lines changed: 27 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def _apply(self, f, dtype=None):
9090

9191
def len(self):
9292
"""
93-
Compute the length of each element in the array.
93+
Compute the length of each string in the array.
9494
9595
Returns
9696
-------
@@ -104,9 +104,9 @@ def __getitem__(self, key):
104104
else:
105105
return self.get(key)
106106

107-
def get(self, i):
107+
def get(self, i, default=""):
108108
"""
109-
Extract element from indexable in each element in the array.
109+
Extract character number `i` from each string in the array.
110110
111111
Parameters
112112
----------
@@ -120,12 +120,18 @@ def get(self, i):
120120
-------
121121
items : array of objects
122122
"""
123-
obj = slice(-1, None) if i == -1 else slice(i, i + 1)
124-
return self._apply(lambda x: x[obj])
123+
s = slice(-1, None) if i == -1 else slice(i, i + 1)
124+
125+
def f(x):
126+
item = x[s]
127+
128+
return item if item else default
129+
130+
return self._apply(f)
125131

126132
def slice(self, start=None, stop=None, step=None):
127133
"""
128-
Slice substrings from each element in the array.
134+
Slice substrings from each string in the array.
129135
130136
Parameters
131137
----------
@@ -359,7 +365,7 @@ def count(self, pat, flags=0):
359365

360366
def startswith(self, pat):
361367
"""
362-
Test if the start of each string element matches a pattern.
368+
Test if the start of each string in the array matches a pattern.
363369
364370
Parameters
365371
----------
@@ -378,7 +384,7 @@ def startswith(self, pat):
378384

379385
def endswith(self, pat):
380386
"""
381-
Test if the end of each string element matches a pattern.
387+
Test if the end of each string in the array matches a pattern.
382388
383389
Parameters
384390
----------
@@ -432,8 +438,7 @@ def pad(self, width, side="left", fillchar=" "):
432438

433439
def center(self, width, fillchar=" "):
434440
"""
435-
Filling left and right side of strings in the array with an
436-
additional character.
441+
Pad left and right side of each string in the array.
437442
438443
Parameters
439444
----------
@@ -451,8 +456,7 @@ def center(self, width, fillchar=" "):
451456

452457
def ljust(self, width, fillchar=" "):
453458
"""
454-
Filling right side of strings in the array with an additional
455-
character.
459+
Pad right side of each string in the array.
456460
457461
Parameters
458462
----------
@@ -470,7 +474,7 @@ def ljust(self, width, fillchar=" "):
470474

471475
def rjust(self, width, fillchar=" "):
472476
"""
473-
Filling left side of strings in the array with an additional character.
477+
Pad left side of each string in the array.
474478
475479
Parameters
476480
----------
@@ -488,7 +492,7 @@ def rjust(self, width, fillchar=" "):
488492

489493
def zfill(self, width):
490494
"""
491-
Pad strings in the array by prepending '0' characters.
495+
Pad each string in the array by prepending '0' characters.
492496
493497
Strings in the array are padded with '0' characters on the
494498
left of the string to reach a total string length `width`. Strings
@@ -508,7 +512,7 @@ def zfill(self, width):
508512

509513
def contains(self, pat, case=True, flags=0, regex=True):
510514
"""
511-
Test if pattern or regex is contained within a string of the array.
515+
Test if pattern or regex is contained within each string of the array.
512516
513517
Return boolean array based on whether a given pattern or regex is
514518
contained within a string of the array.
@@ -554,7 +558,7 @@ def contains(self, pat, case=True, flags=0, regex=True):
554558

555559
def match(self, pat, case=True, flags=0):
556560
"""
557-
Determine if each string matches a regular expression.
561+
Determine if each string in the array matches a regular expression.
558562
559563
Parameters
560564
----------
@@ -613,7 +617,7 @@ def strip(self, to_strip=None, side="both"):
613617

614618
def lstrip(self, to_strip=None):
615619
"""
616-
Remove leading and trailing characters.
620+
Remove leading characters.
617621
618622
Strip whitespaces (including newlines) or a set of specified characters
619623
from each string in the array from the left side.
@@ -633,7 +637,7 @@ def lstrip(self, to_strip=None):
633637

634638
def rstrip(self, to_strip=None):
635639
"""
636-
Remove leading and trailing characters.
640+
Remove trailing characters.
637641
638642
Strip whitespaces (including newlines) or a set of specified characters
639643
from each string in the array from the right side.
@@ -653,8 +657,7 @@ def rstrip(self, to_strip=None):
653657

654658
def wrap(self, width, **kwargs):
655659
"""
656-
Wrap long strings in the array to be formatted in paragraphs with
657-
length less than a given width.
660+
Wrap long strings in the array in paragraphs with length less than `width`.
658661
659662
This method has the same keyword parameters and defaults as
660663
:class:`textwrap.TextWrapper`.
@@ -663,38 +666,20 @@ def wrap(self, width, **kwargs):
663666
----------
664667
width : int
665668
Maximum line-width
666-
expand_tabs : bool, optional
667-
If true, tab characters will be expanded to spaces (default: True)
668-
replace_whitespace : bool, optional
669-
If true, each whitespace character (as defined by
670-
string.whitespace) remaining after tab expansion will be replaced
671-
by a single space (default: True)
672-
drop_whitespace : bool, optional
673-
If true, whitespace that, after wrapping, happens to end up at the
674-
beginning or end of a line is dropped (default: True)
675-
break_long_words : bool, optional
676-
If true, then words longer than width will be broken in order to
677-
ensure that no lines are longer than width. If it is false, long
678-
words will not be broken, and some lines may be longer than width.
679-
(default: True)
680-
break_on_hyphens : bool, optional
681-
If true, wrapping will occur preferably on whitespace and right
682-
after hyphens in compound words, as it is customary in English. If
683-
false, only whitespaces will be considered as potentially good
684-
places for line breaks, but you need to set break_long_words to
685-
false if you want truly insecable words. (default: True)
669+
**kwargs
670+
keyword arguments passed into :class:`textwrap.TextWrapper`.
686671
687672
Returns
688673
-------
689674
wrapped : same type as values
690675
"""
691-
tw = textwrap.TextWrapper(width=width)
676+
tw = textwrap.TextWrapper(width=width, **kwargs)
692677
f = lambda x: "\n".join(tw.wrap(x))
693678
return self._apply(f)
694679

695680
def translate(self, table):
696681
"""
697-
Map all characters in the string through the given mapping table.
682+
Map characters of each string through the given mapping table.
698683
699684
Parameters
700685
----------

xarray/tests/test_accessor_str.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ def test_wrap():
596596
)
597597

598598
# expected values
599-
xp = xr.DataArray(
599+
expected = xr.DataArray(
600600
[
601601
"hello world",
602602
"hello world!",
@@ -610,15 +610,29 @@ def test_wrap():
610610
]
611611
)
612612

613-
rs = values.str.wrap(12, break_long_words=True)
614-
assert_equal(rs, xp)
613+
result = values.str.wrap(12, break_long_words=True)
614+
assert_equal(result, expected)
615615

616616
# test with pre and post whitespace (non-unicode), NaN, and non-ascii
617617
# Unicode
618618
values = xr.DataArray([" pre ", "\xac\u20ac\U00008000 abadcafe"])
619-
xp = xr.DataArray([" pre", "\xac\u20ac\U00008000 ab\nadcafe"])
620-
rs = values.str.wrap(6)
621-
assert_equal(rs, xp)
619+
expected = xr.DataArray([" pre", "\xac\u20ac\U00008000 ab\nadcafe"])
620+
result = values.str.wrap(6)
621+
assert_equal(result, expected)
622+
623+
624+
def test_wrap_kwargs_passed():
625+
# GH4334
626+
627+
values = xr.DataArray(" hello world ")
628+
629+
result = values.str.wrap(7)
630+
expected = xr.DataArray(" hello\nworld")
631+
assert_equal(result, expected)
632+
633+
result = values.str.wrap(7, drop_whitespace=False)
634+
expected = xr.DataArray(" hello\n world\n ")
635+
assert_equal(result, expected)
622636

623637

624638
def test_get(dtype):
@@ -642,6 +656,15 @@ def test_get(dtype):
642656
assert_equal(result, expected)
643657

644658

659+
def test_get_default(dtype):
660+
# GH4334
661+
values = xr.DataArray(["a_b", "c", ""]).astype(dtype)
662+
663+
result = values.str.get(2, "default")
664+
expected = xr.DataArray(["b", "default", "default"]).astype(dtype)
665+
assert_equal(result, expected)
666+
667+
645668
def test_encode_decode():
646669
data = xr.DataArray(["a", "b", "a\xe4"])
647670
encoded = data.str.encode("utf-8")

0 commit comments

Comments
 (0)