Update NA repr (#30821)

TomAugspurger · web-flow · commit 493363ef60dd · 2020-01-09T10:18:57.000-06:00
* Update NA repr. Closes #30415
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -298,8 +298,11 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
         -k"-from_arrays -from_breaks -from_intervals -from_tuples -set_closed -to_tuples -interval_range"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Doctests arrays/string_.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/arrays/string_.py
+    MSG='Doctests arrays'; echo $MSG
+    pytest -q --doctest-modules \
+        pandas/core/arrays/string_.py \
+        pandas/core/arrays/integer.py \
+        pandas/core/arrays/boolean.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests arrays/boolean.py' ; echo $MSG
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -1153,7 +1153,7 @@ To completely override the default values that are recognized as missing, specif
 .. _io.navaluesconst:
 
 The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A',
-'n/a', 'NA', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
+'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``.
 
 Let us consider some examples:
 
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -576,6 +576,7 @@ Other API changes
   Supplying anything else than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`)
 - When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`)
 - :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`).
+- Added ``<NA>`` to the list of default NA values for :meth:`read_csv` (:issue:`30821`)
 
 
 .. _whatsnew_100.api.documentation:
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -354,10 +354,7 @@ class NAType(C_NAType):
         return NAType._instance
 
     def __repr__(self) -> str:
-        return "NA"
-
-    def __str__(self) -> str:
-        return "NA"
+        return "<NA>"
 
     def __bool__(self):
         raise TypeError("boolean value of NA is ambiguous")
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -1369,6 +1369,7 @@ STR_NA_VALUES = {
     "N/A",
     "n/a",
     "NA",
+    "<NA>",
     "#NA",
     "NULL",
     "null",
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -244,7 +244,7 @@ class BooleanArray(BaseMaskedArray):
 
     >>> pd.array([True, False, None], dtype="boolean")
     <BooleanArray>
-    [True, False, NA]
+    [True, False, <NA>]
     Length: 3, dtype: boolean
     """
 
@@ -527,7 +527,7 @@ def any(self, skipna: bool = True, **kwargs):
         >>> pd.array([True, False, pd.NA]).any(skipna=False)
         True
         >>> pd.array([False, False, pd.NA]).any(skipna=False)
-        NA
+        <NA>
         """
         kwargs.pop("axis", None)
         nv.validate_any((), kwargs)
@@ -592,7 +592,7 @@ def all(self, skipna: bool = True, **kwargs):
         required (whether ``pd.NA`` is True or False influences the result):
 
         >>> pd.array([True, True, pd.NA]).all(skipna=False)
-        NA
+        <NA>
         >>> pd.array([True, False, pd.NA]).all(skipna=False)
         False
         """
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -301,19 +301,19 @@ class IntegerArray(BaseMaskedArray):
     >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
     >>> int_array
     <IntegerArray>
-    [1, NaN, 3]
+    [1, <NA>, 3]
     Length: 3, dtype: Int32
 
     String aliases for the dtypes are also available. They are capitalized.
 
     >>> pd.array([1, None, 3], dtype='Int32')
     <IntegerArray>
-    [1, NaN, 3]
+    [1, <NA>, 3]
     Length: 3, dtype: Int32
 
     >>> pd.array([1, None, 3], dtype='UInt16')
     <IntegerArray>
-    [1, NaN, 3]
+    [1, <NA>, 3]
     Length: 3, dtype: UInt16
     """
 
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -131,7 +131,7 @@ class StringArray(PandasArray):
     --------
     >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
     <StringArray>
-    ['This is', 'some text', NA, 'data.']
+    ['This is', 'some text', <NA>, 'data.']
     Length: 4, dtype: string
 
     Unlike ``object`` dtype arrays, ``StringArray`` doesn't allow non-string
@@ -146,7 +146,7 @@ class StringArray(PandasArray):
 
     >>> pd.array(["a", None, "c"], dtype="string") == "a"
     <BooleanArray>
-    [True, NA, False]
+    [True, <NA>, False]
     Length: 3, dtype: boolean
     """
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1777,12 +1777,8 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
             values = values[slicer]
         mask = isna(values)
 
-        try:
-            values[mask] = na_rep
-        except Exception:
-            # eg SparseArray does not support setitem, needs to be converted to ndarray
-            return super().to_native_types(slicer, na_rep, quoting, **kwargs)
-        values = values.astype(str)
+        values = np.asarray(values.astype(object))
+        values[mask] = na_rep
 
         # we are expected to return a 2-d ndarray
         return values.reshape(1, len(values))
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1230,7 +1230,7 @@ def _format(x):
                     if x is None:
                         return "None"
                     elif x is NA:
-                        return "NA"
+                        return formatter(x)
                     elif x is NaT or np.isnat(x):
                         return "NaT"
                 except (TypeError, ValueError):
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -9,14 +9,16 @@
 import pandas._testing as tm
 
 
-def test_repr_with_NA():
-    a = pd.array(["a", pd.NA, "b"], dtype="string")
-    for obj in [a, pd.Series(a), pd.DataFrame({"a": a})]:
-        assert "NA" in repr(obj) and "NaN" not in repr(obj)
-        assert "NA" in str(obj) and "NaN" not in str(obj)
-        if hasattr(obj, "_repr_html_"):
-            html_repr = obj._repr_html_()
-            assert "NA" in html_repr and "NaN" not in html_repr
+def test_repr():
+    df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype="string")})
+    expected = "      A\n0     a\n1  <NA>\n2     b"
+    assert repr(df) == expected
+
+    expected = "0       a\n1    <NA>\n2       b\nName: A, dtype: string"
+    assert repr(df.A) == expected
+
+    expected = "<StringArray>\n['a', <NA>, 'b']\nLength: 3, dtype: string"
+    assert repr(df.A.array) == expected
 
 
 def test_none_to_nan():
diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py
@@ -251,6 +251,18 @@ def test_coerce_to_numpy_array():
         np.array(arr, dtype="bool")
 
 
+def test_repr():
+    df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
+    expected = "       A\n0   True\n1  False\n2   <NA>"
+    assert repr(df) == expected
+
+    expected = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean"
+    assert repr(df.A) == expected
+
+    expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
+    assert repr(df.A.array) == expected
+
+
 @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
 def test_to_numpy(box):
     con = pd.Series if box else pd.array
@@ -335,7 +347,7 @@ def test_astype():
     tm.assert_numpy_array_equal(result, expected)
 
     result = arr.astype("str")
-    expected = np.array(["True", "False", "NA"], dtype="object")
+    expected = np.array(["True", "False", "<NA>"], dtype="object")
     tm.assert_numpy_array_equal(result, expected)
 
     # no missing values
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
@@ -90,17 +90,17 @@ def test_repr_dtype(dtype, expected):
 
 def test_repr_array():
     result = repr(integer_array([1, None, 3]))
-    expected = "<IntegerArray>\n[1, NA, 3]\nLength: 3, dtype: Int64"
+    expected = "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64"
     assert result == expected
 
 
 def test_repr_array_long():
     data = integer_array([1, 2, None] * 1000)
     expected = (
         "<IntegerArray>\n"
-        "[ 1,  2, NA,  1,  2, NA,  1,  2, NA,  1,\n"
+        "[   1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,\n"
         " ...\n"
-        " NA,  1,  2, NA,  1,  2, NA,  1,  2, NA]\n"
+        " <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>]\n"
         "Length: 3000, dtype: Int64"
     )
     result = repr(data)
@@ -673,7 +673,7 @@ def test_to_numpy_na_raises(self, dtype):
 
     def test_astype_str(self):
         a = pd.array([1, 2, None], dtype="Int64")
-        expected = np.array(["1", "2", "NA"], dtype=object)
+        expected = np.array(["1", "2", "<NA>"], dtype=object)
 
         tm.assert_numpy_array_equal(a.astype(str), expected)
         tm.assert_numpy_array_equal(a.astype("str"), expected)
@@ -683,7 +683,7 @@ def test_frame_repr(data_missing):
 
     df = pd.DataFrame({"A": data_missing})
     result = repr(df)
-    expected = "    A\n0  NA\n1   1"
+    expected = "      A\n0  <NA>\n1     1"
     assert result == expected
 
 
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
@@ -89,6 +89,7 @@ def test_default_na_values(all_parsers):
         "N/A",
         "n/a",
         "NA",
+        "<NA>",
         "#NA",
         "NULL",
         "null",
diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py
@@ -16,8 +16,8 @@ def test_singleton():
 
 
 def test_repr():
-    assert repr(NA) == "NA"
-    assert str(NA) == "NA"
+    assert repr(NA) == "<NA>"
+    assert str(NA) == "<NA>"
 
 
 def test_truthiness():

-Original file line number
+Diff line change
     "N/A",
     "n/a",
     "NA",
 +    "<NA>",
     "#NA",
     "NULL",
     "null",