From 34aaaa520201b877749e38151139dca26b577c8b Mon Sep 17 00:00:00 2001 From: Linus <95619282+linus-md@users.noreply.github.com> Date: Tue, 12 Dec 2023 17:41:15 +0100 Subject: [PATCH 01/11] DOC: Add example with ``numpy_nullable`` to ``pd.read_xml()`` --- doc/source/user_guide/io.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 863a663fc2413..7ca5360faa69f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3279,6 +3279,33 @@ of reading in Wikipedia's very large (12 GB+) latest article data dump. [3578765 rows x 3 columns] +The following is an example with ``use_nullable_dtypes=True``: + +.. code-block:: ipython + xml_data = """ + + + 0 + 1 + 2.5 + True + a + 2019-12-31 00:00:00 + + + 1 + 4.5 + False + b + 2019-12-31 00:00:00 + + + """ + df = df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) + index a b c d e + 0 0 1 2.5 True a 2019-12-31 + 1 1 4.5 False b 2019-12-31 + .. _io.xml: Writing XML From 4b189db75f7b25d968779b0cb3a254502798bdf8 Mon Sep 17 00:00:00 2001 From: Linus Sommer <95619282+linus-md@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:00:19 +0100 Subject: [PATCH 02/11] Update io.rst --- doc/source/user_guide/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 7ca5360faa69f..ab76e664c26b0 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3301,7 +3301,7 @@ The following is an example with ``use_nullable_dtypes=True``: """ - df = df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) + df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) index a b c d e 0 0 1 2.5 True a 2019-12-31 1 1 4.5 False b 2019-12-31 From 4d8f2a27eb33bc144d27720dbb64cae72aff9012 Mon Sep 17 00:00:00 2001 From: linus-md Date: Thu, 14 Dec 2023 13:51:24 +0100 Subject: [PATCH 03/11] Update io.rst --- doc/source/user_guide/io.rst | 37 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index ab76e664c26b0..0cf26242ad44d 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3283,25 +3283,26 @@ The following is an example with ``use_nullable_dtypes=True``: .. code-block:: ipython xml_data = """ - - - 0 - 1 - 2.5 - True - a - 2019-12-31 00:00:00 - - - 1 - 4.5 - False - b - 2019-12-31 00:00:00 - - - """ + + + 0 + 1 + 2.5 + True + a + 2019-12-31 00:00:00 + + + 1 + 4.5 + False + b + 2019-12-31 00:00:00 + + + """ df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) + print(df) index a b c d e 0 0 1 2.5 True a 2019-12-31 1 1 4.5 False b 2019-12-31 From 0694d8eee9077da80ed07e5dcb90dd9867230821 Mon Sep 17 00:00:00 2001 From: linus-md Date: Thu, 14 Dec 2023 14:03:20 +0100 Subject: [PATCH 04/11] Update --- doc/source/user_guide/io.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 0cf26242ad44d..836855aaf248f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3282,6 +3282,7 @@ of reading in Wikipedia's very large (12 GB+) latest article data dump. The following is an example with ``use_nullable_dtypes=True``: .. code-block:: ipython + xml_data = """ @@ -3303,9 +3304,6 @@ The following is an example with ``use_nullable_dtypes=True``: """ df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) print(df) - index a b c d e - 0 0 1 2.5 True a 2019-12-31 - 1 1 4.5 False b 2019-12-31 .. _io.xml: From a4d41530c904f8e685433a86b9dbb21bd1c32eb3 Mon Sep 17 00:00:00 2001 From: linus-md Date: Fri, 15 Dec 2023 16:30:22 +0100 Subject: [PATCH 05/11] Move example --- doc/source/user_guide/io.rst | 26 -------------------------- pandas/io/xml.py | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 836855aaf248f..863a663fc2413 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3279,32 +3279,6 @@ of reading in Wikipedia's very large (12 GB+) latest article data dump. [3578765 rows x 3 columns] -The following is an example with ``use_nullable_dtypes=True``: - -.. code-block:: ipython - - xml_data = """ - - - 0 - 1 - 2.5 - True - a - 2019-12-31 00:00:00 - - - 1 - 4.5 - False - b - 2019-12-31 00:00:00 - - - """ - df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) - print(df) - .. _io.xml: Writing XML diff --git a/pandas/io/xml.py b/pandas/io/xml.py index bd3b515dbca2f..27c8569b61e36 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1126,6 +1126,33 @@ def read_xml( 0 square 360 4.0 1 circle 360 NaN 2 triangle 180 3.0 + + >>> xml_data = ''' + ... + ... + ... 0 + ... 1 + ... 2.5 + ... True + ... a + ... 2019-12-31 00:00:00 + ... + ... + ... 1 + ... 4.5 + ... False + ... b + ... 2019-12-31 00:00:00 + ... + ... + ... ''' + + >>> df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) + >>> print(df) + index a b c d e + 0 0 1 2.5 True a 2019-12-31 + 1 1 4.5 False b 2019-12-31 + """ check_dtype_backend(dtype_backend) From fb948907b4e07376176c0a8b26c6d2a49804cb7a Mon Sep 17 00:00:00 2001 From: linus-md Date: Fri, 15 Dec 2023 16:36:11 +0100 Subject: [PATCH 06/11] fix line length --- pandas/io/xml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 27c8569b61e36..5ffbcea409c44 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1147,7 +1147,8 @@ def read_xml( ... ... ''' - >>> df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"]) + >>> df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", + ... parse_dates=["e"]) >>> print(df) index a b c d e 0 0 1 2.5 True a 2019-12-31 From 54aba27c8dd0fa235060737f4403f989d2462f82 Mon Sep 17 00:00:00 2001 From: linus-md Date: Fri, 15 Dec 2023 17:46:39 +0100 Subject: [PATCH 07/11] Remove trailing whitespace --- pandas/io/xml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 5ffbcea409c44..75b8b545592c4 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1153,7 +1153,6 @@ def read_xml( index a b c d e 0 0 1 2.5 True a 2019-12-31 1 1 4.5 False b 2019-12-31 - """ check_dtype_backend(dtype_backend) From 238c5b08ac9b3200a4e263eef0257232b912be9c Mon Sep 17 00:00:00 2001 From: Linus Sommer <95619282+linus-md@users.noreply.github.com> Date: Fri, 15 Dec 2023 18:16:00 +0100 Subject: [PATCH 08/11] Update xml.py --- pandas/io/xml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 75b8b545592c4..07b418b4d9a97 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1059,6 +1059,7 @@ def read_xml( Examples -------- >>> import io + >>> from io import StringIO >>> xml = ''' ... ... From 72dfe27044eea0a8259a62351bbb2b0fd515742f Mon Sep 17 00:00:00 2001 From: linus-md Date: Fri, 15 Dec 2023 18:22:02 +0100 Subject: [PATCH 09/11] fix imports --- pandas/io/xml.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 75b8b545592c4..fa863b0c3c15a 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1058,7 +1058,7 @@ def read_xml( Examples -------- - >>> import io + >>> from io import StringIO >>> xml = ''' ... ... @@ -1078,7 +1078,7 @@ def read_xml( ... ... ''' - >>> df = pd.read_xml(io.StringIO(xml)) + >>> df = pd.read_xml(StringIO(xml)) >>> df shape degrees sides 0 square 360 4.0 @@ -1092,7 +1092,7 @@ def read_xml( ... ... ''' - >>> df = pd.read_xml(io.StringIO(xml), xpath=".//row") + >>> df = pd.read_xml(StringIO(xml), xpath=".//row") >>> df shape degrees sides 0 square 360 4.0 @@ -1118,7 +1118,7 @@ def read_xml( ... ... ''' - >>> df = pd.read_xml(io.StringIO(xml), + >>> df = pd.read_xml(StringIO(xml), ... xpath="//doc:row", ... namespaces={{"doc": "https://example.com"}}) >>> df From d6f30feaf81be25603828152be92d030d7bea23f Mon Sep 17 00:00:00 2001 From: linus-md Date: Fri, 15 Dec 2023 18:24:27 +0100 Subject: [PATCH 10/11] Update --- pandas/io/xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index fa863b0c3c15a..c82a5ad57837f 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1149,7 +1149,7 @@ def read_xml( >>> df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", ... parse_dates=["e"]) - >>> print(df) + >>> df index a b c d e 0 0 1 2.5 True a 2019-12-31 1 1 4.5 False b 2019-12-31 From 78167edd64244305c4addc8d021ebc744a922644 Mon Sep 17 00:00:00 2001 From: linus-md Date: Sun, 17 Dec 2023 17:05:30 +0100 Subject: [PATCH 11/11] Formatting --- pandas/io/xml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/xml.py b/pandas/io/xml.py index c82a5ad57837f..ac497cd266027 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -1147,7 +1147,8 @@ def read_xml( ... ... ''' - >>> df = pd.read_xml(StringIO(xml_data), dtype_backend="numpy_nullable", + >>> df = pd.read_xml(StringIO(xml_data), + ... dtype_backend="numpy_nullable", ... parse_dates=["e"]) >>> df index a b c d e