From 59903c966fa26ae705bc004275a555824e5bf717 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 6 May 2023 12:50:33 +0200 Subject: [PATCH 1/2] REGR: read_sql dropping duplicated columns --- doc/source/whatsnew/v2.0.2.rst | 1 + pandas/io/sql.py | 4 +++- pandas/tests/io/test_sql.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index 1bc2fda7b8af9..baa61355f2528 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -13,6 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :func:`read_sql` dropping duplicated columns (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 6a161febfe316..ebb994f92d8ad 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -161,7 +161,9 @@ def _convert_arrays_to_dataframe( ArrowExtensionArray(pa.array(arr, from_pandas=True)) for arr in arrays ] if arrays: - return DataFrame(dict(zip(columns, arrays))) + df = DataFrame(dict(zip(list(range(len(columns))), arrays))) + df.columns = columns + return df else: return DataFrame(columns=columns) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 035fda29f8ffb..77e7e6f8d6c41 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1492,6 +1492,18 @@ def test_escaped_table_name(self): tm.assert_frame_equal(res, df) + def test_read_sql_duplicate_columns(self): + # GH#53117 + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1}) + df.to_sql("test_table", self.conn, index=False) + + result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn) + expected = DataFrame( + [[1, 0.1, 2, 1], [2, 0.2, 3, 1], [3, 0.3, 4, 1]], + columns=["a", "b", "a", "c"], + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="SQLAlchemy not installed") class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi): From cd9130a34e4912e8e52f49ef70ab23ae5976d3b4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 8 May 2023 12:33:36 +0200 Subject: [PATCH 2/2] Update doc/source/whatsnew/v2.0.2.rst --- doc/source/whatsnew/v2.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index baa61355f2528..c234de3e3b3ae 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -13,7 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- Fixed regression in :func:`read_sql` dropping duplicated columns (:issue:`53117`) +- Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`) - Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`) - Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`) - Fixed regression in :meth:`MultiIndex.join` returning levels in wrong order (:issue:`53093`)