Skip to content

Commit b3ebce1

Browse files
xhochy authored and wesm committed
ARROW-89: [Python] Add benchmarks for Arrow<->Pandas conversion
Author: Uwe L. Korn <[email protected]>

Closes #51 from xhochy/arrow-89 and squashes the following commits:

bd6a7cb [Uwe L. Korn] Split benchmarks and add one for a float64 column with NaNs
8f74528 [Uwe L. Korn] ARROW-89: [Python] Add benchmarks for Arrow<->Pandas conversion
1 parent 5a68f8d commit b3ebce1

File tree

1 file changed

+50
-5
lines changed

1 file changed

+50
-5
lines changed

python/benchmarks/array.py

Lines changed: 50 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,22 +15,67 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
import pyarrow
18+
import numpy as np
19+
import pandas as pd
20+
import pyarrow as A
1921

20-
class Conversions(object):
22+
23+
class PyListConversions(object):
    """Benchmarks for building an Arrow array from a Python list.

    NOTE(review): the ``time_``/``peakmem_`` method prefixes and the
    ``params``/``param_names`` attributes look like airspeed velocity
    (asv) conventions -- confirm against the benchmark runner setup.
    """

    # Display name and candidate values for the single ``n`` parameter.
    param_names = ('size',)
    params = (1, 10 ** 5, 10 ** 6, 10 ** 7)

    def setup(self, n):
        """Prepare the ``n``-element input list.

        Building the list here means the benchmark methods below contain
        nothing but the ``A.from_pylist`` call.
        """
        self.data = list(range(n))

    def time_from_pylist(self, n):
        """Convert the prepared list with ``A.from_pylist``."""
        A.from_pylist(self.data)

    def peakmem_from_pylist(self, n):
        """Run the same conversion under the ``peakmem_`` prefix."""
        A.from_pylist(self.data)
35+
36+
37+
class PandasConversionsBase(object):
    """Shared fixture for the pandas conversion benchmarks.

    Defines only ``setup``; it has no benchmark methods of its own.
    """

    def setup(self, n, dtype):
        """Store a one-column DataFrame of ``n`` rows in ``self.data``.

        Parameters
        ----------
        n : int
            Number of rows; values are ``0 .. n-1`` before casting.
        dtype : str
            Either a dtype name accepted by ``ndarray.astype`` or the
            special value ``'float64_nans'``, which produces a float64
            column where every value divisible by 10 is replaced by NaN.
        """
        if dtype == 'float64_nans':
            values = np.arange(n, dtype='float64')
            # Blank out every tenth value (0, 10, 20, ...) so the column
            # contains missing data.
            values[values % 10 == 0] = np.nan
        else:
            values = np.arange(n).astype(dtype)
        self.data = pd.DataFrame({'column': values})
45+
46+
47+
class PandasConversionsToArrow(PandasConversionsBase):
    """Benchmarks for converting a pandas DataFrame to Arrow.

    The input DataFrame is ``self.data``, which the inherited ``setup``
    builds for each ``(size, dtype)`` combination.
    """

    # One entry per positional argument passed to setup and the benchmarks.
    param_names = ('size', 'dtype')
    params = (
        (1, 10 ** 5, 10 ** 6, 10 ** 7),
        ('int64', 'float64', 'float64_nans', 'str'),
    )

    def time_from_series(self, n, dtype):
        """Convert the prepared DataFrame with ``A.from_pandas_dataframe``."""
        A.from_pandas_dataframe(self.data)

    def peakmem_from_series(self, n, dtype):
        """Run the same conversion under the ``peakmem_`` prefix."""
        A.from_pandas_dataframe(self.data)
56+
57+
58+
class PandasConversionsFromArrow(PandasConversionsBase):
    """Benchmarks for converting Arrow data back to pandas.

    ``setup`` converts the DataFrame to Arrow up front, so the benchmark
    methods contain only the ``to_pandas`` call.
    """

    # One entry per positional argument passed to setup and the benchmarks.
    param_names = ('size', 'dtype')
    params = (
        (1, 10 ** 5, 10 ** 6, 10 ** 7),
        ('int64', 'float64', 'float64_nans', 'str'),
    )

    def setup(self, n, dtype):
        """Build the DataFrame, then keep its Arrow form in ``arrow_data``."""
        super(PandasConversionsFromArrow, self).setup(n, dtype)
        self.arrow_data = A.from_pandas_dataframe(self.data)

    def time_to_series(self, n, dtype):
        """Convert the prepared Arrow data with ``to_pandas``."""
        self.arrow_data.to_pandas()

    def peakmem_to_series(self, n, dtype):
        """Run the same conversion under the ``peakmem_`` prefix."""
        self.arrow_data.to_pandas()
71+
2872

2973
class ScalarAccess(object):
74+
param_names = ('size',)
3075
params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
3176

3277
def setUp(self, n):
33-
self._array = pyarrow.from_pylist(list(range(n)))
78+
self._array = A.from_pylist(list(range(n)))
3479

3580
def time_as_py(self, n):
3681
for i in range(n):

0 commit comments

Comments
 (0)