|
15 | 15 | # specific language governing permissions and limitations
|
16 | 16 | # under the License.
|
17 | 17 |
|
18 |
| -import pyarrow |
| 18 | +import numpy as np |
| 19 | +import pandas as pd |
| 20 | +import pyarrow as A |
19 | 21 |
|
20 |
| -class Conversions(object): |
| 22 | + |
class PyListConversions(object):
    """Benchmarks for building an Arrow array from a plain Python list.

    Parametrized over ``size``: the number of integers in the input list.
    """

    param_names = ('size',)
    params = (1, 10 ** 5, 10 ** 6, 10 ** 7)

    def setup(self, n):
        """Prepare the ``n``-element list ``[0, 1, ..., n - 1]``.

        The list is stored on ``self.data`` so the benchmark bodies below
        do not pay for constructing it themselves.
        """
        self.data = list(range(n))

    def time_from_pylist(self, n):
        """Convert the prepared list with ``from_pylist``.

        NOTE(review): the ``time_`` prefix presumably marks this as a
        timing benchmark for the harness -- confirm against the runner.
        """
        A.from_pylist(self.data)

    def peakmem_from_pylist(self, n):
        """Convert the prepared list with ``from_pylist``.

        NOTE(review): the ``peakmem_`` prefix presumably marks this as a
        peak-memory benchmark for the harness -- confirm against the runner.
        """
        A.from_pylist(self.data)
| 35 | + |
| 36 | + |
class PandasConversionsBase(object):
    """Shared data preparation for the pandas/Arrow conversion benchmarks."""

    def setup(self, n, dtype):
        """Store a single-column DataFrame with ``n`` rows on ``self.data``.

        Parameters
        ----------
        n : int
            Number of rows; the values are ``0 .. n - 1`` before casting.
        dtype : str
            Forwarded to ``ndarray.astype``, except for the special value
            ``'float64_nans'``, which produces float64 data in which every
            value divisible by 10 is replaced with NaN.
        """
        if dtype == 'float64_nans':
            arr = np.arange(n).astype('float64')
            # Punch NaN holes at 0, 10, 20, ... to exercise the null path.
            arr[arr % 10 == 0] = np.nan
        else:
            arr = np.arange(n).astype(dtype)
        self.data = pd.DataFrame({'column': arr})
| 45 | + |
| 46 | + |
class PandasConversionsToArrow(PandasConversionsBase):
    """Benchmarks converting the prepared pandas DataFrame to Arrow.

    Parametrized over ``size`` (row count) and ``dtype`` (column type, as
    interpreted by ``PandasConversionsBase.setup``).
    """

    param_names = ('size', 'dtype')
    params = (
        (1, 10 ** 5, 10 ** 6, 10 ** 7),
        ('int64', 'float64', 'float64_nans', 'str'),
    )

    def time_from_series(self, n, dtype):
        """Convert ``self.data`` with ``from_pandas_dataframe``."""
        A.from_pandas_dataframe(self.data)

    def peakmem_from_series(self, n, dtype):
        """Convert ``self.data`` with ``from_pandas_dataframe``."""
        A.from_pandas_dataframe(self.data)
| 56 | + |
| 57 | + |
class PandasConversionsFromArrow(PandasConversionsBase):
    """Benchmarks converting Arrow data back to pandas.

    Parametrized over ``size`` (row count) and ``dtype`` (column type, as
    interpreted by ``PandasConversionsBase.setup``).
    """

    param_names = ('size', 'dtype')
    params = (
        (1, 10 ** 5, 10 ** 6, 10 ** 7),
        ('int64', 'float64', 'float64_nans', 'str'),
    )

    def setup(self, n, dtype):
        """Build the DataFrame, then pre-convert it to Arrow.

        The Arrow object is kept on ``self.arrow_data`` so the benchmark
        bodies only perform the Arrow -> pandas direction.
        """
        super(PandasConversionsFromArrow, self).setup(n, dtype)
        self.arrow_data = A.from_pandas_dataframe(self.data)

    def time_to_series(self, n, dtype):
        """Call ``to_pandas`` on the pre-converted Arrow data."""
        self.arrow_data.to_pandas()

    def peakmem_to_series(self, n, dtype):
        """Call ``to_pandas`` on the pre-converted Arrow data."""
        self.arrow_data.to_pandas()
| 71 | + |
28 | 72 |
|
29 | 73 | class ScalarAccess(object):
|
| 74 | + param_names = ('size',) |
30 | 75 | params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
|
31 | 76 |
|
32 | 77 | def setUp(self, n):
|
33 |
| - self._array = pyarrow.from_pylist(list(range(n))) |
| 78 | + self._array = A.from_pylist(list(range(n))) |
34 | 79 |
|
35 | 80 | def time_as_py(self, n):
|
36 | 81 | for i in range(n):
|
|
0 commit comments