Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ jobs:
if: startswith(matrix.os, 'macos')
run: brew install libomp

# xcode 15.2 throws compilation errors ref https://github.com/vaexio/vaex/pull/2432
# select older xcode from the available versions on the runner ref https://github.com/actions/runner-images/blob/ff9acc6/images/macos/macos-13-Readme.md#xcode
- name: Switch to older Xcode (Mac-only)
if: startswith(matrix.os, 'macos')
run: sudo xcode-select -s "/Applications/Xcode_15.0.1.app"

- name: Copy dll (Windows-only)
if: (matrix.os == 'windows-latest')
uses: ./ci/actions/windll
Expand Down
15 changes: 6 additions & 9 deletions .github/workflows/wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
with:
python-version: "3.x"

- run: pip install cibuildwheel==2.17.0 # sync version with pypa/cibuildwheel below
- run: pip install cibuildwheel==2.23.3 # sync version with pypa/cibuildwheel below

- id: set-matrix
env:
Expand Down Expand Up @@ -86,19 +86,16 @@ jobs:
if: runner.os == 'Linux'
uses: docker/setup-qemu-action@v3

# xcode 15.2 throws compilation errors ref https://github.com/vaexio/vaex/pull/2432
# select older xcode from the available versions on the runner ref https://github.com/actions/runner-images/blob/ff9acc6/images/macos/macos-13-Readme.md#xcode
- name: Switch to older Xcode (Mac-only)
if: startswith(matrix.os, 'macos')
run: sudo xcode-select -s "/Applications/Xcode_15.0.1.app"

- uses: pypa/[email protected] # sync version with pip install cibuildwheel above
- uses: pypa/[email protected] # sync version with pip install cibuildwheel above
with:
only: ${{ matrix.only }}
package-dir: packages/vaex-core/
output-dir: packages/vaex-core/dist/
env:
CIBW_BEFORE_BUILD: ${{ startswith(matrix.os, 'ubuntu') && 'bash bin/install_pcre.sh' || startswith(matrix.os, 'macos') && 'sudo -E bash bin/install_pcre.sh' || '' }}
# Installation of cmake on musllinux requires python3-dev and openssl-dev to be present
CIBW_BEFORE_BUILD_LINUX: bash -c "./bin/install_pcre.sh && if command -v apk; then apk add python3-dev openssl-dev; fi"
CIBW_BEFORE_BUILD_MACOS: sudo -E bash bin/install_pcre.sh
#CIBW_BEFORE_BUILD_WINDOWS:
CIBW_BUILD_VERBOSITY: 2
# temporary ref https://github.com/oconnor663/blake3-py/pull/45
CIBW_BEFORE_TEST: pip install --force-reinstall blake3 --find-links https://github.com/ddelange/blake3-py/releases/expanded_assets/0.4.1
Expand Down
2 changes: 1 addition & 1 deletion bin/install_pcre.sh
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ function install_precompiled() {
apt update
apt install -y libpcre3-dev
elif [ -n "$(which apk)" ]; then
apk add --update pcre
apk add --update pcre pcre-dev
elif [ -n "$(which dnf)" ]; then
dnf --setopt install_weak_deps=false -y install pcre
else
Expand Down
63 changes: 31 additions & 32 deletions docs/source/tutorial_ml.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1190,7 +1190,7 @@
"source": [
"## Supervised learning\n",
"\n",
"While `vaex.ml` does not yet implement any supervised machine learning models, it does provide wrappers to several popular libraries such as [scikit-learn](https://scikit-learn.org/), [XGBoost](https://xgboost.readthedocs.io/), [LightGBM](https://lightgbm.readthedocs.io/) and [CatBoost](https://catboost.ai/) (latter only with numpy < 2). \n",
"While `vaex.ml` does not yet implement any supervised machine learning models, it does provide wrappers to several popular libraries such as [scikit-learn](https://scikit-learn.org/), [XGBoost](https://xgboost.readthedocs.io/), [LightGBM](https://lightgbm.readthedocs.io/) and [CatBoost](https://catboost.ai/). \n",
"\n",
"The main benefit of these wrappers is that they turn the models into `vaex.ml` transformers. This means the models become part of the DataFrame _state_ and thus can be serialized, and their predictions can be returned as _virtual columns_. This is especially useful for creating various diagnostic plots and evaluating performance metrics at no memory cost, as well as building ensembles. \n",
"\n",
Expand Down Expand Up @@ -1467,7 +1467,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### `CatBoost` example (numpy < 2 only)\n",
"### `CatBoost` example\n",
"\n",
"The CatBoost library supports summing up models. With this feature, we can use CatBoost to train a model using data that is otherwise too large to fit in memory. The idea is to train a single CatBoost model per chunk of data, and then sum up the individual models to create a master model. To use this feature via `vaex.ml` just specify the `batch_size` argument in the `CatBoostModel` wrapper. One can also specify additional options such as the strategy on how to sum up the individual models, or how they should be weighted."
]
Expand Down Expand Up @@ -1543,36 +1543,35 @@
],
"source": [
"import numpy as np\\n",
"if np.lib.NumpyVersion(np.__version__) < '2.0.0':\n",
" from vaex.ml.catboost import CatBoostModel\n",
"\n",
" df = vaex.datasets.iris_1e8()\n",
" df_train, df_test = df.ml.train_test_split(test_size=0.2, verbose=False)\n",
"\n",
" features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n",
" target = 'class_'\n",
"\n",
" params = {\n",
" 'leaf_estimation_method': 'Gradient',\n",
" 'learning_rate': 0.1,\n",
" 'max_depth': 3,\n",
" 'bootstrap_type': 'Bernoulli',\n",
" 'subsample': 0.8,\n",
" 'sampling_frequency': 'PerTree',\n",
" 'colsample_bylevel': 0.8,\n",
" 'reg_lambda': 1,\n",
" 'objective': 'MultiClass',\n",
" 'eval_metric': 'MultiClass',\n",
" 'random_state': 42,\n",
" 'verbose': 0,\n",
" }\n",
"\n",
" booster = CatBoostModel(features=features, target=target, num_boost_round=23, \n",
" params=params, prediction_type='Class', batch_size=11_000_000)\n",
" booster.fit(df=df_train, progress='widget')\n",
"\n",
" df_test = booster.transform(df_train)\n",
" df_test"
"from vaex.ml.catboost import CatBoostModel\n",
"\n",
"df = vaex.datasets.iris_1e8()\n",
"df_train, df_test = df.ml.train_test_split(test_size=0.2, verbose=False)\n",
"\n",
"features = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n",
"target = 'class_'\n",
"\n",
"params = {\n",
" 'leaf_estimation_method': 'Gradient',\n",
" 'learning_rate': 0.1,\n",
" 'max_depth': 3,\n",
" 'bootstrap_type': 'Bernoulli',\n",
" 'subsample': 0.8,\n",
" 'sampling_frequency': 'PerTree',\n",
" 'colsample_bylevel': 0.8,\n",
" 'reg_lambda': 1,\n",
" 'objective': 'MultiClass',\n",
" 'eval_metric': 'MultiClass',\n",
" 'random_state': 42,\n",
" 'verbose': 0,\n",
"}\n",
"\n",
"booster = CatBoostModel(features=features, target=target, num_boost_round=23, \n",
" params=params, prediction_type='Class', batch_size=11_000_000)\n",
"booster.fit(df=df_train, progress='widget')\n",
"\n",
"df_test = booster.transform(df_train)\n",
"df_test"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion packages/vaex-arrow/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
license = 'MIT'
version = version.__version__
url = 'https://www.github.com/maartenbreddels/vaex'
install_requires = ["vaex-core>=2.0.0,<3", "astropy>=2", "matplotlib>=1.3.1", "pillow", "pyarrow>=0.15"]
install_requires = ["vaex-core>=2.0.0,<3", "astropy>=2", "matplotlib>=1.3.1", "pillow"]

setup(name=name + '-arrow',
version=version,
Expand Down
4 changes: 2 additions & 2 deletions packages/vaex-core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ find_package(NumPy REQUIRED)


if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
SET(CMAKE_CXX_FLAGS --std=c++11)
SET(CMAKE_CXX_FLAGS "--std=c++11 -Wno-enum-constexpr-conversion")
endif()

IF(CMAKE_COMPILER_IS_GNUCC)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color -fmax-errors=4 -std=c++11 -Werror=return-type -Wno-unused-parameter -O3 -funroll-loops")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color -fmax-errors=4 -std=c++11 -Werror=return-type -Wno-enum-constexpr-conversion -Wno-unused-parameter -O3 -funroll-loops")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
ENDIF(CMAKE_COMPILER_IS_GNUCC)
Expand Down
3 changes: 3 additions & 0 deletions packages/vaex-core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
"numpy>=1.19.3,<3", # 1.19.3 is the first version with 3.9 wheels
"pandas>=1.0,<3",
"pyarrow>=5.0.0",
# windows only: pyarrow<21.0.0 -- https://github.com/apache/arrow/issues/47234
"pyarrow<21.0.0;platform_system=='Windows'",
"pydantic>=1.8.0",
"pyyaml",
"rich",
Expand Down Expand Up @@ -128,6 +130,7 @@ def __str__(self):
extra_compile_args += ["-DNDEBUG"]
if sys.platform == "darwin":
extra_compile_args.append("-mmacosx-version-min=10.9")
extra_compile_args.append("-Wno-enum-constexpr-conversion")



Expand Down
6 changes: 4 additions & 2 deletions packages/vaex-ml/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
'scikit-learn',
'xgboost',
'lightgbm~=4.0',
'catboost',
'catboost>=1.2.8',
]
extras_require_ml = {'all': ['tensorflow~=2.18']}

# https://stackoverflow.com/questions/79744362/import-tensorflow-statement-crashes-or-hangs-on-macos/79744363#79744363
extras_require_ml = {'all': ['tensorflow~=2.18',"tensorflow<2.20.0;sys_platform=='darwin'"]}

setup(name=name + '-ml',
version=version,
Expand Down
Loading
Loading