diff --git a/Project.toml b/Project.toml index 1827bb4e5..8dac6decf 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,14 @@ name = "MLJ" uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" authors = ["Anthony D. Blaom "] -version = "0.20.5" +version = "0.20.6" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +FeatureSelection = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLJBalancing = "45f359ea-796d-4f51-95a5-deb1a414c586" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" @@ -31,12 +32,13 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" CategoricalArrays = "0.8,0.9, 0.10" ComputationalResources = "0.3" Distributions = "0.21,0.22,0.23, 0.24, 0.25" +FeatureSelection = "0.1.1" MLJBalancing = "0.1" MLJBase = "1" MLJEnsembles = "0.4" MLJFlow = "0.5" MLJIteration = "0.6" -MLJModels = "0.16" +MLJModels = "0.17" MLJTestIntegration = "0.5.0" MLJTuning = "0.8" OpenML = "0.2,0.3" @@ -89,4 +91,41 @@ SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Suppressor","Test"] +test = [ + "BetaML", + "CatBoost", + "EvoLinear", + "EvoTrees", + 
"Imbalance", + "InteractiveUtils", + "LightGBM", + "MLJClusteringInterface", + "MLJDecisionTreeInterface", + "MLJFlux", + "MLJGLMInterface", + "MLJLIBSVMInterface", + "MLJLinearModels", + "MLJMultivariateStatsInterface", + "MLJNaiveBayesInterface", + "MLJScikitLearnInterface", + "MLJTSVDInterface", + "MLJTestInterface", + "MLJTestIntegration", + "MLJText", + "MLJXGBoostInterface", + "Markdown", + "NearestNeighborModels", + "OneRule", + "OutlierDetectionNeighbors", + "OutlierDetectionPython", + "ParallelKMeans", + "PartialLeastSquaresRegressor", + "PartitionedLS", + "SelfOrganizingMaps", + "SIRUS", + "SymbolicRegression", + "StableRNGs", + "Suppressor", + "Test", +] + diff --git a/docs/ModelDescriptors.toml b/docs/ModelDescriptors.toml index d134e5bef..3d301ae63 100644 --- a/docs/ModelDescriptors.toml +++ b/docs/ModelDescriptors.toml @@ -11,7 +11,9 @@ AutoEncoder_BetaML = ["dimension_reduction"] BM25Transformer_MLJText = ["encoders", "text_analysis"] BaggingClassifier_MLJScikitLearnInterface = ["classification", "ensemble_models"] BaggingRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models"] -BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification"] +BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification", "meta_algorithms"] +BinaryThresholdPredictor_MLJModels = ["meta_algorithms", "classification"] +BalancedModel_MLJBalancing = ["class_imbalance", "meta_algorithms"] BayesianLDA_MultivariateStats = ["dimension_reduction", "classification", "Bayesian_models"] BayesianLDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"] BayesianQDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"] @@ -52,6 +54,7 @@ ElasticNetCVRegressor_MLJScikitLearnInterface = ["regression"] ElasticNetRegressor_MLJLinearModels = ["regression"] ElasticNetRegressor_MLJScikitLearnInterface = ["regression"] ENNUndersampler_Imbalance = ["class_imbalance"] 
+EnsembleModel_MLJEnsembles = ["ensemble_models", "meta_algorithms"] EpsilonSVR_LIBSVM = ["regression"] EvoLinearRegressor_EvoLinear = ["regression"] EvoTreeClassifier_EvoTrees = ["classification", "ensemble_models", "iterative_models"] @@ -63,8 +66,8 @@ EvoSplineRegressor_EvoLinear = ["regression", "ensemble_models", "iterative_mode ExtraTreesClassifier_MLJScikitLearnInterface = ["classification", "iterative_models"] ExtraTreesRegressor_MLJScikitLearnInterface = ["regression", "iterative_models"] FactorAnalysis_MultivariateStats = ["dimension_reduction", ] -FeatureAgglomeration_MLJScikitLearnInterface = ["clustering", "static_models"] -FeatureSelector_MLJModels = ["dimension_reduction", ] +FeatureAgglomeration_MLJScikitLearnInterface = ["clustering", "static_models", "feature_engineering"] +FeatureSelector_FeatureSelection = ["dimension_reduction", "feature_engineering"] FillImputer_MLJModels = ["missing_value_imputation", ] GaussianMixtureClusterer_BetaML = ["clustering", "distribution_fitter"] GaussianMixtureImputer_BetaML = ["missing_value_imputation", "distribution_fitter"] @@ -88,7 +91,8 @@ ICA_MultivariateStats = ["encoders"] IForestDetector_OutlierDetectionPython = ["outlier_detection"] ImageClassifier_MLJFlux = ["classification", "image_processing", "iterative_models"] INNEDetector_OutlierDetectionPython = ["outlier_detection"] -InteractionTransformer_MLJModels = ["static_models"] +InteractionTransformer_MLJModels = ["static_models", "feature_engineering"] +IteratedModel_MLJIteration = ["iterative_models", "meta_algorithms"] KDEDetector_OutlierDetectionPython = ["outlier_detection"] KMeansClusterer_BetaML = ["clustering"] KMeans_Clustering = ["clustering", "dimension_reduction", ] @@ -104,7 +108,7 @@ KNeighborsClassifier_MLJScikitLearnInterface = ["classification"] KNeighborsRegressor_MLJScikitLearnInterface = ["regression"] KPLSRegressor_PartialLeastSquaresRegressor = ["regression"] KernelPCA_MultivariateStats = ["dimension_reduction", ] 
-KernelPerceptronClassifier_BetaML = ["classification"] +KernelPerceptronClassifier_BetaML = ["classification", "neural networks"] LADRegressor_MLJLinearModels = ["regression"] LDA_MultivariateStats = ["classification", "dimension_reduction", ] LGBMClassifier_LightGBM = ["classification", "ensemble_models", "iterative_models"] @@ -146,14 +150,14 @@ MultitargetGaussianMixtureRegressor_BetaML = ["regression", "distribution_fitter MultitargetKNNClassifier_NearestNeighborModels = ["classification"] MultitargetKNNRegressor_NearestNeighborModels = ["regression"] MultitargetLinearRegressor_MultivariateStats = ["regression"] -MultitargetNeuralNetworkRegressor_BetaML = ["regression"] -MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"] +MultitargetNeuralNetworkRegressor_BetaML = ["regression", "neural networks"] +MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"] MultitargetRidgeRegressor_MultivariateStats = ["regression"] MultitargetSRRegressor_SymbolicRegression = ["regression"] -NeuralNetworkClassifier_BetaML = ["classification"] -NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models"] -NeuralNetworkRegressor_BetaML = ["regression"] -NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"] +NeuralNetworkClassifier_BetaML = ["classification", "neural networks"] +NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models", "neural networks"] +NeuralNetworkRegressor_BetaML = ["regression", "neural networks"] +NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"] NuSVC_LIBSVM = ["classification"] NuSVR_LIBSVM = ["regression"] OCSVMDetector_OutlierDetectionPython = ["outlier_detection"] @@ -171,8 +175,9 @@ PartLS_PartitionedLS = ["regression"] PassiveAggressiveClassifier_MLJScikitLearnInterface = ["classification"] PassiveAggressiveRegressor_MLJScikitLearnInterface = ["regression"] PegasosClassifier_BetaML = ["classification"] 
-PerceptronClassifier_BetaML = ["classification", "iterative_models"] -PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models"] +PerceptronClassifier_BetaML = ["classification", "iterative_models", "neural networks"] +PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models", "neural networks"] +Pipeline_MLJBase = ["meta_algorithms"] ProbabilisticNuSVC_LIBSVM = ["classification"] ProbabilisticSGDClassifier_MLJScikitLearnInterface = ["classification"] ProbabilisticSVC_LIBSVM = ["classification"] @@ -190,6 +195,8 @@ RandomForestImputer_BetaML = ["missing_value_imputation", "ensemble_models", "it RandomForestRegressor_BetaML = ["regression", "ensemble_models", "iterative_models"] RandomForestRegressor_DecisionTree = ["regression", "ensemble_models", "iterative_models"] RandomForestRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models", "iterative_models"] +RecursiveFeatureElimination_FeatureSelection = ["dimension_reduction", "meta_algorithms", "feature_engineering"] +Resampler_MLJBase = ["meta_algorithms"] RidgeCVClassifier_MLJScikitLearnInterface = ["classification"] RidgeCVRegressor_MLJScikitLearnInterface = ["classification"] RidgeClassifier_MLJScikitLearnInterface = ["classification"] @@ -210,6 +217,7 @@ StableForestClassifier_SIRUS = ["classification"] StableForestRegressor_SIRUS = ["regression"] StableRulesClassifier_SIRUS = ["classification"] StableRulesRegressor_SIRUS = ["regression"] +Stack_MLJBase = ["meta_algorithms", "ensemble_models"] SVC_LIBSVM = ["classification"] SVMClassifier_MLJScikitLearnInterface = ["classification"] SVMLinearClassifier_MLJScikitLearnInterface = ["classification"] @@ -222,9 +230,11 @@ SpectralClustering_MLJScikitLearnInterface = ["clustering", "static_models"] Standardizer_MLJModels = ["encoders"] SubspaceLDA_MultivariateStats = ["classification", "dimension_reduction"] TomekUndersampler_Imbalance = ["class_imbalance"] +TunedModel_MLJTuning = ["meta_algorithms"] 
TSVDTransformer_TSVD = ["dimension_reduction"] TfidfTransformer_MLJText = ["encoders", "text_analysis"] TheilSenRegressor_MLJScikitLearnInterface = ["regression"] +TransformedTargetModel_MLJBase = ["meta_algorithms", "outlier_detection"] UnivariateBoxCoxTransformer_MLJModels = ["encoders"] UnivariateDiscretizer_MLJModels = ["encoders"] UnivariateFillImputer_MLJModels = ["missing_value_imputation"] diff --git a/docs/make.jl b/docs/make.jl index 4b640c143..085778b02 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,6 +15,7 @@ import MLJ.MLJModels import MLJ.MLJEnsembles import MLJ.ScientificTypes import MLJ.MLJBalancing +import MLJ.FeatureSelection import ScientificTypesBase import Distributions using CategoricalArrays @@ -37,7 +38,7 @@ isempty(problems) || error( # compose the individual model docstring pages: @info "Getting individual model docstrings from the registry and generating "* "pages for them, written at /docs/src/models/ ." -for model in models() +for model in models(wrappers=true) write_page(model) end @@ -54,45 +55,62 @@ pages = [ "Model Browser" => "model_browser.md", "About MLJ" => "about_mlj.md", "Learning MLJ" => "learning_mlj.md", - "Getting Started" => "getting_started.md", - "Common MLJ Workflows" => "common_mlj_workflows.md", - "Working with Categorical Data" => "working_with_categorical_data.md", - "Model Search" => "model_search.md", - "Loading Model Code" => "loading_model_code.md", - "Machines" => "machines.md", - "Evaluating Model Performance" => "evaluating_model_performance.md", - "Performance Measures" => "performance_measures.md", - "Weights" => "weights.md", - "Tuning Models" => "tuning_models.md", - "Learning Curves" => "learning_curves.md", - "Preparing Data" => "preparing_data.md", - "Transformers and Other Unsupervised models" => "transformers.md", - "More on Probabilistic Predictors" => "more_on_probabilistic_predictors.md", - "Composing Models" => "composing_models.md", - "Linear Pipelines" => "linear_pipelines.md", - "Target 
Transformations" => "target_transformations.md", - "Homogeneous Ensembles" => "homogeneous_ensembles.md", - "Correcting Class Imbalance" => "correcting_class_imbalance.md", - "Model Stacking" => "model_stacking.md", - "Learning Networks" => "learning_networks.md", - "Controlling Iterative Models" => "controlling_iterative_models.md", - "Generating Synthetic Data" => "generating_synthetic_data.md", - "Logging Workflows" => "logging_workflows.md", - "OpenML Integration" => "openml_integration.md", - "Acceleration and Parallelism" => "acceleration_and_parallelism.md", - "Simple User Defined Models" => "simple_user_defined_models.md", - "Quick-Start Guide to Adding Models" => - "quick_start_guide_to_adding_models.md", - "Adding Models for General Use" => "adding_models_for_general_use.md", - "Modifying Behavior" => "modifying_behavior.md", - "Internals" => "internals.md", - "List of Supported Models" => "list_of_supported_models.md", - "Third Party Packages" => "third_party_packages.md", - "Glossary" => "glossary.md", - "MLJ Cheatsheet" => "mlj_cheatsheet.md", - "FAQ" => "frequently_asked_questions.md", + "Basics" => [ + "Getting Started" => "getting_started.md", + "Common MLJ Workflows" => "common_mlj_workflows.md", + "Machines" => "machines.md", + "MLJ Cheatsheet" => "mlj_cheatsheet.md", + ], + "Data" => [ + "Working with Categorical Data" => "working_with_categorical_data.md", + "Preparing Data" => "preparing_data.md", + "Generating Synthetic Data" => "generating_synthetic_data.md", + "OpenML Integration" => "openml_integration.md", + ], + "Model Basics" => [ + "Model Search" => "model_search.md", + "Loading Model Code" => "loading_model_code.md", + "Transformers and Other Unsupervised models" => "transformers.md", + "List of Supported Models" => "list_of_supported_models.md", + ], + "Meta-algorithms" => [ + "Evaluating Model Performance" => "evaluating_model_performance.md", + "Tuning Models" => "tuning_models.md", + "Learning Curves" => "learning_curves.md", + 
"Controlling Iterative Models" => "controlling_iterative_models.md", + "Correcting Class Imbalance" => "correcting_class_imbalance.md", + "Thresholding Probabilistic Predictors" => + "thresholding_probabilistic_predictors.md", + "Target Transformations" => "target_transformations.md", + "Homogeneous Ensembles" => "homogeneous_ensembles.md", + ], + "Model Composition" => [ + "Composing Models" => "composing_models.md", + "Linear Pipelines" => "linear_pipelines.md", + "Model Stacking" => "model_stacking.md", + "Learning Networks" => "learning_networks.md", + ], + "Third Party Tools" => [ + "Logging Workflows using MLflow" => "logging_workflows.md", + "Third Party Packages" => "third_party_packages.md", + ], + "Customization and Extension" => [ + "Simple User Defined Models" => "simple_user_defined_models.md", + "Quick-Start Guide to Adding Models" => + "quick_start_guide_to_adding_models.md", + "Adding Models for General Use" => "adding_models_for_general_use.md", + "Modifying Behavior" => "modifying_behavior.md", + "Internals" => "internals.md", + ], + "Miscellaneous" => [ + "Performance Measures" => "performance_measures.md", + "Weights" => "weights.md", + "Acceleration and Parallelism" => "acceleration_and_parallelism.md", + "Glossary" => "glossary.md", + "FAQ" => "frequently_asked_questions.md", + ], "Index of Methods" => "api.md", - ] +] for (k, v) in pages println("$k\t=>$v") @@ -118,6 +136,7 @@ makedocs( IterationControl, CategoricalDistributions, StatisticalMeasures, + FeatureSelection, ], pages = pages, warnonly = [:cross_references, :missing_docs], diff --git a/docs/model_docstring_tools.jl b/docs/model_docstring_tools.jl index 175d34370..743afcf84 100644 --- a/docs/model_docstring_tools.jl +++ b/docs/model_docstring_tools.jl @@ -5,7 +5,7 @@ const PATH_TO_MODEL_DOCS = joinpath(@__DIR__, "src", "models") """ remove_doc_refs(str::AbstractString) -Removes `@ref` references from `str. For example, a substring of the form +Removes `@ref` references from `str`. 
For example, a substring of the form "[`some.thing_like_this123!`](@ref)" is replaced with "`some.thing_like_this123!`". """ @@ -27,8 +27,8 @@ handle(model) = model.name*"_"*model.package_name **Private method.** Compose and write to file the documentation page for `model`. Here `model` is an entry in -the MLJ Model Registry, i.e., an element of `MLJModels.models()`. The file name has the -form `"ModelName_PackageName.md"`, for example, +the MLJ Model Registry, i.e., an element of `MLJModels.models(; wrappers=true)`. The file +name has the form `"ModelName_PackageName.md"`, for example, `"DecisionTreeClassifier_DecisionTree.md"`. Such a page can be referenced from any other markdown page in /docs/src/ like this: `[DecisionTreeClassifier](@ref DecisionTreeClassifier_DecisionTree)`. @@ -56,6 +56,7 @@ const DESCRIPTORS_GIVEN_HANDLE = # determined the list of all descriptors, ranked by frequency: const descriptors = vcat(values(DESCRIPTORS_GIVEN_HANDLE)...) const ranking = MLJBase.countmap(descriptors) +ranking["meta algorithms"] = 1e10 const DESCRIPTORS = sort(unique(descriptors), by=d -> ranking[d], rev=true) const HANDLES = keys(DESCRIPTORS_GIVEN_HANDLE) @@ -67,7 +68,7 @@ handle as key in /docs/src/ModelDescriptors.toml. """ function models_missing_descriptors() - handles = handle.(models()) + handles = handle.(models(wrappers=true)) filter(handles) do h !(h in HANDLES) end @@ -82,7 +83,7 @@ Return the list of models with a given `descriptor`, such as "regressor", as these appear in /src/docs/ModelDescriptors.toml. 
""" -modelswith(descriptor) = filter(models()) do model +modelswith(descriptor) = filter(models(wrappers=true)) do model descriptor in DESCRIPTORS_GIVEN_HANDLE[handle(model)] end diff --git a/docs/src/index.md b/docs/src/index.md index 07b22042f..76b06a208 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -56,8 +56,6 @@ To support MLJ development, please cite these works or star the repo: [Model Search](@ref model_search) | [Loading Model Code](@ref) | [Transformers and Other Unsupervised Models](@ref) | -[More on Probabilistic Predictors](@ref) | -[Composing Models](@ref) | [Simple User Defined Models](@ref) | [List of Supported Models](@ref model_list) | [Third Party Packages](@ref) @@ -65,9 +63,12 @@ To support MLJ development, please cite these works or star the repo: ### Meta-algorithms [Evaluating Model Performance](@ref) | [Tuning Models](@ref) | +[Composing Models](@ref) | [Controlling Iterative Models](@ref) | [Learning Curves](@ref)| -[Correcting Class Imbalance](@ref) +[Correcting Class Imbalance](@ref) | +[Thresholding Probabilistic Predictors](@ref) + ### Composition [Composing Models](@ref) | diff --git a/docs/src/list_of_supported_models.md b/docs/src/list_of_supported_models.md index 7fa4ea1d7..e0dde1ba7 100644 --- a/docs/src/list_of_supported_models.md +++ b/docs/src/list_of_supported_models.md @@ -40,10 +40,11 @@ independent assessment. 
[Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl) | - | RandomOversampler, RandomWalkOversampler, ROSE, SMOTE, BorderlineSMOTE1, SMOTEN, SMOTENC, RandomUndersampler, ClusterUndersampler, ENNUndersampler, TomekUndersampler, | low | [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl) | [MLJLIBSVMInterface.jl](https://github.com/JuliaAI/MLJLIBSVMInterface.jl) | LinearSVC, SVC, NuSVC, NuSVR, EpsilonSVR, OneClassSVM | high | also via ScikitLearn.jl [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) | - | LGBMClassifier, LGBMRegressor | high | +[FeatureSelection.jl](https://github.com/JuliaAI/FeatureSelection.jl) | - | FeatureSelector, RecursiveFeatureElimination | low | [Flux.jl](https://github.com/FluxML/Flux.jl) | [MLJFlux.jl](https://github.com/FluxML/MLJFlux.jl) | NeuralNetworkRegressor, NeuralNetworkClassifier, MultitargetNeuralNetworkRegressor, ImageClassifier | low | [MLJBalancing.jl](https://github.com/JuliaAI/MLJBalancing.jl) | - | BalancedBaggingClassifier | low | [MLJLinearModels.jl](https://github.com/JuliaAI/MLJLinearModels.jl) | - | LinearRegressor, RidgeRegressor, LassoRegressor, ElasticNetRegressor, QuantileRegressor, HuberRegressor, RobustRegressor, LADRegressor, LogisticClassifier, MultinomialClassifier | medium | -[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl) (built-in) | - | ConstantClassifier, ConstantRegressor, ContinuousEncoder, DeterministicConstantClassifier, DeterministicConstantRegressor, FeatureSelector, FillImputer, InteractionTransformer, OneHotEncoder, Standardizer, UnivariateBoxCoxTransformer, UnivariateDiscretizer, UnivariateFillImputer, UnivariateTimeTypeToContinuous, Standardizer, BinaryThreshholdPredictor | medium | +[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl) (built-in) | - | ConstantClassifier, ConstantRegressor, ContinuousEncoder, DeterministicConstantClassifier, DeterministicConstantRegressor, FillImputer, InteractionTransformer, OneHotEncoder, Standardizer, UnivariateBoxCoxTransformer, 
UnivariateDiscretizer, UnivariateFillImputer, UnivariateTimeTypeToContinuous, BinaryThresholdPredictor | medium | [MLJText.jl](https://github.com/JuliaAI/MLJText.jl) | - | TfidfTransformer, BM25Transformer, CountTransformer | low | [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) | [MLJMultivariateStatsInterface.jl](https://github.com/JuliaAI/MLJMultivariateStatsInterface.jl) | LinearRegressor, MultitargetLinearRegressor, RidgeRegressor, MultitargetRidgeRegressor, PCA, KernelPCA, ICA, LDA, BayesianLDA, SubspaceLDA, BayesianSubspaceLDA, FactorAnalysis, PPCA | high | [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl) | [MLJNaiveBayesInterface.jl](https://github.com/JuliaAI/MLJNaiveBayesInterface.jl) | GaussianNBClassifier, MultinomialNBClassifier, HybridNBClassifier | low | diff --git a/docs/src/more_on_probabilistic_predictors.md b/docs/src/thresholding_probabilistic_predictors.md similarity index 92% rename from docs/src/more_on_probabilistic_predictors.md rename to docs/src/thresholding_probabilistic_predictors.md index c75eec639..720d00b67 100644 --- a/docs/src/more_on_probabilistic_predictors.md +++ b/docs/src/thresholding_probabilistic_predictors.md @@ -1,4 +1,4 @@ -# More on Probabilistic Predictors +# Thresholding Probabilistic Predictors Although one can call `predict_mode` on a probabilistic binary classifier to get deterministic predictions, a more flexible strategy diff --git a/docs/src/transformers.md b/docs/src/transformers.md index 59373b9eb..8428db1df 100644 --- a/docs/src/transformers.md +++ b/docs/src/transformers.md @@ -30,7 +30,7 @@ MLJModels.OneHotEncoder MLJModels.ContinuousEncoder MLJModels.FillImputer MLJModels.UnivariateFillImputer -MLJModels.FeatureSelector +FeatureSelection.FeatureSelector MLJModels.UnivariateBoxCoxTransformer MLJModels.UnivariateDiscretizer MLJModels.UnivariateTimeTypeToContinuous diff --git a/src/MLJ.jl b/src/MLJ.jl index 51b4c4230..ccdb4c7f9 100644 --- a/src/MLJ.jl +++ 
b/src/MLJ.jl @@ -1,15 +1,22 @@ """ MLJ -[`MLJ`](https://juliaai.github.io/MLJ.jl//dev/) is a Machine Learning toolbox -for Julia. It collects together functionality from the following packages, which can be -loaded separately: +[`MLJ`](https://juliaai.github.io/MLJ.jl/dev/) is a Machine Learning toolbox for +Julia. It collects together functionality from separate components listed below, which can +be loaded individually. + +Actual model code (e.g., code for instantiating a `DecisionTreeClassifier`) must be +explicitly loaded from the model-providing package, using `@load`, for example. However, +core model wrappers and some common transformers are immediately +available; do `localmodels(wrappers=true)` at startup to list them. + +# Components - MLJBase.jl: The `machine` interface, tools to `partition` and `unpack` datasets, `evaluate`/`evaluate!` for model performance, `|>` pipeline syntax, `TransformedTargetModel` wrapper, general model composition syntax (learning networks), synthetic data generators, `scitype` and `schema` methods (from ScientificTypes.jl) for - checking how MLJ interprets your data + checking how MLJ interprets your data. Generally required for any MLJ workflow. - StatisticalMeasures.jl: MLJ-compatible measures (metrics) for machine learning, confusion matrices, ROC curves. @@ -26,10 +33,15 @@ loaded separately: - MLJBalancing.jl: Incorporation of oversampling/undersampling methods in pipelines, via the `BalancedModel` wrapper +- FeatureSelection.jl: Transformers for feature selection, and the supervised model wrapper + `RecursiveFeatureElimination`. 
+ - MLJFlow.jl: Integration with MLflow workflow tracking - OpenML.jl: Tool for grabbing datasets from OpenML.org + + """ module MLJ @@ -49,6 +61,7 @@ import MLJBase.save using MLJEnsembles using MLJTuning using MLJModels +@reexport using FeatureSelection using OpenML @reexport using MLJFlow @reexport using StatisticalMeasures @@ -154,11 +167,11 @@ export Grid, RandomSearch, Explicit, TunedModel, LatinHypercube, # re-export from MLJModels: export models, localmodels, @load, @iload, load, info, doc, ConstantRegressor, ConstantClassifier, # builtins/Constant.jl - FeatureSelector, UnivariateStandardizer, # builtins/Transformers.jl + UnivariateStandardizer, Standardizer, UnivariateBoxCoxTransformer, OneHotEncoder, ContinuousEncoder, UnivariateDiscretizer, FillImputer, matching, BinaryThresholdPredictor, - UnivariateTimeTypeToContinuous, InteractionTransformer + UnivariateTimeTypeToContinuous, InteractionTransformer # builtins/Transformers.jl # re-export from MLJIteration: export MLJIteration diff --git a/test/integration.jl b/test/integration.jl index 4dbbc2124..77b44dfc6 100644 --- a/test/integration.jl +++ b/test/integration.jl @@ -26,7 +26,9 @@ FILTER_GIVEN_ISSUE = Dict( (model.name == "AdaBoostStumpClassifier" && model.package_name == "DecisionTree") || (model.name == "COFDetector" && - model.package_name == "OutlierDetectionNeighbors"), + model.package_name == "OutlierDetectionNeighbors") || + (model.name == "TSVDTransformer" && + model.package_name == "TSVD"), "https://github.com/JuliaML/LIBSVM.jl/issues/98" => model -> model.name == "LinearSVC" && model.package_name == "LIBSVM", @@ -52,7 +54,10 @@ FILTER_GIVEN_ISSUE = Dict( "MultiTaskElasticNetRegressor", "MultiTaskLassoCVRegressor", "MultiTaskLassoRegressor", - ] + ], + "https://github.com/JuliaAI/FeatureSelection.jl/issues/15" => + model -> model.package_name == "FeatureSelection" && + model.name == "RecursiveFeatureElimination" )