From 3892a5fa176e0f6694048776d7b2855c50f1efca Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 11 Jan 2024 12:17:40 +1300 Subject: [PATCH 1/7] first steps to prepare Project.toml for integration --- Project.toml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index efcedce55..18eac11ea 100644 --- a/Project.toml +++ b/Project.toml @@ -39,19 +39,23 @@ MLJIteration = "0.6" MLJModels = "0.16" MLJTuning = "0.8" OpenML = "0.2,0.3" +Pkg = "<0.0.1, 1" ProgressMeter = "1.1" +Random = "<0.0.1, 1" Reexport = "1.2" ScientificTypes = "3" StatisticalMeasures = "0.1" -Statistics = "1" +Statistics = "<0.0.1, 1" StatsBase = "0.32,0.33, 0.34" Tables = "0.2,1.0" julia = "1.6" [extras] +MLJTestIntegration = "697918b4-fdc1-4f9e-8ff9-929724cee270" +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["NearestNeighborModels", "StableRNGs", "Test"] +test = ["Markdown", "MLJTestIntegration", "NearestNeighborModels", "StableRNGs", "Test"] From 0f49f1f41531fb627a01fb4558809281ac79118d Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 11 Jan 2024 12:33:13 +1300 Subject: [PATCH 2/7] add model-providing pkgs to Project.toml --- Project.toml | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 18eac11ea..48fd045a5 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,7 @@ CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +EvoLinear = "ab853011-1780-437f-b4b5-5de6f4777246" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLJBalancing = "45f359ea-796d-4f51-95a5-deb1a414c586" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" @@ -51,11 +52,39 @@ Tables = "0.2,1.0" julia = "1.6" [extras] +BetaML = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +CatBoost = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" +EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +Imbalance = "c709b415-507b-45b7-9a3d-1767c89fde68" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +LightGBM = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" +MLJClusteringInterface = "d354fa79-ed1c-40d4-88ef-b8c7bd1568af" +MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" +MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845" +MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" +MLJLIBSVMInterface = "61c7150f-6c77-4bb1-949c-13197eac2a52" +MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692" +MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728" +MLJNaiveBayesInterface = "33e4bacb-b9e2-458e-9a13-5d9a90b235fa" +MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" +MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" +MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJTestIntegration = "697918b4-fdc1-4f9e-8ff9-929724cee270" +MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" +MLJXGBoostInterface = 
"54119dfa-1dab-4055-a167-80440f4f7a91" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36" +OneRule = "90484964-6d6a-4979-af09-8657dbed84ff" +OutlierDetectionNeighbors = "51249a0a-cb36-4849-8e04-30c7f8d311bb" +OutlierDetectionPython = "2449c660-d36c-460e-a68b-92ab3c865b3e" +ParallelKMeans = "42b8e9d4-006b-409a-8472-7f34b3fb58af" +PartialLeastSquaresRegressor = "f4b1acfe-f311-436c-bb79-8483f53c17d5" +SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787" +SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958" +SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Markdown", "MLJTestIntegration", "NearestNeighborModels", "StableRNGs", "Test"] +test = ["BetaML", "CatBoost", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Test"] + From 0eb8c5231dcab999af05cc0dc8e088789cc7ea15 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Fri, 12 Jan 2024 15:14:20 +1300 Subject: [PATCH 3/7] add integration tests --- .github/workflows/ci.yml | 4 + Project.toml | 1 + test/integration.jl | 214 +++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 10 ++ 4 files changed, 229 insertions(+) create mode 100644 test/integration.jl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index febf81ac5..37de72758 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,10 @@ jobs: arch: - x64 steps: + - name: Set integration test flag + run: | + julia -e 'ENV["MLJ_TEST_INTEGRATION"]="true"' + if: (${{ github.head_ref }} == "dev") && (${{ github.repository }} == ${{ github.event.pull_request.head.repo.full_name }}) - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 with: diff --git a/Project.toml b/Project.toml index 48fd045a5..040657bde 100644 --- a/Project.toml +++ b/Project.toml @@ -38,6 +38,7 @@ MLJEnsembles = "0.4" MLJFlow = "0.3" MLJIteration = "0.6" MLJModels = "0.16" +MLJTestIntegration = "0.5.0" MLJTuning = "0.8" OpenML = "0.2,0.3" Pkg = "<0.0.1, 1" diff --git a/test/integration.jl b/test/integration.jl new file mode 100644 index 000000000..9ea3874f7 --- /dev/null +++ b/test/integration.jl @@ -0,0 +1,214 @@ +using MLJTestIntegration, MLJModels, MLJ, Test, Markdown +import MLJTestIntegration as MTI +import Pkg.TOML as TOML + +const JULIA_TEST_LEVEL = 1 +const OTHER_TEST_LEVEL = 1 + + +# # RECORD OF OUTSTANDING ISSUES + +FILTER_GIVEN_ISSUE = Dict( + "https://github.com/JuliaAI/CatBoost.jl/pull/28 (waiting for 0.3.3 release)" => + model -> model.name == "CatBoostRegressor", + "LOCIDetector too slow to train!" 
=> + model -> model.name == "LOCIDetector", + "https://github.com/JuliaML/LIBSVM.jl/issues/98" => + model -> model.name == "LinearSVC" && + model.package_name == "LIBSVM", + "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl/issues/4" => + model -> model.name == "CDDetector" && + model.package_name == "OutlierDetectionPython", + "https://github.com/JuliaAI/CatBoost.jl/issues/22" => + model -> model.name == "CatBoostClassifier", + "https://github.com/sylvaticus/BetaML.jl/issues/65" => + model -> model.name in ["KMeans", "KMedoids"] && + model.package_name == "BetaML", + "https://github.com/JuliaAI/MLJTSVDInterface.jl/pull/17" => + model -> model.name == "TSVDTransformer", + "https://github.com/alan-turing-institute/MLJ.jl/issues/1074" => + model -> model.name == "AutoEncoderMLJ", + "https://github.com/sylvaticus/BetaML.jl/issues/64" => + model -> model.name =="GaussianMixtureClusterer" && model.package_name=="BetaML", + "https://github.com/rikhuijzer/SIRUS.jl/issues/78" => + model -> model.package_name == "SIRUS", + "https://github.com/lalvim/PartialLeastSquaresRegressor.jl/issues/29 "* + "(still need release > 2.2.0)" => + model -> model.package_name == "PartialLeastSquaresRegressor", + "MLJScikitLearnInterface - multiple issues, hangs tests, WIP" => + model -> model.package_name == "MLJScikitLearnInterface", +) + +# # LOG OUTSTANDING ISSUES TO STDOUT + +const MODELS= models(); +const JULIA_MODELS = filter(m->m.is_pure_julia, MODELS); +const OTHER_MODELS = setdiff(MODELS, JULIA_MODELS); + +const EXCLUDED_BY_ISSUE = filter(MODELS) do model + any([p(model) for p in values(FILTER_GIVEN_ISSUE)]) +end; + +affected_packages = unique([m.package_name for m in EXCLUDED_BY_ISSUE]) +n_excluded = length(EXCLUDED_BY_ISSUE) +report = """ + +# Integration Tests + +Currently, $n_excluded models are excluded from integration tests because of outstanding +issues. When fixed, update `FILTER_GIVEN_ISSUE` in /test/integration.jl. 
+ +If an issue is related to model traits (aka metadata), then the MLJ Model Registry may +need to be updated to resolve the integration test failures. See the `MLJModels.@update` +document string for how to do that. + +## Outstanding issues + +"""; +for issue in keys(FILTER_GIVEN_ISSUE) + global report *= "\n- $issue\n" +end; +report *= "\n## Affected packages\n" +for pkg in affected_packages + global report *= "\n- $pkg" +end; +report_md = Markdown.parse(report); + +n_excluded > 0 && begin + show(stdout, MIME("text/plain"), report_md) + println() + println() + sleep(1) +end + + +# # FLAG MODELS THAT DON'T HAVE COMPATIBLE DATASETS FOR TESTING + +# We use the version of `MLJTestIntegration.test` that infers appropriate datasets. The +# datasets provided by MLJTestIntegration.jl are not yet comprehensive, so we exclude +# models from testing when no compatible dataset can be found. +WITHOUT_DATASETS = filter(MODELS) do model + # multi-target datasets: + model.target_scitype <: Union{Table, AbstractMatrix} || + # https://github.com/JuliaAI/MLJTestInterface.jl/issues/19 + model.package_name == "MLJText" || + # univariate transformers: + model.input_scitype <: AbstractVector || + # image data: + model.input_scitype <: AbstractVector{<:Image} || + # other data: + (model.name == "BernoulliNBClassifier" && + model.package_name == "MLJScikitLearnInterface") || + (model.name == "MultinomialNBClassifier" && + model.package_name == "NaiveBayes") || + (model.name == "OneRuleClassifier" && + model.package_name == "OneRule") || + (model.name == "ComplementNBClassifier" && + model.package_name == "MLJScikitLearnInterface") || + (model.name == "MultinomialNBClassifier" && + model.package_name == "MLJScikitLearnInterface") || + (model.name == "SMOTEN" && + model.package_name == "Imbalance") +end; + +# To remove any warning issued below, update `WITHOUT_DATASETS` defined above: +for model in WITHOUT_DATASETS + !isempty(MLJTestIntegration.datasets(model)) && + @warn "The model 
`$(model.name)` from `$(model.package_name)` "* + "is currently excluded "* + "from integration tests even though a compatible dataset appears "* + "to be available now. " +end + +# Additionally exclude some models for which the inferred datasets have a model-specific +# pathology that prevents a valid test: + +PATHOLOGIES = filter(MODELS) do model + # in the subsampling occurring in stacking, we get a Cholesky + # factorization fail (`PosDefException`): + (model.name=="GaussianNBClassifier" && model.package_name=="NaiveBayes") || + # https://github.com/JuliaStats/MultivariateStats.jl/issues/224 + (model.name =="ICA" && model.package_name=="MultivariateStats") || + # in tuned_pipe_evaluation C library gives "Incorrect parameter: specified nu is + # infeasible": + (model.name in ["NuSVC", "ProbabilisticNuSVC"] && + model.package_name == "LIBSVM") +end + +WITHOUT_DATASETS = vcat(WITHOUT_DATASETS, PATHOLOGIES) + + +# # CHECK PROJECT FILE INCLUDES ALL MODEL-PROVIDING PACKAGES + +# helper; `project_lines` are lines from a Project.toml file: +function pkgs(project_lines) + project = TOML.parse(join(project_lines, "\n")) + headings = Set(keys(project)) ∩ ["deps", "extras"] + return vcat(collect.(keys.([project[h] for h in headings]))...) 
+end + +# identify missing pkgs: +project_path = joinpath(@__DIR__, "..", "Project.toml") +project_lines = open(project_path) do io + readlines(io) +end +pkgs_in_project = pkgs(project_lines) +registry_project_lines = MLJModels.Registry.registry_project() +pkgs_in_registry = pkgs(registry_project_lines) +missing_pkgs = setdiff(pkgs_in_registry, pkgs_in_project) + +# throw error if there are any: +isempty(missing_pkgs) || error( + "Integration tests cannot proceed because the following packages are "* + "missing from the [extras] section of the MLJ Project.toml file: "* + join(missing_pkgs, ", ") +) + +# # LOAD ALL MODEL CODE + +# Load all the model providing packages with a broad level=1 test: +MLJTestIntegration.test(MODELS, (nothing, ), level=1, throw=true, verbosity=0); + + +# # JULIA TESTS + +options = ( + level = JULIA_TEST_LEVEL, + verbosity = 0, # bump to 2 to debug + throw = true, +) +@testset "level 4 tests" begin + println() + for model in JULIA_MODELS + + # exclusions: + model in WITHOUT_DATASETS && continue + model in EXCLUDED_BY_ISSUE && continue + + print("\rTesting $(model.name) ($(model.package_name)) ") + @test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...)) + end +end + + +# # NON-JULIA TESTS + +options = ( + level = OTHER_TEST_LEVEL, + verbosity = 0, # bump to 2 to debug + throw = true, +) +@testset "level 3 tests" begin + println() + for model in OTHER_MODELS + + # exclusions: + model in WITHOUT_DATASETS && continue + model in EXCLUDED_BY_ISSUE && continue + + print("\rTesting $(model.name) ($(model.package_name)) ") + @test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...)) + end +end + +true diff --git a/test/runtests.jl b/test/runtests.jl index 8babfa8e2..676be5349 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,3 +19,13 @@ end @testset "scitypes" begin @test include("scitypes.jl") end + +if get(ENV, "MLJ_TEST_INTEGRATION", "false") == "true" + @testset "integration" begin + @test 
include("integration.jl") + end +else + @info "Integration tests skipped. Set environment variable "* + "MLJ_TEST_INTEGRATION = \"true\" to include them.\n"* + "Integration tests take at least one hour. " +end From a09cd445d03eb0ec494bc2a2612092b1bde97eab Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 12 Jan 2024 16:43:52 +1300 Subject: [PATCH 4/7] try to fix ci.yml syntax error --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 37de72758..ed6194b08 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Set integration test flag run: | - julia -e 'ENV["MLJ_TEST_INTEGRATION"]="true"' + 'julia -e 'ENV["MLJ_TEST_INTEGRATION"]="true"' if: (${{ github.head_ref }} == "dev") && (${{ github.repository }} == ${{ github.event.pull_request.head.repo.full_name }}) - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 From 2d3ae0b118ceea74a8b849fca01529ca1389ddf6 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 12 Jan 2024 16:47:23 +1300 Subject: [PATCH 5/7] try again --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed6194b08..8153381e0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,9 @@ jobs: steps: - name: Set integration test flag run: | - 'julia -e 'ENV["MLJ_TEST_INTEGRATION"]="true"' + julia -e ' + ENV["MLJ_TEST_INTEGRATION"]="true"' + if: (${{ github.head_ref }} == "dev") && (${{ github.repository }} == ${{ github.event.pull_request.head.repo.full_name }}) - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 From e52e1409d14de85538294c295ed3e3fac26cee9b Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Fri, 12 Jan 2024 17:26:26 +1300 Subject: [PATCH 6/7] rm tabs --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8153381e0..ee86a017e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,10 +27,10 @@ jobs: steps: - name: Set integration test flag run: | - julia -e ' - ENV["MLJ_TEST_INTEGRATION"]="true"' - - if: (${{ github.head_ref }} == "dev") && (${{ github.repository }} == ${{ github.event.pull_request.head.repo.full_name }}) + julia -e ' + ENV["MLJ_TEST_INTEGRATION"]="true"' + + if: (${{ github.head_ref }} == "dev") && (${{ github.repository }} == ${{ github.event.pull_request.head.repo.full_name }}) - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 with: From 5d1f56c6ce04fbba0a0c25c64ea938e1f8fd7f8e Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 12 Jan 2024 17:29:23 +1300 Subject: [PATCH 7/7] remove julia 1.6 from ci --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee86a017e..6f457adc2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,6 @@ jobs: fail-fast: false matrix: version: - - '1.6' - '1' # automatically expands to the latest stable 1.x release of Julia. os: - ubuntu-latest