diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ac1e885e..568d8e48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,6 +15,11 @@ env: TEST_MLJBASE: "true" jobs: test: + services: + mlflow: + image: adacotechjp/mlflow:2.3.1 + ports: + - 5000:5000 name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} timeout-minutes: 60 diff --git a/Project.toml b/Project.toml index a5829d3c..792a7ca5 100644 --- a/Project.toml +++ b/Project.toml @@ -68,4 +68,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9" [targets] -test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "Test", "TypedTables"] +test = ["DataFrames", "DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "Test", "TypedTables", "MLFlowClient"] diff --git a/ext/LoggersExt/LoggersExt.jl b/ext/LoggersExt/LoggersExt.jl index 7d722512..cc4d882f 100644 --- a/ext/LoggersExt/LoggersExt.jl +++ b/ext/LoggersExt/LoggersExt.jl @@ -1,10 +1,11 @@ module LoggersExt using MLJBase: info, name, Model, - params, Machine, Measure, - flat_params + Machine, Measure, flat_params -import MLJBase: save, evaluate!, MLFlowLogger +import MLJBase: save, evaluate!, mlflow_logger + +include("utils.jl") include("mlflow.jl") diff --git a/ext/LoggersExt/mlflow.jl b/ext/LoggersExt/mlflow.jl index d066de18..a793c982 100644 --- a/ext/LoggersExt/mlflow.jl +++ b/ext/LoggersExt/mlflow.jl @@ -2,14 +2,14 @@ using MLFlowClient: MLFlow, logparam, logmetric, createrun, MLFlowRun, updaterun, logartifact, getorcreateexperiment -struct MLFlowInstance +struct MLFlowLogger client::MLFlow experiment_name::String artifact_location::Union{String, Missing} end -MLFlowLogger(base_uri::String, experiment_name::String, +mlflow_logger(base_uri::String, experiment_name::String, 
artifact_location::Union{String, Missing}) = - MLFlowInstance(MLFlow(base_uri), experiment_name, artifact_location) + MLFlowLogger(MLFlow(base_uri), experiment_name, artifact_location) function _logmodelparams(client::MLFlow, run::MLFlowRun, model::Model) model_params = params(model) |> flat_params |> collect @@ -18,8 +18,8 @@ function _logmodelparams(client::MLFlow, run::MLFlowRun, model::Model) end end -function _logmachinemeasures(client::MLFlow, run::MLFlowRun, measures::Vector{Measure}, - measurements::Vector{Float64}) +function _logmachinemeasures(client::MLFlow, run::MLFlowRun, measures::Vector{T}, + measurements::Vector{Float64}) where T<:Measure measure_names = measures .|> info .|> x -> x.name for (name, value) in zip(measure_names, measurements) logmetric(client, run, name, value) @@ -29,7 +29,7 @@ end function evaluate!(mach::Machine, resampling, weights, class_weights, rows, verbosity, repeats, measures, operations, - acceleration, force, logger::MLFlowInstance) + acceleration, force, logger::MLFlowLogger) performance_evaluation = evaluate!(mach, resampling, weights, class_weights, rows, verbosity, repeats, measures, operations, @@ -46,10 +46,11 @@ function evaluate!(mach::Machine, resampling, weights, return performance_evaluation end -function save(logger::MLFlowInstance, mach::Machine) +function save(logger::MLFlowLogger, mach::Machine) + io = IOBuffer() + save(io, mach) + model_name = name(mach.model) - fname = "$(model_name).jls" - save(fname, mach) experiment = getorcreateexperiment(logger.client, logger.experiment_name, artifact_location=logger.artifact_location) @@ -57,6 +58,7 @@ function save(logger::MLFlowInstance, mach::Machine) run_name="$(model_name) run") _logmodelparams(logger.client, run, mach.model) - logartifact(logger.client, run, fname) - rm(fname) + fname = "$(model_name).jls" + logartifact(logger.client, run, fname, io) + updaterun(logger.client, run, "FINISHED") end diff --git a/src/parameter_inspection.jl 
b/ext/LoggersExt/utils.jl similarity index 68% rename from src/parameter_inspection.jl rename to ext/LoggersExt/utils.jl index f58864e4..2bb63749 100644 --- a/src/parameter_inspection.jl +++ b/ext/LoggersExt/utils.jl @@ -1,19 +1,19 @@ -istransparent(::Any) = false -istransparent(::MLJType) = true +isamodel(::Any) = false +isamodel(::Model) = true """ - params(m::MLJType) + params(m::Model) Recursively convert any transparent object `m` into a named tuple, keyed on the property names of `m`. An object is *transparent* if -`MLJBase.istransparent(m) == true`. The named tuple is possibly nested +`isamodel(m) == true`. The named tuple is possibly nested because `params` is recursively applied to the property values, which themselves might be transparent. -For most `MLJType` objects, properties are synonymous with fields, but +For most `Model` objects, properties are synonymous with fields, but this is not a hard requirement. -Most objects of type `MLJType` are transparent. +Most objects of type `Model` are transparent. julia> params(EnsembleModel(atom=ConstantClassifier())) (atom = (target_type = Bool,), @@ -24,7 +24,7 @@ Most objects of type `MLJType` are transparent. 
parallel = true,) """ -params(m) = params(m, Val(istransparent(m))) +params(m) = params(m, Val(isamodel(m))) params(m, ::Val{false}) = m function params(m, ::Val{true}) fields = propertynames(m) diff --git a/src/MLJBase.jl b/src/MLJBase.jl index ffa0aac0..ae895a19 100644 --- a/src/MLJBase.jl +++ b/src/MLJBase.jl @@ -294,11 +294,15 @@ export coerce, coerce!, autotype, schema, info export UnivariateFiniteArray, UnivariateFiniteVector # ----------------------------------------------------------------------- -# abstract model types defined in MLJModelInterface.jl and extended here: +# re-export from MLJModelInterface.jl + +# abstract model types defined in MLJModelInterface.jl and extended here: for T in EXTENDED_ABSTRACT_MODEL_TYPES @eval(export $T) end +export params + # ------------------------------------------------------------------- # exports from this module, MLJBase @@ -308,9 +312,6 @@ export default_resource # one_dimensional_ranges.jl: export ParamRange, NumericRange, NominalRange, iterator, scale -# parameter_inspection.jl: -export params # note this is *not* an extension of StatsBase.params - # data.jl: export partition, unpack, complement, restrict, corestrict @@ -381,7 +382,7 @@ export pdf, sampler, mode, median, mean, shuffle!, categorical, shuffle, levels, levels!, std, Not, support, logpdf, LittleDict # loggers.jl -export MLFlowLogger +export mlflow_logger if !isdefined(Base, :get_extension) include("../ext/LoggersExt/LoggersExt.jl") diff --git a/src/loggers.jl b/src/loggers.jl index dce7b2d1..c2849b2f 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -1,7 +1,8 @@ """ - MLFlowLogger(; base_uri="localhost:5000", experiment_name=missing) + mlflow_logger(; base_uri="http://localhost:5000", experiment_name="MLJ experiments", artifact_location=missing) -Base type for MLFlow logger. Creates an instance of MLFlow, as defined in +Constructor for the base type for MLFlow logger. 
Creates an instance of MLFlow, +as defined in [`MLFlowClient.jl`](https://juliaai.github.io/MLFlowClient.jl/dev/), and logs to an experiment. @@ -13,14 +14,14 @@ If `experiment_name` is not provided, a new experiment with the name "MLJ.jl experiments" will be created. ### Return value -A `MLFlowInstance` object, containing a +A `MLFlowLogger` object, containing a [`MLFlow`](https://juliaai.github.io/MLFlowClient.jl/dev/reference/#MLFlowClient.MLFlow) object and the experiment name """ -MLFlowLogger(; base_uri="http://localhost:5000", +mlflow_logger(; base_uri="http://localhost:5000", experiment_name="MLJ experiments", artifact_location=missing) = - MLFlowLogger(base_uri, experiment_name, artifact_location) -MLFlowLogger(_, _, _) = - error("Please run `import MLFlowClient` to use MLFlowLogger.") + mlflow_logger(base_uri, experiment_name, artifact_location) +mlflow_logger(_, _, _) = + error("Please run `import MLFlowClient` to use mlflow_logger.") diff --git a/src/utilities.jl b/src/utilities.jl index f19eceec..1dcc0235 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -48,14 +48,14 @@ LittleDict{...} with 3 entries: "Y" => 3 ``` """ -function flat_params(params::NamedTuple) +function flat_params(parameters::NamedTuple) result = LittleDict{String, Any}() - for key in keys(params) - value = getproperty(params, key) + for key in keys(parameters) + value = params(getproperty(parameters, key)) if value isa NamedTuple sub_dict = flat_params(value) for (sub_key, sub_value) in pairs(sub_dict) - new_key = string(key, "_", sub_key) + new_key = string(key, "__", sub_key) result[new_key] = sub_value end else diff --git a/test/extensions/loggers.jl b/test/extensions/loggers.jl new file mode 100644 index 00000000..64d2830d --- /dev/null +++ b/test/extensions/loggers.jl @@ -0,0 +1,60 @@ +module TestLoggers + +using Test +using MLJBase +using ..Models + +@testset "mlflow logger" begin + artifact_directory = "mlj-test" + experiment_name = "mlflow logger tests" + + @testset 
"outside extension tests" begin + @test_throws ErrorException mlflow_logger() + + using MLFlowClient + logger = mlflow_logger(; experiment_name=experiment_name, artifact_location=artifact_directory) + + @test logger.client isa MLFlow + @test logger.experiment_name == experiment_name + @test logger.artifact_location == artifact_directory + end # @testset "outside extension tests" + + @testset "extension tests" begin + X = (x=rand(4),) + y = ["Chenta", "Missy", "Gala", "Wendy"] |> categorical + + mach = machine(ConstantClassifier(), X, y) + fit!(mach, verbosity=0) + + logger = mlflow_logger(; experiment_name=experiment_name, artifact_location=artifact_directory) + + @testset "save" begin + run = MLJBase.save(logger, mach) + experiment = getexperiment(logger.client, run.info.experiment_id) + @test run isa MLFlowRun + @test experiment isa MLFlowExperiment + + deleterun(logger.client, run) + deleteexperiment(logger.client, experiment) + end # @testset "save" + + @testset "evaluate!" begin + evaluate!(mach, resampling=Holdout(), logger=logger) + + experiments = searchexperiments(logger.client) + experiments_ids = experiments .|> (e -> e.experiment_id) + runs = searchruns(logger.client, experiments_ids) + + # it's 2 because of the default experiment + @test length(experiments_ids) == 2 + @test length(runs) == 1 + + deleterun(logger.client, runs[1]) + deleteexperiment(logger.client, experiments[2]) + end # @testset "evaluate!" + end # @testset "extension tests" +end # @testset "mlflow logger" + +end # module + +true diff --git a/test/runtests.jl b/test/runtests.jl index 8b07929e..a55c5d19 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -80,3 +80,7 @@ end @test include("hyperparam/one_dimensional_ranges.jl") @test include("hyperparam/one_dimensional_range_methods.jl") end + +@conditional_testset "extensions" begin + @test include("extensions/loggers.jl") +end