diff --git a/NEWS.md b/NEWS.md
index 3971b3405..b0920e6cc 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,9 +1,11 @@
 MixedModels v5.0.0 Release Notes
 ==============================
-- Optimization is now performed _without constraints_. In a post-fitting step, the the Cholesky factor is canonicalized to have non-negative diagonal elements. [#840]
-- The default optimizer has changed to NLopt's implementation of NEWUOA where possible. NLopt's implementation fails on 1-dimensional problems, so in the case A single, scalar random effect, BOBYQA is used instead. In the future, the default optimizer backend will likely change to PRIMA and NLopt support will be moved to an extension. Blocking this change in backend is an issue with PRIMA.jl when running in VSCode's built-in REPL on Linux. [#840]
+- Optimization is now performed _without constraints_. In a post-fitting step, the Cholesky factor is canonicalized to have non-negative diagonal elements. [#840]
+- The default optimizer has changed to NLopt's implementation of NEWUOA where possible. NLopt's implementation fails on 1-dimensional problems, so in the case of a single, scalar random effect, BOBYQA is used instead. In the future, the default optimizer backend will likely change to PRIMA and NLopt support will be moved to an extension. This change in backend is currently blocked by an issue with PRIMA.jl when running in VSCode's built-in REPL on Linux. [#840]
 - [BREAKING] Support for constrained optimization has been completely removed, i.e. the field `lowerbd` has been removed from `OptSummary`.
-
+- Internal code around the default optimizer has been restructured. In particular, the NLopt backend has been moved to a submodule, which will make it easier to move it to an extension if we promote another backend to the default. [#853]
+- Internal code around optimization in profiling has been restructured so that fitting done during calls to `profile` respects the `backend` and `optimizer` settings. [#853]
+- The `prfit!` convenience function has been removed. [#853]
 
 MixedModels v4.38.0 Release Notes
 ==============================
diff --git a/docs/make.jl b/docs/make.jl
index 1e2dd0aff..1ee57b3c4 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -11,6 +11,7 @@ makedocs(;
     doctest=true,
     # pagesonly=true,
     # warnonly=true,
+    # warnonly=[:cross_references],
     pages=[
         "index.md",
         "constructors.md",
diff --git a/docs/src/api.md b/docs/src/api.md
index 42b840280..1deee7aab 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -79,9 +79,9 @@ stderrror!
 varest
 ```
 
-## Non-Exported Functions
+## Non-Exported Functions and Constants
 
-Note that unless discussed elsewhere in the online documentation, non-exported functions should be considered implementation details.
+Note that unless discussed elsewhere in the online documentation, non-exported functions and constants should be considered implementation details.
 
 ```@autodocs
 Modules = [MixedModels]
@@ -89,3 +89,7 @@ Public = false
 Order = [:function]
 Filter = f -> !startswith(string(f), "_")
 ```
+
+```@docs
+MixedModels.OPTIMIZATION_BACKENDS
+```
diff --git a/docs/src/optimization.md b/docs/src/optimization.md
index f73a8e887..65c6f8756 100644
--- a/docs/src/optimization.md
+++ b/docs/src/optimization.md
@@ -222,16 +222,35 @@ Typically, the (1,1) block is the largest block in `A` and `L` and it has a spec
 `UniformBlockDiagonal` providing a compact representation and fast matrix multiplication or solutions of linear systems of equations.
 
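+The block structure of a fitted model can be inspected with [`BlockDescription`](@ref); here `fm2` is assumed to be the model fit earlier on this page:
+```@example Main
+BlockDescription(fm2)
+```
+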
-### Modifying the optimization process
+## Modifying the optimization process
 
-The `OptSummary` object contains both input and output fields for the optimizer.
+The [`OptSummary`](@ref) object contains both input and output fields for the optimizer.
 To modify the optimization process the input fields can be changed after constructing the model but before fitting it.
+In addition to various tolerances, which we will not discuss further here, users can specify the choice of `backend` (i.e., the non-linear optimization library used) and `optimizer` (i.e., the implementation of an algorithm provided by the backend).
+
+The current default backend is [NLopt](https://github.com/JuliaOpt/NLopt.jl), which is a direct dependency of MixedModels.jl.
+A [PRIMA](https://github.com/libprima/PRIMA.jl/) backend is also provided as a package extension and thus only
+available when the PRIMA package is loaded.
+The list of currently loaded backends is available as [`MixedModels.OPTIMIZATION_BACKENDS`](@ref).
+For each individual backend, the list of available optimizers can be inspected with the function [`MixedModels.optimizers`](@ref).
+```@example Main
+MixedModels.optimizers(:nlopt)
+```
+Similarly, the list of applicable optimization parameters can be inspected with the function [`MixedModels.opt_params`](@ref).
+```@example Main
+MixedModels.opt_params(:nlopt)
+```
+
+!!! note "Optimizer defaults subject to change"
+    The choice of default backend and optimizer is subject to change without being considered a breaking change.
+    If you want to guarantee a particular backend and optimizer, then you should
+    explicitly load the associated backend's package (e.g. NLopt or PRIMA) and manually set the `optimizer` and `backend` fields.
 
 Suppose, for example, that the user wishes to try a [Nelder-Mead](https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method) optimization method instead of the default [`BOBYQA`](https://en.wikipedia.org/wiki/BOBYQA) (Bounded Optimization BY Quadratic Approximation) method.
 
 ```@example Main
 fm2nm = LinearMixedModel(@formula(reaction ~ 1+days+(1+days|subj)), sleepstudy);
 fm2nm.optsum.optimizer = :LN_NELDERMEAD;
-fit!(fm2nm; thin=1)
+fit!(fm2nm; fitlog=true)
 fm2nm.optsum
 DisplayAs.Text(ans) # hide
 ```
@@ -252,8 +271,6 @@ plot(convdf, x=:step, y=:objective, color=:algorithm, Geom.line)
 
 Run time can be constrained with `maxfeval` and `maxtime`.
 
-See the documentation for the [`NLopt`](https://github.com/JuliaOpt/NLopt.jl) package for details about the various settings.
-
 ### Convergence to singular covariance matrices
 
 To ensure identifiability of $\Sigma_\theta=\sigma^2\Lambda_\theta \Lambda_\theta'$, the elements of $\theta$ corresponding to diagonal elements of $\Lambda_\theta$ are constrained to be non-negative.
diff --git a/ext/MixedModelsPRIMAExt.jl b/ext/MixedModelsPRIMAExt.jl
index 39dd3a4ef..33a509584 100644
--- a/ext/MixedModelsPRIMAExt.jl
+++ b/ext/MixedModelsPRIMAExt.jl
@@ -2,7 +2,8 @@ module MixedModelsPRIMAExt
 
 using MixedModels
 using MixedModels: Statistics
-using MixedModels: ProgressMeter, ProgressUnknown, objective!, _objective!
+using MixedModels.ProgressMeter: ProgressMeter, ProgressUnknown
+using MixedModels: objective!, _objective!, rectify!
 using LinearAlgebra: PosDefException
 using PRIMA: PRIMA
 
@@ -13,19 +14,16 @@ end
 
 const PRIMABackend = Val{:prima}
 
-function MixedModels.prfit!(m::LinearMixedModel;
-    kwargs...)
-    MixedModels.unfit!(m)
-    m.optsum.optimizer = :bobyqa
-    m.optsum.backend = :prima
-
-    return fit!(m; kwargs...)
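+# Non-mutating wrapper for `_optimizer!` below: it copies the starting point `x0`
+# so that the caller's vector is not overwritten, then returns the minimizer
+# together with the `PRIMA.Info` result.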
+function _optimizer(o::Val{O}, f, x0::Vector, args...; kwargs...) where {O}
+    x0 = copy(x0)
+    info = _optimizer!(o, f, x0, args...; kwargs...)
+    return x0, info
 end
 
-prima_optimizer!(::Val{:bobyqa}, args...; kwargs...) = PRIMA.bobyqa!(args...; kwargs...)
-prima_optimizer!(::Val{:cobyla}, args...; kwargs...) = PRIMA.cobyla!(args...; kwargs...)
-prima_optimizer!(::Val{:lincoa}, args...; kwargs...) = PRIMA.lincoa!(args...; kwargs...)
-prima_optimizer!(::Val{:newuoa}, args...; kwargs...) = PRIMA.newuoa!(args...; kwargs...)
+_optimizer!(::Val{:bobyqa}, args...; kwargs...) = PRIMA.bobyqa!(args...; kwargs...)
+_optimizer!(::Val{:cobyla}, args...; kwargs...) = PRIMA.cobyla!(args...; kwargs...)
+_optimizer!(::Val{:lincoa}, args...; kwargs...) = PRIMA.lincoa!(args...; kwargs...)
+_optimizer!(::Val{:newuoa}, args...; kwargs...) = PRIMA.newuoa!(args...; kwargs...)
 
 function MixedModels.optimize!(m::LinearMixedModel, ::PRIMABackend;
     progress::Bool=true, fitlog::Bool=false, kwargs...)
@@ -56,8 +54,7 @@ function MixedModels.optimize!(m::LinearMixedModel, ::PRIMABackend;
     end
 
     maxfun = optsum.maxfeval > 0 ? optsum.maxfeval : 500 * length(optsum.initial)
-    info = prima_optimizer!(Val(optsum.optimizer), obj, optsum.final;
-        # xl=optsum.lowerbd,
+    info = _optimizer!(Val(optsum.optimizer), obj, optsum.final;
         maxfun,
         optsum.rhoend, optsum.rhobeg)
     ProgressMeter.finish!(prog)
@@ -108,7 +105,7 @@ function MixedModels.optimize!(m::GeneralizedLinearMixedModel, ::PRIMABackend;
         end
         sc
     end
-    info = prima_optimizer!(Val(optsum.optimizer), obj, optsum.final;
+    info = _optimizer!(Val(optsum.optimizer), obj, optsum.final;
         xl=optsum.lowerbd, maxfun,
         optsum.rhoend, optsum.rhobeg, scale)
 
@@ -130,6 +127,34 @@ function _check_prima_return(info::PRIMA.Info)
     return nothing
 end
 
-MixedModels.opt_params(::PRIMABackend) = (:rhobeg, :rhoend, :maxfeval)
+MixedModels.opt_params(::PRIMABackend) = [:rhobeg, :rhoend, :maxfeval]
+MixedModels.optimizers(::PRIMABackend) = [:bobyqa, :cobyla, :lincoa, :newuoa]
+
+function MixedModels.profilevc(obj, optsum::OptSummary, ::PRIMABackend; kwargs...)
+    maxfun = optsum.maxfeval > 0 ? optsum.maxfeval : 500 * length(optsum.initial)
+    xmin, info = _optimizer(Val(optsum.optimizer), obj,
+        copyto!(optsum.final, optsum.initial);
+        maxfun,
+        optsum.rhoend, optsum.rhobeg,
+        scale=nothing) # will need to scale for GLMM
+    _check_prima_return(info)
+    fmin = info.fx
+    return fmin, xmin
+end
+
+function MixedModels.profileobj!(obj,
+    m::LinearMixedModel{T}, θ::AbstractVector{T}, osj::OptSummary, ::PRIMABackend;
+    kwargs...) where {T}
+    maxfun = osj.maxfeval > 0 ? osj.maxfeval : 500 * length(osj.initial)
+    xmin = copyto!(osj.final, osj.initial)
+    info = _optimizer!(Val(osj.optimizer), obj, xmin;
+        maxfun,
+        osj.rhoend, osj.rhobeg,
+        scale=nothing) # will need to scale for GLMM
+    fmin = info.fx
+    _check_prima_return(info)
+    rectify!(m)
+    return fmin
+end
 
 end # module
diff --git a/src/MixedModels.jl b/src/MixedModels.jl
index 180a99490..852be8fdd 100644
--- a/src/MixedModels.jl
+++ b/src/MixedModels.jl
@@ -20,10 +20,10 @@ using LinearAlgebra: ldiv!, lmul!, logdet, mul!, norm, normalize, normalize!, qr
 using LinearAlgebra: rank, rdiv!, rmul!, svd, tril!
 using Markdown: Markdown
 using MixedModelsDatasets: dataset, datasets
-using NLopt: NLopt, Opt
 using PooledArrays: PooledArrays, PooledArray
+using NLopt: NLopt
 using PrecompileTools: PrecompileTools, @setup_workload, @compile_workload
-using ProgressMeter: ProgressMeter, Progress, ProgressUnknown, finish!, next!
+using ProgressMeter: ProgressMeter, Progress, finish!, next!
 using Random: Random, AbstractRNG, randn!
 using SparseArrays: SparseArrays, SparseMatrixCSC, SparseVector, dropzeros!, nnz
 using SparseArrays: nonzeros, nzrange, rowvals, sparse
@@ -213,8 +213,9 @@ include("grouping.jl")
 include("mimeshow.jl")
 include("serialization.jl")
 include("profile/profile.jl")
-include("nlopt.jl")
-include("prima.jl")
+include("MixedModelsNLoptExt.jl")
+using .MixedModelsNLoptExt
+
 include("derivatives.jl")
 
 # aliases with non-unicode function names
diff --git a/src/nlopt.jl b/src/MixedModelsNLoptExt.jl
similarity index 72%
rename from src/nlopt.jl
rename to src/MixedModelsNLoptExt.jl
index 323b2ec23..7a92b48a5 100644
--- a/src/nlopt.jl
+++ b/src/MixedModelsNLoptExt.jl
@@ -1,8 +1,24 @@
-push!(OPTIMIZATION_BACKENDS, :nlopt)
+module MixedModelsNLoptExt # not actually an extension at the moment
+
+using ..MixedModels
+using ..MixedModels: objective!, _objective!, rectify!
+# ProgressMeter and friends are accessed via MixedModels because they
+# are part of the package's dependencies and will not be part
+# of the extension's dependencies
+using ..MixedModels.ProgressMeter: ProgressMeter, ProgressUnknown
+
+# stdlib
+using LinearAlgebra: PosDefException
+# will be a weakdep when this is moved to an extension
+using NLopt: NLopt, Opt
+
+function __init__()
+    push!(MixedModels.OPTIMIZATION_BACKENDS, :nlopt)
+    return nothing
+end
 
 const NLoptBackend = Val{:nlopt}
 
-function optimize!(m::LinearMixedModel, ::NLoptBackend;
+function MixedModels.optimize!(m::LinearMixedModel, ::NLoptBackend;
                    progress::Bool=true, fitlog::Bool=false,
                    kwargs...)
     optsum = m.optsum
@@ -42,7 +58,7 @@ function optimize!(m::LinearMixedModel, ::NLoptBackend;
     return xmin, fmin
 end
 
-function optimize!(m::GeneralizedLinearMixedModel, ::NLoptBackend;
+function MixedModels.optimize!(m::GeneralizedLinearMixedModel, ::NLoptBackend;
                    progress::Bool=true, fitlog::Bool=false,
                    fast::Bool=false, verbose::Bool=false, nAGQ=1,
                    kwargs...)
@@ -121,6 +137,32 @@ function _check_nlopt_return(ret, failure_modes=_NLOPT_FAILURE_MODES)
     end
 end
 
-function opt_params(::NLoptBackend)
-    return (:ftol_rel, :ftol_abs, :xtol_rel, :xtol_abs, :initial_step, :maxfeval, :maxtime)
+function MixedModels.opt_params(::NLoptBackend)
+    return [:ftol_rel, :ftol_abs, :xtol_rel, :xtol_abs, :initial_step, :maxfeval, :maxtime]
 end
+
+function MixedModels.optimizers(::NLoptBackend)
+    return [:LN_NEWUOA, :LN_BOBYQA, :LN_COBYLA, :LN_NELDERMEAD, :LN_PRAXIS]
+end
+
+function MixedModels.profilevc(obj, optsum::OptSummary, ::NLoptBackend; kwargs...)
+    opt = NLopt.Opt(optsum)
+    NLopt.min_objective!(opt, obj)
+    fmin, xmin, ret = NLopt.optimize!(opt, copyto!(optsum.final, optsum.initial))
+    _check_nlopt_return(ret)
+
+    return fmin, xmin
+end
+
+function MixedModels.profileobj!(obj,
+    m::LinearMixedModel{T}, θ::AbstractVector{T}, osj::OptSummary, ::NLoptBackend;
+    kwargs...) where {T}
+    opt = NLopt.Opt(osj)
+    NLopt.min_objective!(opt, obj)
+    fmin, xmin, ret = NLopt.optimize(opt, copyto!(osj.final, osj.initial))
+    _check_nlopt_return(ret)
+    rectify!(m)
+    return fmin
+end
+
+end # module
diff --git a/src/optsummary.jl b/src/optsummary.jl
index 9be1e3db3..de361894b 100644
--- a/src/optsummary.jl
+++ b/src/optsummary.jl
@@ -20,7 +20,21 @@ Summary of an optimization
 
 ## Choice of optimizer and backend
 * `optimizer`: the name of the optimizer used, as a `Symbol`
-* `backend`: the optimization library providing the optimizer, default is `NLoptBackend`.
+* `backend`: the optimization library providing the optimizer, stored as a symbol.
+  The current default is `:nlopt`.
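+
+  For example, to request PRIMA's implementation of BOBYQA for a model that has
+  been constructed but not yet fitted (a sketch; assumes the PRIMA extension
+  described below has been loaded so that the `:prima` backend is available):
+
+  ```julia
+  model.optsum.backend = :prima
+  model.optsum.optimizer = :bobyqa
+  fit!(model)
+  ```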
+
+The current default backend is NLopt, which is a direct dependency of MixedModels.jl.
+A PRIMA backend is also provided as a package extension and thus only
+available when the PRIMA package is loaded.
+The list of currently loaded backends is available as [`MixedModels.OPTIMIZATION_BACKENDS`](@ref).
+For each individual backend, the list of available optimizers can be inspected with the function [`MixedModels.optimizers`](@ref).
+Similarly, the list of applicable optimization parameters can be inspected with the function [`MixedModels.opt_params`](@ref).
+
+!!! note "Optimizer defaults subject to change"
+    The choice of backend and optimizer is subject to change without being considered a breaking
+    change. If you want to guarantee a particular backend and optimizer, then you should
+    explicitly load the associated backend's package (e.g. NLopt or PRIMA) and manually
+    set the `optimizer` and `backend` fields.
 
 ## Backend-specific fields
 * `ftol_rel`: as in NLopt, not used in PRIMA
@@ -185,3 +199,20 @@ Return a collection of the fields of [`OptSummary`](@ref) used by backend.
 they are used _after_ optimization and are thus shared across backends.
 """
 function opt_params end
+
+opt_params(s::Symbol) = opt_params(Val(s))
+
+"""
+    optimizers(::Val{backend})
+
+Return a collection of the algorithms supported by the backend.
+
+!!! note
+    The names of the algorithms are not necessarily consistent across backends.
+    For example, NLopt has `:LN_BOBYQA` and PRIMA has `:bobyqa` for Powell's
+    BOBYQA algorithm. In other words, we have not yet abstracted over the
+    backends' differing naming conventions for algorithms.
+"""
+function optimizers end
+
+optimizers(s::Symbol) = optimizers(Val(s))
diff --git a/src/prima.jl b/src/prima.jl
deleted file mode 100644
index 0d9cbe823..000000000
--- a/src/prima.jl
+++ /dev/null
@@ -1,21 +0,0 @@
-
-"""
-    prfit!(m::LinearMixedModel; kwargs...)
-
-Fit a mixed model using the [PRIMA](https://github.com/libprima/PRIMA.jl) implementation
-of the BOBYQA optimizer.
-
-!!! warning "Experimental feature"
-    This function is an experimental feature that will go away in the future.
-    Do **not** rely on it, unless you are willing to pin the precise MixedModels.jl
-    version. The purpose of the function is to provide the MixedModels developers
-    a chance to explore the performance of the PRIMA implementation without the large
-    and potentially breaking changes it would take to fully replace the current NLopt
-    backend with a PRIMA backend or a backend supporting a range of optimizers.
-
-!!! note "Package extension"
-    In order to reduce the dependency burden, all methods of this function are
-    implemented in a package extension and are only defined when PRIMA.jl is loaded
-    by the user.
-"""
-function prfit! end
diff --git a/src/profile/thetapr.jl b/src/profile/thetapr.jl
index 6f3b357b5..a89902504 100644
--- a/src/profile/thetapr.jl
+++ b/src/profile/thetapr.jl
@@ -10,17 +10,15 @@ function optsumj(os::OptSummary, j::Integer)
     return OptSummary(
         deleteat!(copy(os.final), j),
         deleteat!(copy(os.lowerbd), j),
-        os.optimizer,
+        os.optimizer;
+        os.backend,
     )
 end
 
-function profileobj!(
-    m::LinearMixedModel{T}, θ::AbstractVector{T}, opt::Opt, osj::OptSummary
-) where {T}
+function profileobj!(obj,
+    m::LinearMixedModel{T}, θ::AbstractVector{T}, osj::OptSummary) where {T}
     isone(length(θ)) && return objective!(m, θ)
-    fmin, xmin, ret = NLopt.optimize(opt, copyto!(osj.final, osj.initial))
-    _check_nlopt_return(ret)
-    return fmin
+    return profileobj!(obj, m, θ, osj, Val(osj.backend))
 end
 
@@ -32,14 +30,12 @@ function profileθj!(
     m::LinearMixedModel{T}, sym::Symbol, tc::TableColumns{T}; threshold=4
 ) where {T}
     j = parsej(sym)
     θ = copy(final)
     osj = optsum
-    opt = Opt(osj)
     pmj = m.parmap[j]
     lbj = pmj[2] == pmj[3] ? zero(T) : T(-Inf)
     if length(θ) > 1 # set up the conditional optimization problem
         notj = deleteat!(collect(axes(final, 1)), j)
         osj = optsumj(optsum, j)
-        opt = Opt(osj) # create an NLopt optimizer object for the reduced problem
-        function obj(x, g)
+        function obj(x, g=T[])
             isempty(g) ||
                 throw(ArgumentError("gradients are not evaluated by this objective"))
             for i in eachindex(notj, x)
@@ -47,7 +43,8 @@ function profileθj!(
             end
             return objective!(m, θ)
         end
-        NLopt.min_objective!(opt, obj)
+    else
+        obj = nothing
     end
     pnm = (; p=sym)
     ζold = zero(T)
@@ -56,7 +53,7 @@ function profileθj!(
     θj = final[j]
     θ[j] = θj - δj
     while (abs(ζold) < threshold) && θ[j] ≥ lbj && length(tbl) < 100 # decreasing values of θ[j]
-        ζ = sign(θ[j] - θj) * sqrt(profileobj!(m, θ, opt, osj) - fmin)
+        ζ = sign(θ[j] - θj) * sqrt(profileobj!(obj, m, θ, osj) - fmin)
         push!(tbl, merge(pnm, mkrow!(tc, m, ζ)))
         θ[j] == lbj && break
         δj /= (4 * abs(ζ - ζold)) # take smaller steps when evaluating negative zeta
@@ -80,12 +77,12 @@ function profileθj!(
     copyto!(θ, final)
     θ[j] += δj
     while (ζold < threshold) && (length(tbl) < 120)
-        fval = profileobj!(m, θ, opt, osj)
+        fval = profileobj!(obj, m, θ, osj)
         if fval < fmin
             @warn "Negative difference ", fval - fmin, " for ", sym, " at ", θ[j]
             ζ = zero(T)
         else
-            ζ = sqrt(profileobj!(m, θ, opt, osj) - fmin)
+            ζ = sqrt(profileobj!(obj, m, θ, osj) - fmin)
         end
         push!(tbl, merge(pnm, mkrow!(tc, m, ζ)))
         δj /= (2 * abs(ζ - ζold))
diff --git a/src/profile/vcpr.jl b/src/profile/vcpr.jl
index 6457c2fcc..087996ecb 100644
--- a/src/profile/vcpr.jl
+++ b/src/profile/vcpr.jl
@@ -10,18 +10,17 @@ Profile an element of the variance components.
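+The value `val` is held fixed by setting `σ` to `val / norm(rowj)` during the
+optimization, which dispatches to the currently selected `optsum.backend`.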
""" function profilevc(m::LinearMixedModel{T}, val::T, rowj::AbstractVector{T}) where {T} optsum = m.optsum - function obj(x, g) + # by giving g a default we can also + # work with backends which don't have a gradient slot + function obj(x, g=T[]) isempty(g) || throw(ArgumentError("g must be empty")) updateL!(setθ!(m, x)) optsum.sigma = val / norm(rowj) objctv = objective(m) return objctv end - opt = Opt(optsum) - NLopt.min_objective!(opt, obj) - fmin, xmin, ret = NLopt.optimize!(opt, copyto!(optsum.final, optsum.initial)) - _check_nlopt_return(ret) - return fmin, xmin + + return profilevc(obj, optsum, Val(m.optsum.backend)) end """ diff --git a/test/prima.jl b/test/prima.jl index 714f603b1..110ed78a4 100644 --- a/test/prima.jl +++ b/test/prima.jl @@ -1,14 +1,24 @@ using PRIMA -using MixedModels: prfit!, unfit!, dataset +using MixedModels: unfit!, dataset +using Suppressor include("modelcache.jl") -@testset "formula($model)" for model in models(:sleepstudy) - prmodel = prfit!(LinearMixedModel(formula(model), dataset(:sleepstudy)); progress=false) +@testset "$(formula(model))" for model in models(:sleepstudy) + prmodel = LinearMixedModel(formula(model), dataset(:sleepstudy)) + prmodel.optsum.backend = :prima + prmodel.optsum.optimizer = :bobyqa + fit!(prmodel; progress=false) @test isapprox(loglikelihood(model), loglikelihood(prmodel)) @test prmodel.optsum.optimizer == :bobyqa @test prmodel.optsum.backend == :prima + + @testset "profile" begin + profile_prima = @suppress profile(prmodel) + profile_nlopt = @suppress profile(model) + @test isapprox(profile_prima.tbl.ζ, profile_nlopt.tbl.ζ; rtol=0.0001) + end end model = first(models(:sleepstudy))