From 09ba5d9bacfed8627c61e5f276d1e145abd30e83 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 16:36:27 +0200 Subject: [PATCH 01/13] rework likelihoodratio test to be just a pretty-printing wrapper arounnd lrtest --- NEWS.md | 2 + Project.toml | 2 + src/MixedModels.jl | 5 +- src/likelihoodratiotest.jl | 372 +++++++++++++++++------------------- src/mimeshow.jl | 22 ++- src/remat.jl | 2 +- test/likelihoodratiotest.jl | 231 +++++++++++----------- test/mime.jl | 19 +- test/pls.jl | 1 - 9 files changed, 312 insertions(+), 344 deletions(-) diff --git a/NEWS.md b/NEWS.md index 578a33141..d872f06f1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ MixedModels v5.0.0 Release Notes - The `prfit!` convenience function has been removed. [#853] - The `dataset` and `datasets` functions have been removed. They are now housed in `MixedModelsDatasets`.[#854] - One argument `predict(::GeneralizedLinearMixedModel)`, i.e. prediction on the original data, now supports the `type` keyword argument. [#856] +- `isnested(A::ReMat, B::ReMat)` is now a method of `StatsModels.isnested` +- [BREAKING ]`likelihoodratiotest` has been reworked to be a thin wrapper around `StatsModels.lrtest`. The historical difference in behavior in terms of nesting checks created some confusion. Users advanced enough to create models with non-obvious nesting are assumed to be advanced enough to manually compute the likelihood ratio test. The function `likelihoodratiotest` and associated `LikelihoodRatioTest` type (now with a type parameter for number of models) has been kept to enable printing of test results with model formulae. Most users should not notice a difference in outputs, but the internal field structure has changed. MixedModels v4.38.0 Release Notes ============================== diff --git a/Project.toml b/Project.toml index 02dcd04cc..4a7da0873 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ MixedModelsDatasets = "7e9fb7ac-9f67-43bf-b2c8-96ba0796cbb6" NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -61,6 +62,7 @@ NLopt = "0.6, 1" PRIMA = "0.2" PooledArrays = "0.5, 1" PrecompileTools = "1" +Printf = "1.11.0" ProgressMeter = "1.7" Random = "1" RegressionFormulae = "0.1.3" diff --git a/src/MixedModels.jl b/src/MixedModels.jl index 111c1e9ba..eda2bf3d1 100644 --- a/src/MixedModels.jl +++ b/src/MixedModels.jl @@ -23,6 +23,7 @@ using MixedModelsDatasets: dataset using PooledArrays: PooledArrays, PooledArray using NLopt: NLopt using PrecompileTools: PrecompileTools, @setup_workload, @compile_workload +using Printf: @sprintf using ProgressMeter: ProgressMeter, Progress, finish!, next! using Random: Random, AbstractRNG, randn! using SparseArrays: SparseArrays, SparseMatrixCSC, SparseVector, dropzeros!, nnz @@ -36,12 +37,12 @@ using StatsAPI: loglikelihood, meanresponse, modelmatrix, nobs, pvalue, predict, r2, residuals using StatsAPI: response, responsename, stderror, vcov, weights using StatsBase: StatsBase, CoefTable, model_response, summarystats -using StatsFuns: log2π, normccdf +using StatsFuns: chisqccdf, log2π, normccdf using StatsModels: StatsModels, AbstractContrasts, AbstractTerm, CategoricalTerm using StatsModels: ConstantTerm, DummyCoding, EffectsCoding, FormulaTerm, FunctionTerm using StatsModels: HelmertCoding, HypothesisCoding, InteractionTerm, InterceptTerm using StatsModels: MatrixTerm, SeqDiffCoding, TableRegressionModel -using StatsModels: apply_schema, drop_term, formula, lrtest, modelcols, @formula +using StatsModels: apply_schema, drop_term, formula, lrtest, modelcols, isnested, @formula using StructTypes: StructTypes using Tables: Tables, columntable using TypedTables: TypedTables, DictTable, FlexTable, Table diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index f0be3447e..329474a93 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -1,31 +1,22 @@ """ - LikelihoodRatioTest + LikelihoodRatioTest{N} -Results of MixedModels.likelihoodratiotest +Result of `MixedModels.likelihoodratiotest`. -## Fields -* `formulas`: Vector of model formulae -* `models`: NamedTuple of the `dof` and `deviance` of the models -* `tests`: NamedTuple of the sequential `dofdiff`, `deviancediff`, - and resulting `pvalues` - -## Properties -* `deviance` : note that this is actually -2 log likelihood for linear models - (i.e. without subtracting the constant for a saturated model) -* `pvalues` +This struct wraps [`StatsModels.LRTestResult`](@ref) with a bit more metadata +to enable a few additional `show` methods. +# Fields +- `formulas::NTuple{N, String}` +- `lrt::StatsModels.LRTestResult{N}` +- `linear::Bool` """ -struct LikelihoodRatioTest <: StatsAPI.HypothesisTest - formulas::AbstractVector{String} - models::NamedTuple{(:dof, :deviance)} - tests::NamedTuple{(:dofdiff, :deviancediff, :pvalues)} +Base.@kwdef struct LikelihoodRatioTest{N} <: StatsAPI.HypothesisTest + formulas::NTuple{N, String} + lrt::StatsModels.LRTestResult{N} linear::Bool end -function Base.propertynames(lrt::LikelihoodRatioTest, private::Bool=false) - return (:deviance, :formulas, :models, :pvalues, :tests) -end - """ pvalue(lrt::LikelihoodRatioTest) @@ -38,7 +29,7 @@ To get p-values for multiple tests, use `lrt.pvalues`. """ function StatsAPI.pvalue(lrt::LikelihoodRatioTest) pvalues = lrt.pvalues - if length(pvalues) > 1 + if length(pvalues) > 2 throw( ArgumentError( "Cannot extract **only one** p-value from a multiple test result." @@ -46,25 +37,22 @@ function StatsAPI.pvalue(lrt::LikelihoodRatioTest) ) end - return only(pvalues) + return last(pvalues) end +StatsModels.lrtest(x::LikelihoodRatioTest) = x.lrt + function Base.getproperty(lrt::LikelihoodRatioTest, s::Symbol) - if s == :dof - lrt.models.dof - elseif s == :deviance - lrt.models.deviance + if s in fieldnames(LikelihoodRatioTest) + getfield(lrt, s) elseif s == :pvalues - lrt.tests.pvalues - elseif s == :formulae - lrt.formulas + lrt.pval else - getfield(lrt, s) + getproperty(lrt.lrt, s) end end -# backward syntactic but not type compatibility -Base.getindex(lrt::LikelihoodRatioTest, s::Symbol) = getfield(lrt, s) +const GLM_TYPES = Union{TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, LinearModel, GeneralizedLinearModel} """ likelihoodratiotest(m::MixedModel...) @@ -76,132 +64,62 @@ Base.getindex(lrt::LikelihoodRatioTest, s::Symbol) = getfield(lrt, s) Likeihood ratio test applied to a set of nested models. -!!! note - The nesting of the models is not checked. It is incumbent on the user - to check this. This differs from `StatsModels.lrtest` as nesting in - mixed models, especially in the random effects specification, may be non obvious. - -!!! note - For comparisons between mixed and non-mixed models, the deviance for the non-mixed - model is taken to be -2 log likelihood, i.e. omitting the additive constant for the - fully saturated model. This is in line with the computation of the deviance for mixed - models. - -This functionality may be deprecated in the future in favor of `StatsModels.lrtest`. +This function is wrapper around `StatsModels.lrtest` that provides improved `show` methods +that include the model formulae, where available. For mixed models, the [`isnested`](@ref) +functionality may be overly conservative. For example, nested models with "clever" parameterizations +may be incorrectly rejected as being non-negative. In such cases, it is incumbent on the +user to perform the likelihood ratio test manually and construct the resulting [`LikelihoodRatioTest`](@ref) +object. """ -function likelihoodratiotest(m::MixedModel...) - _iscomparable(m...) || - throw(ArgumentError("""Models are not comparable: are the objectives, data - and, where appropriate, the link and family the same? - """)) - - m = collect(m) # change the tuple to an array - dofs = dof.(m) - formulas = String.(Symbol.(getproperty.(m, :formula))) - ord = sortperm(dofs) - dofs = dofs[ord] - formulas = formulas[ord] - devs = objective.(m)[ord] - dofdiffs = diff(dofs) - devdiffs = .-(diff(devs)) - pvals = map(zip(dofdiffs, devdiffs)) do (dof, dev) - if dev > 0 - ccdf(Chisq(dof), dev) - else - NaN - end - end - - return LikelihoodRatioTest( - formulas, - (dof=dofs, deviance=devs), - (dofdiff=dofdiffs, deviancediff=devdiffs, pvalues=pvals), - first(m) isa LinearMixedModel, - ) +function likelihoodratiotest(m0::Union{GLM_TYPES, MixedModel}, + m::MixedModel...; kwargs...) + formulas = (_formula(m0), _formula.(m)...) + return LikelihoodRatioTest(formulas, lrtest(m0, m...; kwargs...), first(m) isa LinearMixedModel) end _formula(::Union{LinearModel,GeneralizedLinearModel}) = "NA" function _formula(x::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}) return String(Symbol(x.mf.f)) end +_formula(x::MixedModel) = string(formula(x)) -# for GLMMs we're actually looking at the deviance and additive constants are comparable -# (because GLM deviance is actually part of the GLMM deviance computation) -# for LMMs, we're always looking at the "deviance scale" but without the additive constant -# for the fully saturated model -function _criterion( - x::Union{GeneralizedLinearModel,TableRegressionModel{<:GeneralizedLinearModel}} -) - return deviance(x) -end -function _criterion(x::Union{LinearModel,TableRegressionModel{<:LinearModel}}) - return -2 * loglikelihood(x) -end - -function likelihoodratiotest( - m0::Union{ - TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, - LinearModel, - GeneralizedLinearModel, - }, - m::MixedModel..., -) - _iscomparable(m0, first(m)) || - throw(ArgumentError("""Models are not comparable: are the objectives, data - and, where appropriate, the link and family the same? - """)) - lrt = likelihoodratiotest(m...) - devs = pushfirst!(lrt.deviance, _criterion(m0)) - formulas = pushfirst!(lrt.formulas, _formula(m0)) - dofs = pushfirst!(lrt.models.dof, dof(m0)) - devdiffs = pushfirst!(lrt.tests.deviancediff, devs[1] - devs[2]) - dofdiffs = pushfirst!(lrt.tests.dofdiff, dofs[2] - dofs[1]) - - df, dev = first(dofdiffs), first(devdiffs) - p = dev > 0 ? ccdf(Chisq(df), dev) : NaN - pvals = pushfirst!(lrt.tests.pvalues, p) - - return LikelihoodRatioTest( - formulas, - (dof=dofs, deviance=devs), - (dofdiff=dofdiffs, deviancediff=devdiffs, pvalues=pvals), - lrt.linear, - ) -end -function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest) +function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest{N}) where {N} + println(io, "Likelihood-ratio test: $N models fitted on $(lrt.nobs) observations") println(io, "Model Formulae") - for (i, f) in enumerate(lrt.formulas) println(io, "$i: $f") end - # the following was adapted from StatsModels#162 - # from nalimilan - Δdf = lrt.tests.dofdiff - Δdev = lrt.tests.deviancediff + # adapted from https://github.com/JuliaStats/StatsModels.jl/blob/ad89c6755066511e99148f013e81437e29459ed2/src/lrtest.jl#L130-L178 + + Δdf = _diff(lrt.dof) + chisq = abs.(2 .* _diff(lrt.loglikelihood)) + linear = lrt.linear nc = 6 - nr = length(lrt.formulas) + nr = N outrows = Matrix{String}(undef, nr + 1, nc) - outrows[1, :] = [ - "", "model-dof", lrt.linear ? "-2 logLik" : "deviance", "χ²", "χ²-dof", "P(>χ²)" - ] # colnms - - outrows[2, :] = [ - "[1]", string(lrt.dof[1]), Ryu.writefixed(lrt.deviance[1], 4), " ", " ", " " - ] + outrows[1, :] = ["", + "DoF", + "logLik", + "χ²", + "χ²-dof", "P(>χ²)"] + outrows[2, :] = ["[1]", + @sprintf("%.0d", lrt.dof[1]), + @sprintf("%.4f", lrt.loglikelihood[1]), + " ", + " ", + " "] for i in 2:nr - outrows[i + 1, :] = [ - "[$i]", - string(lrt.dof[i]), - Ryu.writefixed(lrt.deviance[i], 4), - Ryu.writefixed(Δdev[i - 1], 4), - string(Δdf[i - 1]), - string(StatsBase.PValue(lrt.pvalues[i - 1])), - ] + outrows[i+1, :] = ["[$i]", + @sprintf("%.0d", lrt.dof[i]), + @sprintf("%.4f", lrt.loglikelihood[i]), + @sprintf("%.4f", chisq[i-1]), + @sprintf("%.0d", Δdf[i-1]), + string(StatsBase.PValue(lrt.pval[i]))] end colwidths = length.(outrows) max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc] @@ -230,58 +148,67 @@ function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest) return nothing end -Base.show(io::IO, lrt::LikelihoodRatioTest) = Base.show(io, MIME("text/plain"), lrt) +##### +##### StatsModels methods for lrtest and isnested +##### -function _iscomparable(m::LinearMixedModel...) - isconstant(getproperty.(getproperty.(m, :optsum), :REML)) || throw( - ArgumentError( - "Models must all be fit with the same objective (i.e. all ML or all REML)" - ), - ) +_diff(t::NTuple{N}) where {N} = ntuple(i->t[i+1]-t[i], N-1) - if any(getproperty.(getproperty.(m, :optsum), :REML)) - isconstant(coefnames.(m)) || throw( - ArgumentError( - "Likelihood-ratio tests for REML-fitted models are only valid when the fixed-effects specifications are identical" - ), - ) +# adapted from StatsModels with the type check elided +# https://github.com/JuliaStats/StatsModels.jl/blob/ad89c6755066511e99148f013e81437e29459ed2/src/lrtest.jl#L74-L128 +# we only accept one non mixed GLM so that we can require a MixedModel and avoid piracy +function StatsModels.lrtest(m0::GLM_TYPES, m::MixedModel...; atol::Real=0.0) + mods = (m0, m...) + if length(mods) < 2 + throw(ArgumentError("At least two models are needed to perform LR test")) + end + df = dof.(mods) + forward = df[1] <= df[2] + if !all(==(nobs(mods[1])), nobs.(mods)) + throw(ArgumentError("LR test is only valid for models fitted on the same data, " * + "but number of observations differ")) + end + if forward + for i in 2:length(mods) + if df[i-1] >= df[i] || !isnested(mods[i-1], mods[i]; atol=atol) + throw(ArgumentError("LR test is only valid for nested models")) + end + end + else + for i in 2:length(mods) + if df[i] >= df[i-1] || !isnested(mods[i], mods[i-1], atol=atol) + throw(ArgumentError("LR test is only valid for nested models")) + end + end end - isconstant(nobs.(m)) || - throw(ArgumentError("Models must have the same number of observations")) - - return true -end - -# XXX we need the where clause to distinguish from the general method -# but static analysis complains if we don't use the type parameter -function _samefamily( - ::GeneralizedLinearMixedModel{<:AbstractFloat,S}... -) where {S<:Distribution} - return true -end -_samefamily(::GeneralizedLinearMixedModel...) = false + dev = deviance.(mods) -function _iscomparable(m::GeneralizedLinearMixedModel...) - # TODO: test that all models are fit with same fast/nAGQ option? - _samefamily(m...) || throw(ArgumentError("Models must be fit to the same distribution")) + Δdf = (NaN, _diff(df)...) + dfr = Int.(dof_residual.(mods)) - isconstant(string.(Link.(m))) || - throw(ArgumentError("Models must have the same link function")) + ll = loglikelihood.(mods) + chisq = (NaN, 2 .* abs.(_diff(ll))...) - isconstant(nobs.(m)) || - throw(ArgumentError("Models must have the same number of observations")) + for i in 2:length(ll) + if ((forward && ll[i-1] > ll[i]) || + (!forward && ll[i-1] < ll[i])) && + !isapprox(ll[i-1], ll[i], atol=atol) + throw(ArgumentError("Log-likelihood must not be lower " * + "in models with more degrees of freedom")) + end + end - return true + pval = chisqccdf.(abs.(Δdf), chisq) + return StatsModels.LRTestResult(Int(nobs(mods[1])), dev, ll, df, pval) end """ isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) -Indicate whether model `m1` is nested in model `m2`, i.e. whether -`m1` can be obtained by constraining some parameters in `m2`. -Both models must have been fitted on the same data. This check -is conservative for `MixedModel`s and may reject nested models with different -parameterizations as being non nested. + +Indicate whether model `m1` is nested in model `m2`, i.e. whether `m1` can be obtained by constraining some parameters in `m2`. +Both models must have been fitted on the same data. +This check is conservative for `MixedModel`s and may reject nested models with different parameterizations as being non nested. """ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) try @@ -310,27 +237,12 @@ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) return true end -function _iscomparable( - m1::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, m2::MixedModel -) - _iscomparable(m1.model, m2) || return false - - # check that the nested fixef are a subset of the outer - all(in.(coefnames(m1), Ref(coefnames(m2)))) || return false - - return true -end - -# GLM isn't nested with in LMM and LM isn't nested within GLMM -_iscomparable(m1::Union{LinearModel,GeneralizedLinearModel}, m2::MixedModel) = false - -function _iscomparable(m1::LinearModel, m2::LinearMixedModel) +function StatsModels.isnested(m1::Union{TableRegressionModel{<:LinearModel},LinearModel}, m2::LinearMixedModel; atol::Real=0.0) nobs(m1) == nobs(m2) || return false - # XXX This reaches into the internal structure of GLM - size(m1.pp.X, 2) <= size(m2.X, 2) || return false + size(modelmatrix(m1), 2) <= size(modelmatrix(m2), 2) || return false - _isnested(m1.pp.X, m2.X) || return false + _isnested(modelmatrix(m1), modelmatrix(m2)) || return false !m2.optsum.REML || throw(ArgumentError("REML-fitted models cannot be compared to linear models")) @@ -338,14 +250,16 @@ function _iscomparable(m1::LinearModel, m2::LinearMixedModel) return true end -function _iscomparable(m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedModel) +function StatsModels.isnested(m1::Union{TableRegressionModel{<:GeneralizedLinearModel},GeneralizedLinearModel}, m2::GeneralizedLinearMixedModel; atol::Real=0.0) nobs(m1) == nobs(m2) || return false size(modelmatrix(m1), 2) <= size(modelmatrix(m2), 2) || return false _isnested(modelmatrix(m1), modelmatrix(m2)) || return false - Distribution(m1) == Distribution(m2) || + d1 = m1 isa TableRegressionModel ? Distribution(m1.model) : Distribution(m1) + + d1 == Distribution(m2) || throw(ArgumentError("Models must be fit to the same distribution")) Link(m1) == Link(m2) || throw(ArgumentError("Models must have the same link function")) @@ -353,6 +267,68 @@ function _iscomparable(m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedMod return true end +##### +##### Helper functions for isnested +##### + +function _iscomparable(m::LinearMixedModel...) + isconstant(getproperty.(getproperty.(m, :optsum), :REML)) || throw( + ArgumentError( + "Models must all be fit with the same objective (i.e. all ML or all REML)" + ), + ) + + if any(getproperty.(getproperty.(m, :optsum), :REML)) + isconstant(coefnames.(m)) || throw( + ArgumentError( + "Likelihood-ratio tests for REML-fitted models are only valid when the fixed-effects specifications are identical" + ), + ) + end + + isconstant(nobs.(m)) || + throw(ArgumentError("Models must have the same number of observations")) + + return true +end + +# XXX we need the where clause to distinguish from the general method +# but static analysis complains if we don't use the type parameter +function _samefamily( + ::GeneralizedLinearMixedModel{<:AbstractFloat,S}... +) where {S<:Distribution} + return true +end +_samefamily(::GeneralizedLinearMixedModel...) = false + +function _iscomparable(m::GeneralizedLinearMixedModel...) + # TODO: test that all models are fit with same fast/nAGQ option? + _samefamily(m...) || throw(ArgumentError("Models must be fit to the same distribution")) + + isconstant(string.(Link.(m))) || + throw(ArgumentError("Models must have the same link function")) + + isconstant(nobs.(m)) || + throw(ArgumentError("Models must have the same number of observations")) + + return true +end + +function _iscomparable( + m1::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, m2::MixedModel +) + _iscomparable(m1.model, m2) || return false + + # check that the nested fixef are a subset of the outer + all(in.(coefnames(m1), Ref(coefnames(m2)))) || return false + + return true +end + +# GLM isn't nested with in LMM and LM isn't nested within GLMM +_iscomparable(m1::Union{LinearModel,GeneralizedLinearModel}, m2::MixedModel) = false + + """ _isnested(x::AbstractMatrix, y::AbstractMatrix; atol::Real=0.0) diff --git a/src/mimeshow.jl b/src/mimeshow.jl index 88bc6b451..47983eba9 100644 --- a/src/mimeshow.jl +++ b/src/mimeshow.jl @@ -64,19 +64,23 @@ function _markdown(b::BlockDescription) return tbl end -function _markdown(lrt::LikelihoodRatioTest) - Δdf = lrt.tests.dofdiff - Δdev = lrt.tests.deviancediff +function _markdown(lrt::LikelihoodRatioTest{N}) where {N} + Δdf = _diff(lrt.dof) + chisq = abs.(2 .* _diff(lrt.loglikelihood)) + linear = lrt.linear - nr = length(lrt.formulas) + nc = 6 + nr = N + + nr = N outrows = Vector{Vector{String}}(undef, nr + 1) - outrows[1] = ["", "model-dof", "deviance", "χ²", "χ²-dof", "P(>χ²)"] # colnms + outrows[1] = ["", "model-dof", "-2 logLik","χ²", "χ²-dof", "P(>χ²)"] # colnms outrows[2] = [ string(lrt.formulas[1]), string(lrt.dof[1]), - string(round(Int, lrt.deviance[1])), + string(round(Int, 2 * lrt.loglikelihood[1])), " ", " ", " ", @@ -86,10 +90,10 @@ function _markdown(lrt::LikelihoodRatioTest) outrows[i + 1] = [ string(lrt.formulas[i]), string(lrt.dof[i]), - string(round(Int, lrt.deviance[i])), - string(round(Int, Δdev[i - 1])), + string(round(Int, 2 .* lrt.loglikelihood[i])), + string(round(Int, chisq[i - 1])), string(Δdf[i - 1]), - string(StatsBase.PValue(lrt.pvalues[i - 1])), + string(StatsBase.PValue(lrt.pvalues[i])), ] end diff --git a/src/remat.jl b/src/remat.jl index 3a8b39c95..17379d927 100644 --- a/src/remat.jl +++ b/src/remat.jl @@ -195,7 +195,7 @@ Is the grouping factor for `A` nested in the grouping factor for `B`? That is, does each value of `A` occur with just one value of B? """ -function isnested(A::ReMat, B::ReMat) +function StatsModels.isnested(A::ReMat, B::ReMat) size(A, 1) == size(B, 1) || throw(DimensionMismatch("must have size(A,1) == size(B,1)")) bins = zeros(Int32, nlevs(A)) @inbounds for (a, b) in zip(A.refs, B.refs) diff --git a/test/likelihoodratiotest.jl b/test/likelihoodratiotest.jl index d24f4d378..d0d29dd4b 100644 --- a/test/likelihoodratiotest.jl +++ b/test/likelihoodratiotest.jl @@ -71,131 +71,120 @@ end lm0 = lm(@formula(reaction ~ 1), slp) lm1 = lm(@formula(reaction ~ 1 + days), slp) - @test MixedModels._iscomparable(lm0, fm1) + # @test MixedModels._iscomparable(lm0, fm1) @test !MixedModels._iscomparable(lm1, fm0) lrt = likelihoodratiotest(fm0, fm1) - @test [deviance(fm0), deviance(fm1)] == lrt.deviance - @test deviance(fm0) - deviance(fm1) == only(lrt.tests.deviancediff) - @test only(lrt.tests.dofdiff) == 1 - @test sum(length, lrt.tests) == 3 - @test sum(length, lrt.pvalues) == 1 - @test sum(length, lrt.models) == 4 - @test length(lrt.formulae) == 2 - show(IOBuffer(), lrt) - @test :pvalues in propertynames(lrt) - @test only(lrt.pvalues) == pvalue(lrt) + @test (deviance(fm0), deviance(fm1)) == lrt.deviance + @test sprint(show, lrt) = "MixedModels.LikelihoodRatioTest{2}((\"reaction ~ 1 + (1 + days | subj)\", \"reaction ~ 1 + days + (1 + days | subj)\"), Likelihood-ratio test: 2 models fitted on 180 observations\n────────────────────────────────────────────────────────\n DOF ΔDOF LogLik Deviance Chisq p(>Chisq)\n────────────────────────────────────────────────────────\n[1] 5 -887.7379 1775.4759 \n[2] 6 1 -875.9697 1751.9393 23.5365 <1e-05\n────────────────────────────────────────────────────────, true)" + @test last(lrt.pvalues) == pvalue(lrt) lrt = likelihoodratiotest(lm1, fm1) - @test lrt.deviance ≈ likelihoodratiotest(lm1.model, fm1).deviance - @test lrt.dof == [3, 6] - @test lrt.deviance ≈ -2 * loglikelihood.([lm1, fm1]) - shown = sprint(show, lrt) - @test occursin("-2 logLik", shown) - @test !occursin("deviance", shown) - - lrt = likelihoodratiotest(lm0, fm0, fm1) - @test_throws ArgumentError pvalue(lrt) - - # non nested FE between non-mixed and mixed - @test_throws ArgumentError likelihoodratiotest(lm1, fm0) - - # mix of REML and ML - fm0 = fit( - MixedModel, - @formula(reaction ~ 1 + (1 + days | subj)), - slp; - REML=true, - progress=false, - ) - @test_throws ArgumentError likelihoodratiotest(fm0, fm1) - @test_throws ArgumentError likelihoodratiotest(lm0, fm0) - - # differing FE with REML - fm1 = fit( - MixedModel, - @formula(reaction ~ 1 + days + (1 + days | subj)), - slp; - REML=true, - progress=false, - ) - - @test_throws ArgumentError likelihoodratiotest(fm0, fm1) - contra = MixedModels.dataset(:contra) - # glm doesn't like categorical responses, so we convert it to numeric ourselves - # TODO: upstream fix - cc = DataFrame(contra) - cc.usenum = ifelse.(cc.use .== "Y", 1, 0) - gmf = glm(@formula(usenum ~ 1 + age + urban + livch), cc, Bernoulli()) - gmf2 = glm(@formula(usenum ~ 1 + age + abs2(age) + urban + livch), cc, Bernoulli()) - gm0 = fit( - MixedModel, - @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - contra, - Bernoulli(); - fast=true, - progress=false, - ) - gm1 = fit( - MixedModel, - @formula(use ~ 1 + age + abs2(age) + urban + livch + (1 | urban & dist)), - contra, - Bernoulli(); - fast=true, - progress=false, - ) - - lrt = likelihoodratiotest(gmf, gm1) - @test [-2 * loglikelihood(gmf), deviance(gm1)] ≈ lrt.deviance - @test -2 * loglikelihood(gmf) - deviance(gm1) ≈ only(lrt.tests.deviancediff) - shown = sprint(show, lrt) - @test !occursin("-2 logLik", shown) - @test occursin("deviance", shown) - - lrt = likelihoodratiotest(gm0, gm1) - @test [deviance(gm0), deviance(gm1)] == lrt.deviance - @test deviance(gm0) - deviance(gm1) == only(lrt.tests.deviancediff) - @test first(lrt.tests.dofdiff) == 1 - @test sum(length, lrt.tests) == 3 - @test sum(length, lrt.pvalues) == 1 - @test sum(length, lrt.models) == 4 - @test length(lrt.formulae) == 2 - - # mismatched links - gm_probit = fit( - MixedModel, - @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - contra, - Bernoulli(), - ProbitLink(); - fast=true, - progress=false, - ) - @test_throws ArgumentError likelihoodratiotest(gmf, gm_probit) - @test_throws ArgumentError likelihoodratiotest(gm0, gm_probit) - - # mismatched families - gm_poisson = fit( - MixedModel, - @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - contra, - Poisson(); - fast=true, - progress=false, - ) - @test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson) - @test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson) - - @test !MixedModels._iscomparable(lm0, gm0) - @test !MixedModels._iscomparable(gmf, fm1) - - @test MixedModels._iscomparable(gmf, gm0) - @test !MixedModels._iscomparable(gmf2, gm0) - - @test MixedModels._isnested(gmf.mm.m, gm0.X) - @test !MixedModels._isnested(gmf2.mm.m, gm0.X) - # this skips the linear term so that the model matrices - # have the same column rank - @test !MixedModels._isnested(gmf2.mm.m[:, Not(2)], gm0.X) + @test pvalue(lrt) ≈ 5.9e-32 atol=1e-16 + + # shown = sprint(show, lrt) + # lrt = likelihoodratiotest(lm0, fm0, fm1) + # @test_throws ArgumentError pvalue(lrt) + + # # non nested FE between non-mixed and mixed + # @test_throws ArgumentError likelihoodratiotest(lm1, fm0) + + # # mix of REML and ML + # fm0 = fit( + # MixedModel, + # @formula(reaction ~ 1 + (1 + days | subj)), + # slp; + # REML=true, + # progress=false, + # ) + # @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + # @test_throws ArgumentError likelihoodratiotest(lm0, fm0) + + # # differing FE with REML + # fm1 = fit( + # MixedModel, + # @formula(reaction ~ 1 + days + (1 + days | subj)), + # slp; + # REML=true, + # progress=false, + # ) + + # @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + # contra = MixedModels.dataset(:contra) + # # glm doesn't like categorical responses, so we convert it to numeric ourselves + # # TODO: upstream fix + # cc = DataFrame(contra) + # cc.usenum = ifelse.(cc.use .== "Y", 1, 0) + # gmf = glm(@formula(usenum ~ 1 + age + urban + livch), cc, Bernoulli()) + # gmf2 = glm(@formula(usenum ~ 1 + age + abs2(age) + urban + livch), cc, Bernoulli()) + # gm0 = fit( + # MixedModel, + # @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), + # contra, + # Bernoulli(); + # fast=true, + # progress=false, + # ) + # gm1 = fit( + # MixedModel, + # @formula(use ~ 1 + age + abs2(age) + urban + livch + (1 | urban & dist)), + # contra, + # Bernoulli(); + # fast=true, + # progress=false, + # ) + + # lrt = likelihoodratiotest(gmf, gm1) + # @test [-2 * loglikelihood(gmf), deviance(gm1)] ≈ lrt.deviance + # @test -2 * loglikelihood(gmf) - deviance(gm1) ≈ only(lrt.tests.deviancediff) + # shown = sprint(show, lrt) + # @test !occursin("-2 logLik", shown) + # @test occursin("deviance", shown) + + # lrt = likelihoodratiotest(gm0, gm1) + # @test [deviance(gm0), deviance(gm1)] == lrt.deviance + # @test deviance(gm0) - deviance(gm1) == only(lrt.tests.deviancediff) + # @test first(lrt.tests.dofdiff) == 1 + # @test sum(length, lrt.tests) == 3 + # @test sum(length, lrt.pvalues) == 1 + # @test sum(length, lrt.models) == 4 + # @test length(lrt.formulae) == 2 + + # # mismatched links + # gm_probit = fit( + # MixedModel, + # @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), + # contra, + # Bernoulli(), + # ProbitLink(); + # fast=true, + # progress=false, + # ) + # @test_throws ArgumentError likelihoodratiotest(gmf, gm_probit) + # @test_throws ArgumentError likelihoodratiotest(gm0, gm_probit) + + # # mismatched families + # gm_poisson = fit( + # MixedModel, + # @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), + # contra, + # Poisson(); + # fast=true, + # progress=false, + # ) + # @test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson) + # @test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson) + + # @test !MixedModels._iscomparable(lm0, gm0) + # @test !MixedModels._iscomparable(gmf, fm1) + + # @test MixedModels._iscomparable(gmf, gm0) + # @test !MixedModels._iscomparable(gmf2, gm0) + + # @test MixedModels._isnested(gmf.mm.m, gm0.X) + # @test !MixedModels._isnested(gmf2.mm.m, gm0.X) + # # this skips the linear term so that the model matrices + # # have the same column rank + # @test !MixedModels._isnested(gmf2.mm.m[:, Not(2)], gm0.X) end diff --git a/test/mime.jl b/test/mime.jl index 7f740dbe9..1798745d8 100644 --- a/test/mime.jl +++ b/test/mime.jl @@ -93,17 +93,12 @@ lrt = likelihoodratiotest(fm0, fm1) end @testset "lrt" begin - @test sprint(show, mime, lrt) in (""" -| | model-dof | deviance | χ² | χ²-dof | P(>χ²) | -|:---------------------------------------- | ---------:| --------:| ---:| ------:|:------ | -| reaction ~ 1 + days + (1 \\| subj) | 4 | 1794 | | | | -| reaction ~ 1 + days + (1 + days \\| subj) | 6 | 1752 | 42 | 2 | <1e-09 | -""", """ - | | model-dof | deviance | χ² | χ²-dof | P(>χ²) | - |:---------------------------------------- | ---------:| --------:| ---:| ------:|:------ | - | reaction ~ 1 + days + (1 \\| subj) | 4 | 1794 | | | | - | reaction ~ 1 + days + (1 + days \\| subj) | 6 | 1752 | 42 | 2 | <1e-9 | - """) + @test sprint(show, mime, lrt) == """ +| | model-dof | -2 logLik | χ² | χ²-dof | P(>χ²) | +|:---------------------------------------- | ---------:| ---------:| ---:| ------:|:------ | +| reaction ~ 1 + days + (1 \\| subj) | 4 | -1794 | | | | +| reaction ~ 1 + days + (1 + days \\| subj) | 6 | -1752 | 42 | 2 | <1e-09 | +""" end @testset "blockdescription" begin @@ -214,7 +209,7 @@ rows & subj & item & fixed \\\\ # not doing the full comparison here because there's a zero-padded exponent # that will render differently on different platforms @test startswith(sprint(show, MIME("text/latex"), lrt), - "\\begin{tabular}\n{l | r | r | r | r | l}\n & model-dof & deviance & \$\\chi^2\$ & \$\\chi^2\$-dof & P(>\$\\chi^2\$) \\\\", + "\\begin{tabular}\n{l | r | r | r | r | l}\n & model-dof & -2 logLik & \$\\chi^2\$ & \$\\chi^2\$-dof & P(>\$\\chi^2\$) \\\\", ) optsum = sprint(show, MIME("text/latex"), fm0.optsum) diff --git a/test/pls.jl b/test/pls.jl index f5a4ba87e..eeb7ca900 100644 --- a/test/pls.jl +++ b/test/pls.jl @@ -264,7 +264,6 @@ end @test "Diagonal" in tokens lrt = likelihoodratiotest(models(:pastes)...) - @test length(lrt.deviance) == length(lrt.formulas) == length(lrt.models) == 2 @test only(lrt.tests.pvalues) ≈ 0.5233767965780878 atol = 0.0001 @testset "missing variables in formula" begin From 3a3e3480cb3f7a4a08266dabcaf43620608e9fc1 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 16:40:29 +0200 Subject: [PATCH 02/13] NEWS --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index d872f06f1..662bf460b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,8 +10,8 @@ MixedModels v5.0.0 Release Notes - The `prfit!` convenience function has been removed. [#853] - The `dataset` and `datasets` functions have been removed. They are now housed in `MixedModelsDatasets`.[#854] - One argument `predict(::GeneralizedLinearMixedModel)`, i.e. prediction on the original data, now supports the `type` keyword argument. [#856] -- `isnested(A::ReMat, B::ReMat)` is now a method of `StatsModels.isnested` -- [BREAKING ]`likelihoodratiotest` has been reworked to be a thin wrapper around `StatsModels.lrtest`. The historical difference in behavior in terms of nesting checks created some confusion. Users advanced enough to create models with non-obvious nesting are assumed to be advanced enough to manually compute the likelihood ratio test. The function `likelihoodratiotest` and associated `LikelihoodRatioTest` type (now with a type parameter for number of models) has been kept to enable printing of test results with model formulae. Most users should not notice a difference in outputs, but the internal field structure has changed. +- `isnested(A::ReMat, B::ReMat)` is now a method of `StatsModels.isnested`.[#858] +- [BREAKING ]`likelihoodratiotest` has been reworked to be a thin wrapper around `StatsModels.lrtest`. The historical difference in behavior in terms of nesting checks created some confusion. Users advanced enough to create models with non-obvious nesting are assumed to be advanced enough to manually compute the likelihood ratio test. The function `likelihoodratiotest` and associated `LikelihoodRatioTest` type (now with a type parameter for number of models) has been kept to enable printing of test results with model formulae. Most users should not notice a difference in behavior, but the display has been slightly changed and the internal field structure has changed.[#858] MixedModels v4.38.0 Release Notes ============================== From 68710c13d0057b90737c609fada48b0688bacff2 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 16:46:32 +0200 Subject: [PATCH 03/13] fixes --- Project.toml | 2 +- test/pls.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 4a7da0873..8d2c8229a 100644 --- a/Project.toml +++ b/Project.toml @@ -62,7 +62,7 @@ NLopt = "0.6, 1" PRIMA = "0.2" PooledArrays = "0.5, 1" PrecompileTools = "1" -Printf = "1.11.0" +Printf = "1" ProgressMeter = "1.7" Random = "1" RegressionFormulae = "0.1.3" diff --git a/test/pls.jl b/test/pls.jl index eeb7ca900..d5d3af870 100644 --- a/test/pls.jl +++ b/test/pls.jl @@ -264,7 +264,7 @@ end @test "Diagonal" in tokens lrt = likelihoodratiotest(models(:pastes)...) - @test only(lrt.tests.pvalues) ≈ 0.5233767965780878 atol = 0.0001 + @test last(lrt.pvalues) ≈ 0.5233767965780878 atol = 0.0001 @testset "missing variables in formula" begin ae = ArgumentError( From fa2119948ec671c2e2c8f69829db93184239b123 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 16:55:00 +0200 Subject: [PATCH 04/13] oops --- test/likelihoodratiotest.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/likelihoodratiotest.jl b/test/likelihoodratiotest.jl index d0d29dd4b..8ccafc138 100644 --- a/test/likelihoodratiotest.jl +++ b/test/likelihoodratiotest.jl @@ -77,7 +77,7 @@ end lrt = likelihoodratiotest(fm0, fm1) @test (deviance(fm0), deviance(fm1)) == lrt.deviance - @test sprint(show, lrt) = "MixedModels.LikelihoodRatioTest{2}((\"reaction ~ 1 + (1 + days | subj)\", \"reaction ~ 1 + days + (1 + days | subj)\"), Likelihood-ratio test: 2 models fitted on 180 observations\n────────────────────────────────────────────────────────\n DOF ΔDOF LogLik Deviance Chisq p(>Chisq)\n────────────────────────────────────────────────────────\n[1] 5 -887.7379 1775.4759 \n[2] 6 1 -875.9697 1751.9393 23.5365 <1e-05\n────────────────────────────────────────────────────────, true)" + @test sprint(show, lrt) == "MixedModels.LikelihoodRatioTest{2}((\"reaction ~ 1 + (1 + days | subj)\", \"reaction ~ 1 + days + (1 + days | subj)\"), Likelihood-ratio test: 2 models fitted on 180 observations\n────────────────────────────────────────────────────────\n DOF ΔDOF LogLik Deviance Chisq p(>Chisq)\n────────────────────────────────────────────────────────\n[1] 5 -887.7379 1775.4759 \n[2] 6 1 -875.9697 1751.9393 23.5365 <1e-05\n────────────────────────────────────────────────────────, true)" @test last(lrt.pvalues) == pvalue(lrt) lrt = likelihoodratiotest(lm1, fm1) From 8d384109606ff0133c1a8dd7ea2df789a8ee8ec9 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 16:55:57 +0200 Subject: [PATCH 05/13] remove xref --- src/likelihoodratiotest.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index 329474a93..2295468f5 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -3,7 +3,7 @@ Result of `MixedModels.likelihoodratiotest`. -This struct wraps [`StatsModels.LRTestResult`](@ref) with a bit more metadata +This struct wraps `StatsModels.LRTestResult` with a bit more metadata to enable a few additional `show` methods. # Fields From a64e16544eeb92a9ff5356f8c9a869335d4ac654 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 21:48:24 +0200 Subject: [PATCH 06/13] fix some tests --- src/likelihoodratiotest.jl | 13 ++--- test/likelihoodratiotest.jl | 100 ++++++++++++++++++------------------ 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index 2295468f5..31c1652bc 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -84,6 +84,7 @@ end _formula(x::MixedModel) = string(formula(x)) +Base.show(io::IO, lrt::LikelihoodRatioTest) = show(io, MIME("text/plain"), lrt) function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest{N}) where {N} println(io, "Likelihood-ratio test: $N models fitted on $(lrt.nobs) observations") println(io, "Model Formulae") @@ -103,20 +104,20 @@ function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest{N}) wher outrows[1, :] = ["", "DoF", - "logLik", + "-2 logLik", "χ²", "χ²-dof", "P(>χ²)"] outrows[2, :] = ["[1]", @sprintf("%.0d", lrt.dof[1]), - @sprintf("%.4f", lrt.loglikelihood[1]), - " ", - " ", - " "] + @sprintf("%.4f", -2 * lrt.loglikelihood[1]), + "", + "", + ""] for i in 2:nr outrows[i+1, :] = ["[$i]", @sprintf("%.0d", lrt.dof[i]), - @sprintf("%.4f", lrt.loglikelihood[i]), + @sprintf("%.4f", -2 * lrt.loglikelihood[i]), @sprintf("%.4f", chisq[i-1]), @sprintf("%.0d", Δdf[i-1]), string(StatsBase.PValue(lrt.pval[i]))] diff --git a/test/likelihoodratiotest.jl b/test/likelihoodratiotest.jl index 8ccafc138..c34fced68 100644 --- a/test/likelihoodratiotest.jl +++ b/test/likelihoodratiotest.jl @@ -71,69 +71,69 @@ end lm0 = lm(@formula(reaction ~ 1), slp) lm1 = lm(@formula(reaction ~ 1 + days), slp) - # @test MixedModels._iscomparable(lm0, fm1) - @test !MixedModels._iscomparable(lm1, fm0) + @test MixedModels.isnested(lm0, fm1) + @test !MixedModels.isnested(lm1, fm0) lrt = likelihoodratiotest(fm0, fm1) @test (deviance(fm0), deviance(fm1)) == lrt.deviance - @test sprint(show, lrt) == "MixedModels.LikelihoodRatioTest{2}((\"reaction ~ 1 + (1 + days | subj)\", \"reaction ~ 1 + days + (1 + days | subj)\"), Likelihood-ratio test: 2 models fitted on 180 observations\n────────────────────────────────────────────────────────\n DOF ΔDOF LogLik Deviance Chisq p(>Chisq)\n────────────────────────────────────────────────────────\n[1] 5 -887.7379 1775.4759 \n[2] 6 1 -875.9697 1751.9393 23.5365 <1e-05\n────────────────────────────────────────────────────────, true)" + @test sprint(show, lrt) == "Likelihood-ratio test: 2 models fitted on 180 observations\nModel Formulae\n1: reaction ~ 1 + (1 + days | subj)\n2: reaction ~ 1 + days + (1 + days | subj)\n────────────────────────────────────────────\n DoF -2 logLik χ² χ²-dof P(>χ²)\n────────────────────────────────────────────\n[1] 5 1775.4759 \n[2] 6 1751.9393 23.5365 1 <1e-05\n────────────────────────────────────────────" @test last(lrt.pvalues) == pvalue(lrt) lrt = likelihoodratiotest(lm1, fm1) @test pvalue(lrt) ≈ 5.9e-32 atol=1e-16 - # shown = sprint(show, lrt) - # lrt = likelihoodratiotest(lm0, fm0, fm1) - # @test_throws ArgumentError pvalue(lrt) + lrt = likelihoodratiotest(lm0, fm0, fm1) + @test_throws ArgumentError pvalue(lrt) - # # non nested FE between non-mixed and mixed - # @test_throws ArgumentError likelihoodratiotest(lm1, fm0) + # non nested FE between non-mixed and mixed + @test_throws ArgumentError likelihoodratiotest(lm1, fm0) - # # mix of REML and ML - # fm0 = fit( - # MixedModel, - # @formula(reaction ~ 1 + (1 + days | subj)), - # slp; - # REML=true, - # progress=false, - # ) - # @test_throws ArgumentError likelihoodratiotest(fm0, fm1) - # @test_throws ArgumentError likelihoodratiotest(lm0, fm0) + # mix of REML and ML + fm0 = fit( + MixedModel, + @formula(reaction ~ 1 + (1 + days | subj)), + slp; + REML=true, + progress=false, + ) + @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + @test_throws ArgumentError likelihoodratiotest(lm0, fm0) - # # differing FE with REML - # fm1 = fit( - # MixedModel, - # @formula(reaction ~ 1 + days + (1 + days | subj)), - # slp; - # REML=true, - # progress=false, - # ) + # differing FE with REML + fm1 = fit( + MixedModel, + @formula(reaction ~ 1 + days + (1 + days | subj)), + slp; + REML=true, + progress=false, + ) - # @test_throws ArgumentError likelihoodratiotest(fm0, fm1) - # contra = MixedModels.dataset(:contra) - # # glm doesn't like categorical responses, so we convert it to numeric ourselves - # # TODO: upstream fix - # cc = DataFrame(contra) - # cc.usenum = ifelse.(cc.use .== "Y", 1, 0) - # gmf = glm(@formula(usenum ~ 1 + age + urban + livch), cc, Bernoulli()) - # gmf2 = glm(@formula(usenum ~ 1 + age + abs2(age) + urban + livch), cc, Bernoulli()) - # gm0 = fit( - # MixedModel, - # @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - # contra, - # Bernoulli(); - # fast=true, - # progress=false, - # ) - # gm1 = fit( - # MixedModel, - # @formula(use ~ 1 + age + abs2(age) + urban + livch + (1 | urban & dist)), - # contra, - # Bernoulli(); - # fast=true, - # progress=false, - # ) + @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + + contra = MixedModels.dataset(:contra) + # glm doesn't like categorical responses, so we convert it to numeric ourselves + # TODO: upstream fix + cc = DataFrame(dataset(:contra)) + cc.usenum = ifelse.(cc.use .== "Y", 1, 0) + gmf = glm(@formula(usenum ~ 1 + age + urban + livch), cc, Bernoulli()) + gmf2 = glm(@formula(usenum ~ 1 + age + abs2(age) + urban + livch), cc, Bernoulli()) + gm0 = fit( + MixedModel, + @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), + dataset(:contra), + Bernoulli(); + fast=true, + progress=false, + ) + gm1 = fit( + MixedModel, + @formula(use ~ 1 + age + abs2(age) + urban + livch + (1 | urban & dist)), + dataset(:contra), + Bernoulli(); + fast=true, + progress=false, + ) # lrt = likelihoodratiotest(gmf, gm1) # @test [-2 * loglikelihood(gmf), deviance(gm1)] ≈ lrt.deviance From e6ad1184b3a6944db931fc182827ca3abdbb319b Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 21:59:32 +0200 Subject: [PATCH 07/13] remaining tests --- src/likelihoodratiotest.jl | 12 +++-- test/likelihoodratiotest.jl | 95 +++++++++++++++---------------------- 2 files changed, 46 insertions(+), 61 deletions(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index 31c1652bc..b4f8a7982 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -251,23 +251,27 @@ function StatsModels.isnested(m1::Union{TableRegressionModel{<:LinearModel},Line return true end -function StatsModels.isnested(m1::Union{TableRegressionModel{<:GeneralizedLinearModel},GeneralizedLinearModel}, m2::GeneralizedLinearMixedModel; atol::Real=0.0) +function StatsModels.isnested(m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedModel; atol::Real=0.0) nobs(m1) == nobs(m2) || return false size(modelmatrix(m1), 2) <= size(modelmatrix(m2), 2) || return false _isnested(modelmatrix(m1), modelmatrix(m2)) || return false - d1 = m1 isa TableRegressionModel ? Distribution(m1.model) : Distribution(m1) - - d1 == Distribution(m2) || + Distribution(m1) == Distribution(m2) || throw(ArgumentError("Models must be fit to the same distribution")) + Link(m1) == Link(m2) || throw(ArgumentError("Models must have the same link function")) return true end +function StatsModels.isnested(m1::TableRegressionModel{<:GeneralizedLinearModel}, m2::GeneralizedLinearMixedModel; atol::Real=0.0) + return isnested(m1.model, m2; atol) +end + + ##### ##### Helper functions for isnested ##### diff --git a/test/likelihoodratiotest.jl b/test/likelihoodratiotest.jl index c34fced68..010482750 100644 --- a/test/likelihoodratiotest.jl +++ b/test/likelihoodratiotest.jl @@ -84,10 +84,10 @@ end @test pvalue(lrt) ≈ 5.9e-32 atol=1e-16 lrt = likelihoodratiotest(lm0, fm0, fm1) - @test_throws ArgumentError pvalue(lrt) + @suppress @test_throws ArgumentError pvalue(lrt) # non nested FE between non-mixed and mixed - @test_throws ArgumentError likelihoodratiotest(lm1, fm0) + @suppress @test_throws ArgumentError likelihoodratiotest(lm1, fm0) # mix of REML and ML fm0 = fit( @@ -97,8 +97,8 @@ end REML=true, progress=false, ) - @test_throws ArgumentError likelihoodratiotest(fm0, fm1) - @test_throws ArgumentError likelihoodratiotest(lm0, fm0) + @suppress @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + @suppress @test_throws ArgumentError likelihoodratiotest(lm0, fm0) # differing FE with REML fm1 = fit( @@ -109,7 +109,7 @@ end progress=false, ) - @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + @suppress @test_throws ArgumentError likelihoodratiotest(fm0, fm1) contra = MixedModels.dataset(:contra) # glm doesn't like categorical responses, so we convert it to numeric ourselves @@ -135,56 +135,37 @@ end progress=false, ) - # lrt = likelihoodratiotest(gmf, gm1) - # @test [-2 * loglikelihood(gmf), deviance(gm1)] ≈ lrt.deviance - # @test -2 * loglikelihood(gmf) - deviance(gm1) ≈ only(lrt.tests.deviancediff) - # shown = sprint(show, lrt) - # @test !occursin("-2 logLik", shown) - # @test occursin("deviance", shown) - - # lrt = likelihoodratiotest(gm0, gm1) - # @test [deviance(gm0), deviance(gm1)] == lrt.deviance - # @test deviance(gm0) - deviance(gm1) == only(lrt.tests.deviancediff) - # @test first(lrt.tests.dofdiff) == 1 - # @test sum(length, lrt.tests) == 3 - # @test sum(length, lrt.pvalues) == 1 - # @test sum(length, lrt.models) == 4 - # @test length(lrt.formulae) == 2 - - # # mismatched links - # gm_probit = fit( - # MixedModel, - # @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - # contra, - # Bernoulli(), - # ProbitLink(); - # fast=true, - # progress=false, - # ) - # @test_throws ArgumentError likelihoodratiotest(gmf, gm_probit) - # @test_throws ArgumentError likelihoodratiotest(gm0, gm_probit) - - # # mismatched families - # gm_poisson = fit( - # MixedModel, - # @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - # contra, - # Poisson(); - # fast=true, - # progress=false, - # ) - # @test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson) - # @test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson) - - # @test !MixedModels._iscomparable(lm0, gm0) - # @test !MixedModels._iscomparable(gmf, fm1) - - # @test MixedModels._iscomparable(gmf, gm0) - # @test !MixedModels._iscomparable(gmf2, gm0) - - # @test MixedModels._isnested(gmf.mm.m, gm0.X) - # @test !MixedModels._isnested(gmf2.mm.m, gm0.X) - # # this skips the linear term so that the model matrices - # # have the same column rank - # @test !MixedModels._isnested(gmf2.mm.m[:, Not(2)], gm0.X) + lrt = likelihoodratiotest(gmf, gm1) + @test 2 * only(diff(collect(lrt.loglikelihood))) ≈ 95.0725 atol=0.0001 + + lrt = likelihoodratiotest(gm0, gm1) + @test 2 * only(diff(collect(lrt.loglikelihood))) ≈ 38.0713 atol=0.0001 + + # mismatched links + gm_probit = fit( + MixedModel, + @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), + dataset(:contra), + Bernoulli(), + ProbitLink(); + fast=true, + progress=false, + ) + @suppress @test_throws ArgumentError likelihoodratiotest(gmf, gm_probit) + @suppress @test_throws ArgumentError likelihoodratiotest(gm0, gm_probit) + + # mismatched families + gm_poisson = fit( + MixedModel, + @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), + dataset(:contra), + Poisson(); + fast=true, + progress=false, + ) + @suppress @test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson) + @suppress @test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson) + # this skips the linear term so that the model matrices + # have the same column rank + @test !MixedModels._isnested(gmf2.mm.m[:, Not(2)], gm0.X) end From b663d7e8ccafc77d958b1cfb232e4f6f06a4365a Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 22:09:08 +0200 Subject: [PATCH 08/13] NEWS tweak --- NEWS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NEWS.md b/NEWS.md index 662bf460b..3a2ad9269 100644 --- a/NEWS.md +++ b/NEWS.md @@ -673,3 +673,9 @@ Package dependencies [#840]: https://github.com/JuliaStats/MixedModels.jl/issues/840 [#841]: https://github.com/JuliaStats/MixedModels.jl/issues/841 [#842]: https://github.com/JuliaStats/MixedModels.jl/issues/842 +[#849]: https://github.com/JuliaStats/MixedModels.jl/issues/849 +[#850]: https://github.com/JuliaStats/MixedModels.jl/issues/850 +[#853]: https://github.com/JuliaStats/MixedModels.jl/issues/853 +[#854]: https://github.com/JuliaStats/MixedModels.jl/issues/854 +[#856]: https://github.com/JuliaStats/MixedModels.jl/issues/856 +[#858]: https://github.com/JuliaStats/MixedModels.jl/issues/858 From 9c682793682e8b73f3fa774973607e44ba0d439f Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 23:30:13 +0200 Subject: [PATCH 09/13] improve test coverage --- src/likelihoodratiotest.jl | 79 +++++++++++++++++++------------------ test/likelihoodratiotest.jl | 7 ++++ 2 files changed, 47 insertions(+), 39 deletions(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index b4f8a7982..25e57f81a 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -160,26 +160,15 @@ _diff(t::NTuple{N}) where {N} = ntuple(i->t[i+1]-t[i], N-1) # we only accept one non mixed GLM so that we can require a MixedModel and avoid piracy function StatsModels.lrtest(m0::GLM_TYPES, m::MixedModel...; atol::Real=0.0) mods = (m0, m...) - if length(mods) < 2 + length(mods) >= 2 || throw(ArgumentError("At least two models are needed to perform LR test")) - end df = dof.(mods) - forward = df[1] <= df[2] - if !all(==(nobs(mods[1])), nobs.(mods)) + all(==(nobs(mods[1])), nobs.(mods)) || throw(ArgumentError("LR test is only valid for models fitted on the same data, " * "but number of observations differ")) - end - if forward - for i in 2:length(mods) - if df[i-1] >= df[i] || !isnested(mods[i-1], mods[i]; atol=atol) - throw(ArgumentError("LR test is only valid for nested models")) - end - end - else - for i in 2:length(mods) - if df[i] >= df[i-1] || !isnested(mods[i], mods[i-1], atol=atol) - throw(ArgumentError("LR test is only valid for nested models")) - end + for i in 2:length(mods) + if df[i-1] >= df[i] || !isnested(mods[i-1], mods[i]; atol=atol) + throw(ArgumentError("LR test is only valid for nested models")) end end @@ -238,7 +227,11 @@ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) return true end -function StatsModels.isnested(m1::Union{TableRegressionModel{<:LinearModel},LinearModel}, m2::LinearMixedModel; atol::Real=0.0) +function StatsModels.isnested(m1::TableRegressionModel{Union{<:GeneralizedLinearModel,<:LinearModel}}, m2::MixedModel; atol::Real=0.0) + return _iscomparable(m1.model, m2) && isnested(m1.model, m2; atol) +end + +function StatsModels.isnested(m1::LinearModel, m2::LinearMixedModel; atol::Real=0.0) nobs(m1) == nobs(m2) || return false size(modelmatrix(m1), 2) <= size(modelmatrix(m2), 2) || return false @@ -267,15 +260,17 @@ function StatsModels.isnested(m1::GeneralizedLinearModel, m2::GeneralizedLinearM return true end -function StatsModels.isnested(m1::TableRegressionModel{<:GeneralizedLinearModel}, m2::GeneralizedLinearMixedModel; atol::Real=0.0) - return isnested(m1.model, m2; atol) -end - ##### ##### Helper functions for isnested ##### +""" + _iscomparable(m::LinearMixedModel...) + + +Check whether LMMs are comparable on the basis of their REML criterion. +""" function _iscomparable(m::LinearMixedModel...) isconstant(getproperty.(getproperty.(m, :optsum), :REML)) || throw( ArgumentError( @@ -291,21 +286,15 @@ function _iscomparable(m::LinearMixedModel...) ) end - isconstant(nobs.(m)) || - throw(ArgumentError("Models must have the same number of observations")) - return true end -# XXX we need the where clause to distinguish from the general method -# but static analysis complains if we don't use the type parameter -function _samefamily( - ::GeneralizedLinearMixedModel{<:AbstractFloat,S}... -) where {S<:Distribution} - return true -end -_samefamily(::GeneralizedLinearMixedModel...) = false +""" + _iscomparable(m::GeneralizedLinearMixedModel...) + +Check whethere GLMMs are comparable in terms of their model families and links. +""" function _iscomparable(m::GeneralizedLinearMixedModel...) # TODO: test that all models are fit with same fast/nAGQ option? _samefamily(m...) || throw(ArgumentError("Models must be fit to the same distribution")) @@ -313,25 +302,37 @@ function _iscomparable(m::GeneralizedLinearMixedModel...) isconstant(string.(Link.(m))) || throw(ArgumentError("Models must have the same link function")) - isconstant(nobs.(m)) || - throw(ArgumentError("Models must have the same number of observations")) - return true end +""" + _iscomparable(m1::TableRegressionModel, m2::MixedModel)_samefamily( + ::GeneralizedLinearMixedModel + +Check whethere a TableRegressionModel and a MixedModel have coefficient names indicative of nesting. +""" function _iscomparable( m1::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, m2::MixedModel ) - _iscomparable(m1.model, m2) || return false - # check that the nested fixef are a subset of the outer all(in.(coefnames(m1), Ref(coefnames(m2)))) || return false return true end -# GLM isn't nested with in LMM and LM isn't nested within GLMM -_iscomparable(m1::Union{LinearModel,GeneralizedLinearModel}, m2::MixedModel) = false +""" + _samefamily(::GeneralizedLinearMixedModel...) + +Check whether all GLMMS come from the same model family. +""" +function _samefamily( + ::GeneralizedLinearMixedModel{<:AbstractFloat,S}... +) where {S<:Distribution} + # XXX we need the where clause to distinguish from the general method + # but static analysis complains if we don't use the type parameter + return true +end +_samefamily(::GeneralizedLinearMixedModel...) = false """ diff --git a/test/likelihoodratiotest.jl b/test/likelihoodratiotest.jl index 010482750..e6030f42b 100644 --- a/test/likelihoodratiotest.jl +++ b/test/likelihoodratiotest.jl @@ -75,6 +75,7 @@ end @test !MixedModels.isnested(lm1, fm0) lrt = likelihoodratiotest(fm0, fm1) + @test lrtest(fm0, fm1) == lrtest(lrt) @test (deviance(fm0), deviance(fm1)) == lrt.deviance @test sprint(show, lrt) == "Likelihood-ratio test: 2 models fitted on 180 observations\nModel Formulae\n1: reaction ~ 1 + (1 + days | subj)\n2: reaction ~ 1 + days + (1 + days | subj)\n────────────────────────────────────────────\n DoF -2 logLik χ² χ²-dof P(>χ²)\n────────────────────────────────────────────\n[1] 5 1775.4759 \n[2] 6 1751.9393 23.5365 1 <1e-05\n────────────────────────────────────────────" @@ -83,6 +84,10 @@ end lrt = likelihoodratiotest(lm1, fm1) @test pvalue(lrt) ≈ 5.9e-32 atol=1e-16 + lrt2 = likelihoodratiotest(lm1.model, fm1) + @test first(lrt2.formulas) == "NA" + @test lrtest(lrt2) == lrtest(lrt) + lrt = likelihoodratiotest(lm0, fm0, fm1) @suppress @test_throws ArgumentError pvalue(lrt) @@ -165,6 +170,8 @@ end ) @suppress @test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson) @suppress @test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson) + + @test !MixedModels._samefamily(gm0, gm_poisson) # this skips the linear term so that the model matrices # have the same column rank @test !MixedModels._isnested(gmf2.mm.m[:, Not(2)], gm0.X) From 88f48fbec933d6c160153e81bf521a7abdb5c016 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 21:34:03 +0000 Subject: [PATCH 10/13] style Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/likelihoodratiotest.jl | 88 ++++++++++++++++++++++---------------- src/mimeshow.jl | 2 +- 2 files changed, 53 insertions(+), 37 deletions(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index 25e57f81a..e82290d49 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -12,7 +12,7 @@ to enable a few additional `show` methods. - `linear::Bool` """ Base.@kwdef struct LikelihoodRatioTest{N} <: StatsAPI.HypothesisTest - formulas::NTuple{N, String} + formulas::NTuple{N,String} lrt::StatsModels.LRTestResult{N} linear::Bool end @@ -52,7 +52,11 @@ function Base.getproperty(lrt::LikelihoodRatioTest, s::Symbol) end end -const GLM_TYPES = Union{TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, LinearModel, GeneralizedLinearModel} +const GLM_TYPES = Union{ + TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, + LinearModel, + GeneralizedLinearModel, +} """ likelihoodratiotest(m::MixedModel...) @@ -71,10 +75,12 @@ may be incorrectly rejected as being non-negative. In such cases, it is incumben user to perform the likelihood ratio test manually and construct the resulting [`LikelihoodRatioTest`](@ref) object. """ -function likelihoodratiotest(m0::Union{GLM_TYPES, MixedModel}, +function likelihoodratiotest(m0::Union{GLM_TYPES,MixedModel}, m::MixedModel...; kwargs...) formulas = (_formula(m0), _formula.(m)...) - return LikelihoodRatioTest(formulas, lrtest(m0, m...; kwargs...), first(m) isa LinearMixedModel) + return LikelihoodRatioTest( + formulas, lrtest(m0, m...; kwargs...), first(m) isa LinearMixedModel + ) end _formula(::Union{LinearModel,GeneralizedLinearModel}) = "NA" @@ -83,7 +89,6 @@ function _formula(x::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearM end _formula(x::MixedModel) = string(formula(x)) - Base.show(io::IO, lrt::LikelihoodRatioTest) = show(io, MIME("text/plain"), lrt) function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest{N}) where {N} println(io, "Likelihood-ratio test: $N models fitted on $(lrt.nobs) observations") @@ -103,24 +108,24 @@ function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest{N}) wher outrows = Matrix{String}(undef, nr + 1, nc) outrows[1, :] = ["", - "DoF", - "-2 logLik", - "χ²", - "χ²-dof", "P(>χ²)"] + "DoF", + "-2 logLik", + "χ²", + "χ²-dof", "P(>χ²)"] outrows[2, :] = ["[1]", - @sprintf("%.0d", lrt.dof[1]), - @sprintf("%.4f", -2 * lrt.loglikelihood[1]), - "", - "", - ""] + @sprintf("%.0d", lrt.dof[1]), + @sprintf("%.4f", -2 * lrt.loglikelihood[1]), + "", + "", + ""] for i in 2:nr - outrows[i+1, :] = ["[$i]", - @sprintf("%.0d", lrt.dof[i]), - @sprintf("%.4f", -2 * lrt.loglikelihood[i]), - @sprintf("%.4f", chisq[i-1]), - @sprintf("%.0d", Δdf[i-1]), - string(StatsBase.PValue(lrt.pval[i]))] + outrows[i + 1, :] = ["[$i]", + @sprintf("%.0d", lrt.dof[i]), + @sprintf("%.4f", -2 * lrt.loglikelihood[i]), + @sprintf("%.4f", chisq[i - 1]), + @sprintf("%.0d", Δdf[i - 1]), + string(StatsBase.PValue(lrt.pval[i]))] end colwidths = length.(outrows) max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc] @@ -153,7 +158,7 @@ end ##### StatsModels methods for lrtest and isnested ##### -_diff(t::NTuple{N}) where {N} = ntuple(i->t[i+1]-t[i], N-1) +_diff(t::NTuple{N}) where {N} = ntuple(i -> t[i + 1] - t[i], N - 1) # adapted from StatsModels with the type check elided # https://github.com/JuliaStats/StatsModels.jl/blob/ad89c6755066511e99148f013e81437e29459ed2/src/lrtest.jl#L74-L128 @@ -164,10 +169,14 @@ function StatsModels.lrtest(m0::GLM_TYPES, m::MixedModel...; atol::Real=0.0) throw(ArgumentError("At least two models are needed to perform LR test")) df = dof.(mods) all(==(nobs(mods[1])), nobs.(mods)) || - throw(ArgumentError("LR test is only valid for models fitted on the same data, " * - "but number of observations differ")) + throw( + ArgumentError( + "LR test is only valid for models fitted on the same data, " * + "but number of observations differ", + ), + ) for i in 2:length(mods) - if df[i-1] >= df[i] || !isnested(mods[i-1], mods[i]; atol=atol) + if df[i - 1] >= df[i] || !isnested(mods[i - 1], mods[i]; atol=atol) throw(ArgumentError("LR test is only valid for nested models")) end end @@ -181,11 +190,15 @@ function StatsModels.lrtest(m0::GLM_TYPES, m::MixedModel...; atol::Real=0.0) chisq = (NaN, 2 .* abs.(_diff(ll))...) for i in 2:length(ll) - if ((forward && ll[i-1] > ll[i]) || - (!forward && ll[i-1] < ll[i])) && - !isapprox(ll[i-1], ll[i], atol=atol) - throw(ArgumentError("Log-likelihood must not be lower " * - "in models with more degrees of freedom")) + if ((forward && ll[i - 1] > ll[i]) || + (!forward && ll[i - 1] < ll[i])) && + !isapprox(ll[i - 1], ll[i]; atol=atol) + throw( + ArgumentError( + "Log-likelihood must not be lower " * + "in models with more degrees of freedom", + ), + ) end end @@ -227,7 +240,11 @@ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) return true end -function StatsModels.isnested(m1::TableRegressionModel{Union{<:GeneralizedLinearModel,<:LinearModel}}, m2::MixedModel; atol::Real=0.0) +function StatsModels.isnested( + m1::TableRegressionModel{Union{<:GeneralizedLinearModel,<:LinearModel}}, + m2::MixedModel; + atol::Real=0.0, +) return _iscomparable(m1.model, m2) && isnested(m1.model, m2; atol) end @@ -244,7 +261,9 @@ function StatsModels.isnested(m1::LinearModel, m2::LinearMixedModel; atol::Real= return true end -function StatsModels.isnested(m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedModel; atol::Real=0.0) +function StatsModels.isnested( + m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedModel; atol::Real=0.0 +) nobs(m1) == nobs(m2) || return false size(modelmatrix(m1), 2) <= size(modelmatrix(m2), 2) || return false @@ -254,13 +273,11 @@ function StatsModels.isnested(m1::GeneralizedLinearModel, m2::GeneralizedLinearM Distribution(m1) == Distribution(m2) || throw(ArgumentError("Models must be fit to the same distribution")) - Link(m1) == Link(m2) || throw(ArgumentError("Models must have the same link function")) return true end - ##### ##### Helper functions for isnested ##### @@ -293,7 +310,7 @@ end _iscomparable(m::GeneralizedLinearMixedModel...) -Check whethere GLMMs are comparable in terms of their model families and links. +Check whether GLMMs are comparable in terms of their model families and links. """ function _iscomparable(m::GeneralizedLinearMixedModel...) # TODO: test that all models are fit with same fast/nAGQ option? @@ -309,7 +326,7 @@ end _iscomparable(m1::TableRegressionModel, m2::MixedModel)_samefamily( ::GeneralizedLinearMixedModel -Check whethere a TableRegressionModel and a MixedModel have coefficient names indicative of nesting. +Check whether a TableRegressionModel and a MixedModel have coefficient names indicative of nesting. """ function _iscomparable( m1::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, m2::MixedModel @@ -334,7 +351,6 @@ function _samefamily( end _samefamily(::GeneralizedLinearMixedModel...) = false - """ _isnested(x::AbstractMatrix, y::AbstractMatrix; atol::Real=0.0) diff --git a/src/mimeshow.jl b/src/mimeshow.jl index 47983eba9..981a53823 100644 --- a/src/mimeshow.jl +++ b/src/mimeshow.jl @@ -75,7 +75,7 @@ function _markdown(lrt::LikelihoodRatioTest{N}) where {N} nr = N outrows = Vector{Vector{String}}(undef, nr + 1) - outrows[1] = ["", "model-dof", "-2 logLik","χ²", "χ²-dof", "P(>χ²)"] # colnms + outrows[1] = ["", "model-dof", "-2 logLik", "χ²", "χ²-dof", "P(>χ²)"] # colnms outrows[2] = [ string(lrt.formulas[1]), From fe0aae3dd7cd300216224a28b3f2df5eea89a744 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Mon, 25 Aug 2025 23:51:33 +0200 Subject: [PATCH 11/13] oops --- src/likelihoodratiotest.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index e82290d49..8f3794a14 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -163,6 +163,8 @@ _diff(t::NTuple{N}) where {N} = ntuple(i -> t[i + 1] - t[i], N - 1) # adapted from StatsModels with the type check elided # https://github.com/JuliaStats/StatsModels.jl/blob/ad89c6755066511e99148f013e81437e29459ed2/src/lrtest.jl#L74-L128 # we only accept one non mixed GLM so that we can require a MixedModel and avoid piracy +# we always do forward selection for GLM/GLMM comparisons +# because we need the GLM first for dispatch reasons function StatsModels.lrtest(m0::GLM_TYPES, m::MixedModel...; atol::Real=0.0) mods = (m0, m...) length(mods) >= 2 || @@ -190,16 +192,13 @@ function StatsModels.lrtest(m0::GLM_TYPES, m::MixedModel...; atol::Real=0.0) chisq = (NaN, 2 .* abs.(_diff(ll))...) for i in 2:length(ll) - if ((forward && ll[i - 1] > ll[i]) || - (!forward && ll[i - 1] < ll[i])) && - !isapprox(ll[i - 1], ll[i]; atol=atol) + ll[i - 1] > ll[i] && !isapprox(ll[i - 1], ll[i]; atol=atol) && throw( ArgumentError( "Log-likelihood must not be lower " * "in models with more degrees of freedom", ), ) - end end pval = chisqccdf.(abs.(Δdf), chisq) @@ -241,11 +240,11 @@ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) end function StatsModels.isnested( - m1::TableRegressionModel{Union{<:GeneralizedLinearModel,<:LinearModel}}, + m1::TableRegressionModel{<:Union{<:GeneralizedLinearModel,<:LinearModel}}, m2::MixedModel; atol::Real=0.0, ) - return _iscomparable(m1.model, m2) && isnested(m1.model, m2; atol) + return _iscomparable(m1, m2) && isnested(m1.model, m2; atol) end function StatsModels.isnested(m1::LinearModel, m2::LinearMixedModel; atol::Real=0.0) @@ -256,7 +255,7 @@ function StatsModels.isnested(m1::LinearModel, m2::LinearMixedModel; atol::Real= _isnested(modelmatrix(m1), modelmatrix(m2)) || return false !m2.optsum.REML || - throw(ArgumentError("REML-fitted models cannot be compared to linear models")) + throw(ArgumentError("REML-fitted modMixedModels.isnested(lm0, fm1)els cannot be compared to linear models")) return true end From 152ee2405cd6263163d1fd3bd40745a425ecd955 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 26 Aug 2025 04:41:20 +0000 Subject: [PATCH 12/13] style Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/likelihoodratiotest.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index 8f3794a14..ee2dd94bc 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -255,7 +255,11 @@ function StatsModels.isnested(m1::LinearModel, m2::LinearMixedModel; atol::Real= _isnested(modelmatrix(m1), modelmatrix(m2)) || return false !m2.optsum.REML || - throw(ArgumentError("REML-fitted modMixedModels.isnested(lm0, fm1)els cannot be compared to linear models")) + throw( + ArgumentError( + "REML-fitted modMixedModels.isnested(lm0, fm1)els cannot be compared to linear models", + ), + ) return true end From 54b2546eebaf7a6aa6d59e7e37ca1191f7c13fae Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 26 Aug 2025 04:42:24 +0000 Subject: [PATCH 13/13] Update src/likelihoodratiotest.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/likelihoodratiotest.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index ee2dd94bc..4a74cc541 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -257,7 +257,7 @@ function StatsModels.isnested(m1::LinearModel, m2::LinearMixedModel; atol::Real= !m2.optsum.REML || throw( ArgumentError( - "REML-fitted modMixedModels.isnested(lm0, fm1)els cannot be compared to linear models", + "REML-fitted modMixedModels.isnested(lm0, fm1)els cannot be compared to linear models" ), )