diff --git a/NEWS.md b/NEWS.md index 578a33141..3a2ad9269 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ MixedModels v5.0.0 Release Notes - The `prfit!` convenience function has been removed. [#853] - The `dataset` and `datasets` functions have been removed. They are now housed in `MixedModelsDatasets`.[#854] - One argument `predict(::GeneralizedLinearMixedModel)`, i.e. prediction on the original data, now supports the `type` keyword argument. [#856] +- `isnested(A::ReMat, B::ReMat)` is now a method of `StatsModels.isnested`.[#858] +- [BREAKING ]`likelihoodratiotest` has been reworked to be a thin wrapper around `StatsModels.lrtest`. The historical difference in behavior in terms of nesting checks created some confusion. Users advanced enough to create models with non-obvious nesting are assumed to be advanced enough to manually compute the likelihood ratio test. The function `likelihoodratiotest` and associated `LikelihoodRatioTest` type (now with a type parameter for number of models) has been kept to enable printing of test results with model formulae. Most users should not notice a difference in behavior, but the display has been slightly changed and the internal field structure has changed.[#858] MixedModels v4.38.0 Release Notes ============================== @@ -671,3 +673,9 @@ Package dependencies [#840]: https://github.com/JuliaStats/MixedModels.jl/issues/840 [#841]: https://github.com/JuliaStats/MixedModels.jl/issues/841 [#842]: https://github.com/JuliaStats/MixedModels.jl/issues/842 +[#849]: https://github.com/JuliaStats/MixedModels.jl/issues/849 +[#850]: https://github.com/JuliaStats/MixedModels.jl/issues/850 +[#853]: https://github.com/JuliaStats/MixedModels.jl/issues/853 +[#854]: https://github.com/JuliaStats/MixedModels.jl/issues/854 +[#856]: https://github.com/JuliaStats/MixedModels.jl/issues/856 +[#858]: https://github.com/JuliaStats/MixedModels.jl/issues/858 diff --git a/Project.toml b/Project.toml index 02dcd04cc..8d2c8229a 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,7 @@ MixedModelsDatasets = "7e9fb7ac-9f67-43bf-b2c8-96ba0796cbb6" NLopt = "76087f3c-5699-56af-9a33-bf431cd00edd" PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -61,6 +62,7 @@ NLopt = "0.6, 1" PRIMA = "0.2" PooledArrays = "0.5, 1" PrecompileTools = "1" +Printf = "1" ProgressMeter = "1.7" Random = "1" RegressionFormulae = "0.1.3" diff --git a/src/MixedModels.jl b/src/MixedModels.jl index 111c1e9ba..eda2bf3d1 100644 --- a/src/MixedModels.jl +++ b/src/MixedModels.jl @@ -23,6 +23,7 @@ using MixedModelsDatasets: dataset using PooledArrays: PooledArrays, PooledArray using NLopt: NLopt using PrecompileTools: PrecompileTools, @setup_workload, @compile_workload +using Printf: @sprintf using ProgressMeter: ProgressMeter, Progress, finish!, next! using Random: Random, AbstractRNG, randn! using SparseArrays: SparseArrays, SparseMatrixCSC, SparseVector, dropzeros!, nnz @@ -36,12 +37,12 @@ using StatsAPI: loglikelihood, meanresponse, modelmatrix, nobs, pvalue, predict, r2, residuals using StatsAPI: response, responsename, stderror, vcov, weights using StatsBase: StatsBase, CoefTable, model_response, summarystats -using StatsFuns: log2π, normccdf +using StatsFuns: chisqccdf, log2π, normccdf using StatsModels: StatsModels, AbstractContrasts, AbstractTerm, CategoricalTerm using StatsModels: ConstantTerm, DummyCoding, EffectsCoding, FormulaTerm, FunctionTerm using StatsModels: HelmertCoding, HypothesisCoding, InteractionTerm, InterceptTerm using StatsModels: MatrixTerm, SeqDiffCoding, TableRegressionModel -using StatsModels: apply_schema, drop_term, formula, lrtest, modelcols, @formula +using StatsModels: apply_schema, drop_term, formula, lrtest, modelcols, isnested, @formula using StructTypes: StructTypes using Tables: Tables, columntable using TypedTables: TypedTables, DictTable, FlexTable, Table diff --git a/src/likelihoodratiotest.jl b/src/likelihoodratiotest.jl index f0be3447e..4a74cc541 100644 --- a/src/likelihoodratiotest.jl +++ b/src/likelihoodratiotest.jl @@ -1,31 +1,22 @@ """ - LikelihoodRatioTest + LikelihoodRatioTest{N} -Results of MixedModels.likelihoodratiotest +Result of `MixedModels.likelihoodratiotest`. -## Fields -* `formulas`: Vector of model formulae -* `models`: NamedTuple of the `dof` and `deviance` of the models -* `tests`: NamedTuple of the sequential `dofdiff`, `deviancediff`, - and resulting `pvalues` - -## Properties -* `deviance` : note that this is actually -2 log likelihood for linear models - (i.e. without subtracting the constant for a saturated model) -* `pvalues` +This struct wraps `StatsModels.LRTestResult` with a bit more metadata +to enable a few additional `show` methods. +# Fields +- `formulas::NTuple{N, String}` +- `lrt::StatsModels.LRTestResult{N}` +- `linear::Bool` """ -struct LikelihoodRatioTest <: StatsAPI.HypothesisTest - formulas::AbstractVector{String} - models::NamedTuple{(:dof, :deviance)} - tests::NamedTuple{(:dofdiff, :deviancediff, :pvalues)} +Base.@kwdef struct LikelihoodRatioTest{N} <: StatsAPI.HypothesisTest + formulas::NTuple{N,String} + lrt::StatsModels.LRTestResult{N} linear::Bool end -function Base.propertynames(lrt::LikelihoodRatioTest, private::Bool=false) - return (:deviance, :formulas, :models, :pvalues, :tests) -end - """ pvalue(lrt::LikelihoodRatioTest) @@ -38,7 +29,7 @@ To get p-values for multiple tests, use `lrt.pvalues`. """ function StatsAPI.pvalue(lrt::LikelihoodRatioTest) pvalues = lrt.pvalues - if length(pvalues) > 1 + if length(pvalues) > 2 throw( ArgumentError( "Cannot extract **only one** p-value from a multiple test result." @@ -46,25 +37,26 @@ function StatsAPI.pvalue(lrt::LikelihoodRatioTest) ) end - return only(pvalues) + return last(pvalues) end +StatsModels.lrtest(x::LikelihoodRatioTest) = x.lrt + function Base.getproperty(lrt::LikelihoodRatioTest, s::Symbol) - if s == :dof - lrt.models.dof - elseif s == :deviance - lrt.models.deviance + if s in fieldnames(LikelihoodRatioTest) + getfield(lrt, s) elseif s == :pvalues - lrt.tests.pvalues - elseif s == :formulae - lrt.formulas + lrt.pval else - getfield(lrt, s) + getproperty(lrt.lrt, s) end end -# backward syntactic but not type compatibility -Base.getindex(lrt::LikelihoodRatioTest, s::Symbol) = getfield(lrt, s) +const GLM_TYPES = Union{ + TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, + LinearModel, + GeneralizedLinearModel, +} """ likelihoodratiotest(m::MixedModel...) @@ -76,47 +68,18 @@ Base.getindex(lrt::LikelihoodRatioTest, s::Symbol) = getfield(lrt, s) Likeihood ratio test applied to a set of nested models. -!!! note - The nesting of the models is not checked. It is incumbent on the user - to check this. This differs from `StatsModels.lrtest` as nesting in - mixed models, especially in the random effects specification, may be non obvious. - -!!! note - For comparisons between mixed and non-mixed models, the deviance for the non-mixed - model is taken to be -2 log likelihood, i.e. omitting the additive constant for the - fully saturated model. This is in line with the computation of the deviance for mixed - models. - -This functionality may be deprecated in the future in favor of `StatsModels.lrtest`. +This function is wrapper around `StatsModels.lrtest` that provides improved `show` methods +that include the model formulae, where available. For mixed models, the [`isnested`](@ref) +functionality may be overly conservative. For example, nested models with "clever" parameterizations +may be incorrectly rejected as being non-negative. In such cases, it is incumbent on the +user to perform the likelihood ratio test manually and construct the resulting [`LikelihoodRatioTest`](@ref) +object. """ -function likelihoodratiotest(m::MixedModel...) - _iscomparable(m...) || - throw(ArgumentError("""Models are not comparable: are the objectives, data - and, where appropriate, the link and family the same? - """)) - - m = collect(m) # change the tuple to an array - dofs = dof.(m) - formulas = String.(Symbol.(getproperty.(m, :formula))) - ord = sortperm(dofs) - dofs = dofs[ord] - formulas = formulas[ord] - devs = objective.(m)[ord] - dofdiffs = diff(dofs) - devdiffs = .-(diff(devs)) - pvals = map(zip(dofdiffs, devdiffs)) do (dof, dev) - if dev > 0 - ccdf(Chisq(dof), dev) - else - NaN - end - end - +function likelihoodratiotest(m0::Union{GLM_TYPES,MixedModel}, + m::MixedModel...; kwargs...) + formulas = (_formula(m0), _formula.(m)...) return LikelihoodRatioTest( - formulas, - (dof=dofs, deviance=devs), - (dofdiff=dofdiffs, deviancediff=devdiffs, pvalues=pvals), - first(m) isa LinearMixedModel, + formulas, lrtest(m0, m...; kwargs...), first(m) isa LinearMixedModel ) end @@ -124,84 +87,45 @@ _formula(::Union{LinearModel,GeneralizedLinearModel}) = "NA" function _formula(x::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}) return String(Symbol(x.mf.f)) end +_formula(x::MixedModel) = string(formula(x)) -# for GLMMs we're actually looking at the deviance and additive constants are comparable -# (because GLM deviance is actually part of the GLMM deviance computation) -# for LMMs, we're always looking at the "deviance scale" but without the additive constant -# for the fully saturated model -function _criterion( - x::Union{GeneralizedLinearModel,TableRegressionModel{<:GeneralizedLinearModel}} -) - return deviance(x) -end -function _criterion(x::Union{LinearModel,TableRegressionModel{<:LinearModel}}) - return -2 * loglikelihood(x) -end - -function likelihoodratiotest( - m0::Union{ - TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, - LinearModel, - GeneralizedLinearModel, - }, - m::MixedModel..., -) - _iscomparable(m0, first(m)) || - throw(ArgumentError("""Models are not comparable: are the objectives, data - and, where appropriate, the link and family the same? - """)) - lrt = likelihoodratiotest(m...) - devs = pushfirst!(lrt.deviance, _criterion(m0)) - formulas = pushfirst!(lrt.formulas, _formula(m0)) - dofs = pushfirst!(lrt.models.dof, dof(m0)) - devdiffs = pushfirst!(lrt.tests.deviancediff, devs[1] - devs[2]) - dofdiffs = pushfirst!(lrt.tests.dofdiff, dofs[2] - dofs[1]) - - df, dev = first(dofdiffs), first(devdiffs) - p = dev > 0 ? ccdf(Chisq(df), dev) : NaN - pvals = pushfirst!(lrt.tests.pvalues, p) - - return LikelihoodRatioTest( - formulas, - (dof=dofs, deviance=devs), - (dofdiff=dofdiffs, deviancediff=devdiffs, pvalues=pvals), - lrt.linear, - ) -end - -function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest) +Base.show(io::IO, lrt::LikelihoodRatioTest) = show(io, MIME("text/plain"), lrt) +function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest{N}) where {N} + println(io, "Likelihood-ratio test: $N models fitted on $(lrt.nobs) observations") println(io, "Model Formulae") - for (i, f) in enumerate(lrt.formulas) println(io, "$i: $f") end - # the following was adapted from StatsModels#162 - # from nalimilan - Δdf = lrt.tests.dofdiff - Δdev = lrt.tests.deviancediff + # adapted from https://github.com/JuliaStats/StatsModels.jl/blob/ad89c6755066511e99148f013e81437e29459ed2/src/lrtest.jl#L130-L178 + + Δdf = _diff(lrt.dof) + chisq = abs.(2 .* _diff(lrt.loglikelihood)) + linear = lrt.linear nc = 6 - nr = length(lrt.formulas) + nr = N outrows = Matrix{String}(undef, nr + 1, nc) - outrows[1, :] = [ - "", "model-dof", lrt.linear ? "-2 logLik" : "deviance", "χ²", "χ²-dof", "P(>χ²)" - ] # colnms - - outrows[2, :] = [ - "[1]", string(lrt.dof[1]), Ryu.writefixed(lrt.deviance[1], 4), " ", " ", " " - ] + outrows[1, :] = ["", + "DoF", + "-2 logLik", + "χ²", + "χ²-dof", "P(>χ²)"] + outrows[2, :] = ["[1]", + @sprintf("%.0d", lrt.dof[1]), + @sprintf("%.4f", -2 * lrt.loglikelihood[1]), + "", + "", + ""] for i in 2:nr - outrows[i + 1, :] = [ - "[$i]", - string(lrt.dof[i]), - Ryu.writefixed(lrt.deviance[i], 4), - Ryu.writefixed(Δdev[i - 1], 4), - string(Δdf[i - 1]), - string(StatsBase.PValue(lrt.pvalues[i - 1])), - ] + outrows[i + 1, :] = ["[$i]", + @sprintf("%.0d", lrt.dof[i]), + @sprintf("%.4f", -2 * lrt.loglikelihood[i]), + @sprintf("%.4f", chisq[i - 1]), + @sprintf("%.0d", Δdf[i - 1]), + string(StatsBase.PValue(lrt.pval[i]))] end colwidths = length.(outrows) max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc] @@ -230,58 +154,63 @@ function Base.show(io::IO, ::MIME"text/plain", lrt::LikelihoodRatioTest) return nothing end -Base.show(io::IO, lrt::LikelihoodRatioTest) = Base.show(io, MIME("text/plain"), lrt) - -function _iscomparable(m::LinearMixedModel...) - isconstant(getproperty.(getproperty.(m, :optsum), :REML)) || throw( - ArgumentError( - "Models must all be fit with the same objective (i.e. all ML or all REML)" - ), - ) - - if any(getproperty.(getproperty.(m, :optsum), :REML)) - isconstant(coefnames.(m)) || throw( +##### +##### StatsModels methods for lrtest and isnested +##### + +_diff(t::NTuple{N}) where {N} = ntuple(i -> t[i + 1] - t[i], N - 1) + +# adapted from StatsModels with the type check elided +# https://github.com/JuliaStats/StatsModels.jl/blob/ad89c6755066511e99148f013e81437e29459ed2/src/lrtest.jl#L74-L128 +# we only accept one non mixed GLM so that we can require a MixedModel and avoid piracy +# we always do forward selection for GLM/GLMM comparisons +# because we need the GLM first for dispatch reasons +function StatsModels.lrtest(m0::GLM_TYPES, m::MixedModel...; atol::Real=0.0) + mods = (m0, m...) + length(mods) >= 2 || + throw(ArgumentError("At least two models are needed to perform LR test")) + df = dof.(mods) + all(==(nobs(mods[1])), nobs.(mods)) || + throw( ArgumentError( - "Likelihood-ratio tests for REML-fitted models are only valid when the fixed-effects specifications are identical" + "LR test is only valid for models fitted on the same data, " * + "but number of observations differ", ), ) + for i in 2:length(mods) + if df[i - 1] >= df[i] || !isnested(mods[i - 1], mods[i]; atol=atol) + throw(ArgumentError("LR test is only valid for nested models")) + end end - isconstant(nobs.(m)) || - throw(ArgumentError("Models must have the same number of observations")) + dev = deviance.(mods) - return true -end + Δdf = (NaN, _diff(df)...) + dfr = Int.(dof_residual.(mods)) -# XXX we need the where clause to distinguish from the general method -# but static analysis complains if we don't use the type parameter -function _samefamily( - ::GeneralizedLinearMixedModel{<:AbstractFloat,S}... -) where {S<:Distribution} - return true -end -_samefamily(::GeneralizedLinearMixedModel...) = false + ll = loglikelihood.(mods) + chisq = (NaN, 2 .* abs.(_diff(ll))...) -function _iscomparable(m::GeneralizedLinearMixedModel...) - # TODO: test that all models are fit with same fast/nAGQ option? - _samefamily(m...) || throw(ArgumentError("Models must be fit to the same distribution")) - - isconstant(string.(Link.(m))) || - throw(ArgumentError("Models must have the same link function")) - - isconstant(nobs.(m)) || - throw(ArgumentError("Models must have the same number of observations")) + for i in 2:length(ll) + ll[i - 1] > ll[i] && !isapprox(ll[i - 1], ll[i]; atol=atol) && + throw( + ArgumentError( + "Log-likelihood must not be lower " * + "in models with more degrees of freedom", + ), + ) + end - return true + pval = chisqccdf.(abs.(Δdf), chisq) + return StatsModels.LRTestResult(Int(nobs(mods[1])), dev, ll, df, pval) end """ isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) -Indicate whether model `m1` is nested in model `m2`, i.e. whether -`m1` can be obtained by constraining some parameters in `m2`. -Both models must have been fitted on the same data. This check -is conservative for `MixedModel`s and may reject nested models with different -parameterizations as being non nested. + +Indicate whether model `m1` is nested in model `m2`, i.e. whether `m1` can be obtained by constraining some parameters in `m2`. +Both models must have been fitted on the same data. +This check is conservative for `MixedModel`s and may reject nested models with different parameterizations as being non nested. """ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) try @@ -310,35 +239,34 @@ function StatsModels.isnested(m1::MixedModel, m2::MixedModel; atol::Real=0.0) return true end -function _iscomparable( - m1::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, m2::MixedModel +function StatsModels.isnested( + m1::TableRegressionModel{<:Union{<:GeneralizedLinearModel,<:LinearModel}}, + m2::MixedModel; + atol::Real=0.0, ) - _iscomparable(m1.model, m2) || return false - - # check that the nested fixef are a subset of the outer - all(in.(coefnames(m1), Ref(coefnames(m2)))) || return false - - return true + return _iscomparable(m1, m2) && isnested(m1.model, m2; atol) end -# GLM isn't nested with in LMM and LM isn't nested within GLMM -_iscomparable(m1::Union{LinearModel,GeneralizedLinearModel}, m2::MixedModel) = false - -function _iscomparable(m1::LinearModel, m2::LinearMixedModel) +function StatsModels.isnested(m1::LinearModel, m2::LinearMixedModel; atol::Real=0.0) nobs(m1) == nobs(m2) || return false - # XXX This reaches into the internal structure of GLM - size(m1.pp.X, 2) <= size(m2.X, 2) || return false + size(modelmatrix(m1), 2) <= size(modelmatrix(m2), 2) || return false - _isnested(m1.pp.X, m2.X) || return false + _isnested(modelmatrix(m1), modelmatrix(m2)) || return false !m2.optsum.REML || - throw(ArgumentError("REML-fitted models cannot be compared to linear models")) + throw( + ArgumentError( + "REML-fitted modMixedModels.isnested(lm0, fm1)els cannot be compared to linear models" + ), + ) return true end -function _iscomparable(m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedModel) +function StatsModels.isnested( + m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedModel; atol::Real=0.0 +) nobs(m1) == nobs(m2) || return false size(modelmatrix(m1), 2) <= size(modelmatrix(m2), 2) || return false @@ -353,6 +281,79 @@ function _iscomparable(m1::GeneralizedLinearModel, m2::GeneralizedLinearMixedMod return true end +##### +##### Helper functions for isnested +##### + +""" + _iscomparable(m::LinearMixedModel...) + + +Check whether LMMs are comparable on the basis of their REML criterion. +""" +function _iscomparable(m::LinearMixedModel...) + isconstant(getproperty.(getproperty.(m, :optsum), :REML)) || throw( + ArgumentError( + "Models must all be fit with the same objective (i.e. all ML or all REML)" + ), + ) + + if any(getproperty.(getproperty.(m, :optsum), :REML)) + isconstant(coefnames.(m)) || throw( + ArgumentError( + "Likelihood-ratio tests for REML-fitted models are only valid when the fixed-effects specifications are identical" + ), + ) + end + + return true +end + +""" + _iscomparable(m::GeneralizedLinearMixedModel...) + + +Check whether GLMMs are comparable in terms of their model families and links. +""" +function _iscomparable(m::GeneralizedLinearMixedModel...) + # TODO: test that all models are fit with same fast/nAGQ option? + _samefamily(m...) || throw(ArgumentError("Models must be fit to the same distribution")) + + isconstant(string.(Link.(m))) || + throw(ArgumentError("Models must have the same link function")) + + return true +end + +""" + _iscomparable(m1::TableRegressionModel, m2::MixedModel)_samefamily( + ::GeneralizedLinearMixedModel + +Check whether a TableRegressionModel and a MixedModel have coefficient names indicative of nesting. +""" +function _iscomparable( + m1::TableRegressionModel{<:Union{LinearModel,GeneralizedLinearModel}}, m2::MixedModel +) + # check that the nested fixef are a subset of the outer + all(in.(coefnames(m1), Ref(coefnames(m2)))) || return false + + return true +end + +""" + _samefamily(::GeneralizedLinearMixedModel...) + +Check whether all GLMMS come from the same model family. +""" +function _samefamily( + ::GeneralizedLinearMixedModel{<:AbstractFloat,S}... +) where {S<:Distribution} + # XXX we need the where clause to distinguish from the general method + # but static analysis complains if we don't use the type parameter + return true +end +_samefamily(::GeneralizedLinearMixedModel...) = false + """ _isnested(x::AbstractMatrix, y::AbstractMatrix; atol::Real=0.0) diff --git a/src/mimeshow.jl b/src/mimeshow.jl index 88bc6b451..981a53823 100644 --- a/src/mimeshow.jl +++ b/src/mimeshow.jl @@ -64,19 +64,23 @@ function _markdown(b::BlockDescription) return tbl end -function _markdown(lrt::LikelihoodRatioTest) - Δdf = lrt.tests.dofdiff - Δdev = lrt.tests.deviancediff +function _markdown(lrt::LikelihoodRatioTest{N}) where {N} + Δdf = _diff(lrt.dof) + chisq = abs.(2 .* _diff(lrt.loglikelihood)) + linear = lrt.linear - nr = length(lrt.formulas) + nc = 6 + nr = N + + nr = N outrows = Vector{Vector{String}}(undef, nr + 1) - outrows[1] = ["", "model-dof", "deviance", "χ²", "χ²-dof", "P(>χ²)"] # colnms + outrows[1] = ["", "model-dof", "-2 logLik", "χ²", "χ²-dof", "P(>χ²)"] # colnms outrows[2] = [ string(lrt.formulas[1]), string(lrt.dof[1]), - string(round(Int, lrt.deviance[1])), + string(round(Int, 2 * lrt.loglikelihood[1])), " ", " ", " ", @@ -86,10 +90,10 @@ function _markdown(lrt::LikelihoodRatioTest) outrows[i + 1] = [ string(lrt.formulas[i]), string(lrt.dof[i]), - string(round(Int, lrt.deviance[i])), - string(round(Int, Δdev[i - 1])), + string(round(Int, 2 .* lrt.loglikelihood[i])), + string(round(Int, chisq[i - 1])), string(Δdf[i - 1]), - string(StatsBase.PValue(lrt.pvalues[i - 1])), + string(StatsBase.PValue(lrt.pvalues[i])), ] end diff --git a/src/remat.jl b/src/remat.jl index 3a8b39c95..17379d927 100644 --- a/src/remat.jl +++ b/src/remat.jl @@ -195,7 +195,7 @@ Is the grouping factor for `A` nested in the grouping factor for `B`? That is, does each value of `A` occur with just one value of B? """ -function isnested(A::ReMat, B::ReMat) +function StatsModels.isnested(A::ReMat, B::ReMat) size(A, 1) == size(B, 1) || throw(DimensionMismatch("must have size(A,1) == size(B,1)")) bins = zeros(Int32, nlevs(A)) @inbounds for (a, b) in zip(A.refs, B.refs) diff --git a/test/likelihoodratiotest.jl b/test/likelihoodratiotest.jl index d24f4d378..e6030f42b 100644 --- a/test/likelihoodratiotest.jl +++ b/test/likelihoodratiotest.jl @@ -71,35 +71,28 @@ end lm0 = lm(@formula(reaction ~ 1), slp) lm1 = lm(@formula(reaction ~ 1 + days), slp) - @test MixedModels._iscomparable(lm0, fm1) - @test !MixedModels._iscomparable(lm1, fm0) + @test MixedModels.isnested(lm0, fm1) + @test !MixedModels.isnested(lm1, fm0) lrt = likelihoodratiotest(fm0, fm1) + @test lrtest(fm0, fm1) == lrtest(lrt) - @test [deviance(fm0), deviance(fm1)] == lrt.deviance - @test deviance(fm0) - deviance(fm1) == only(lrt.tests.deviancediff) - @test only(lrt.tests.dofdiff) == 1 - @test sum(length, lrt.tests) == 3 - @test sum(length, lrt.pvalues) == 1 - @test sum(length, lrt.models) == 4 - @test length(lrt.formulae) == 2 - show(IOBuffer(), lrt) - @test :pvalues in propertynames(lrt) - @test only(lrt.pvalues) == pvalue(lrt) + @test (deviance(fm0), deviance(fm1)) == lrt.deviance + @test sprint(show, lrt) == "Likelihood-ratio test: 2 models fitted on 180 observations\nModel Formulae\n1: reaction ~ 1 + (1 + days | subj)\n2: reaction ~ 1 + days + (1 + days | subj)\n────────────────────────────────────────────\n DoF -2 logLik χ² χ²-dof P(>χ²)\n────────────────────────────────────────────\n[1] 5 1775.4759 \n[2] 6 1751.9393 23.5365 1 <1e-05\n────────────────────────────────────────────" + @test last(lrt.pvalues) == pvalue(lrt) lrt = likelihoodratiotest(lm1, fm1) - @test lrt.deviance ≈ likelihoodratiotest(lm1.model, fm1).deviance - @test lrt.dof == [3, 6] - @test lrt.deviance ≈ -2 * loglikelihood.([lm1, fm1]) - shown = sprint(show, lrt) - @test occursin("-2 logLik", shown) - @test !occursin("deviance", shown) + @test pvalue(lrt) ≈ 5.9e-32 atol=1e-16 + + lrt2 = likelihoodratiotest(lm1.model, fm1) + @test first(lrt2.formulas) == "NA" + @test lrtest(lrt2) == lrtest(lrt) lrt = likelihoodratiotest(lm0, fm0, fm1) - @test_throws ArgumentError pvalue(lrt) + @suppress @test_throws ArgumentError pvalue(lrt) # non nested FE between non-mixed and mixed - @test_throws ArgumentError likelihoodratiotest(lm1, fm0) + @suppress @test_throws ArgumentError likelihoodratiotest(lm1, fm0) # mix of REML and ML fm0 = fit( @@ -109,8 +102,8 @@ end REML=true, progress=false, ) - @test_throws ArgumentError likelihoodratiotest(fm0, fm1) - @test_throws ArgumentError likelihoodratiotest(lm0, fm0) + @suppress @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + @suppress @test_throws ArgumentError likelihoodratiotest(lm0, fm0) # differing FE with REML fm1 = fit( @@ -121,18 +114,19 @@ end progress=false, ) - @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + @suppress @test_throws ArgumentError likelihoodratiotest(fm0, fm1) + contra = MixedModels.dataset(:contra) # glm doesn't like categorical responses, so we convert it to numeric ourselves # TODO: upstream fix - cc = DataFrame(contra) + cc = DataFrame(dataset(:contra)) cc.usenum = ifelse.(cc.use .== "Y", 1, 0) gmf = glm(@formula(usenum ~ 1 + age + urban + livch), cc, Bernoulli()) gmf2 = glm(@formula(usenum ~ 1 + age + abs2(age) + urban + livch), cc, Bernoulli()) gm0 = fit( MixedModel, @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - contra, + dataset(:contra), Bernoulli(); fast=true, progress=false, @@ -140,61 +134,44 @@ end gm1 = fit( MixedModel, @formula(use ~ 1 + age + abs2(age) + urban + livch + (1 | urban & dist)), - contra, + dataset(:contra), Bernoulli(); fast=true, progress=false, ) lrt = likelihoodratiotest(gmf, gm1) - @test [-2 * loglikelihood(gmf), deviance(gm1)] ≈ lrt.deviance - @test -2 * loglikelihood(gmf) - deviance(gm1) ≈ only(lrt.tests.deviancediff) - shown = sprint(show, lrt) - @test !occursin("-2 logLik", shown) - @test occursin("deviance", shown) + @test 2 * only(diff(collect(lrt.loglikelihood))) ≈ 95.0725 atol=0.0001 lrt = likelihoodratiotest(gm0, gm1) - @test [deviance(gm0), deviance(gm1)] == lrt.deviance - @test deviance(gm0) - deviance(gm1) == only(lrt.tests.deviancediff) - @test first(lrt.tests.dofdiff) == 1 - @test sum(length, lrt.tests) == 3 - @test sum(length, lrt.pvalues) == 1 - @test sum(length, lrt.models) == 4 - @test length(lrt.formulae) == 2 + @test 2 * only(diff(collect(lrt.loglikelihood))) ≈ 38.0713 atol=0.0001 # mismatched links gm_probit = fit( MixedModel, @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - contra, + dataset(:contra), Bernoulli(), ProbitLink(); fast=true, progress=false, ) - @test_throws ArgumentError likelihoodratiotest(gmf, gm_probit) - @test_throws ArgumentError likelihoodratiotest(gm0, gm_probit) + @suppress @test_throws ArgumentError likelihoodratiotest(gmf, gm_probit) + @suppress @test_throws ArgumentError likelihoodratiotest(gm0, gm_probit) # mismatched families gm_poisson = fit( MixedModel, @formula(use ~ 1 + age + urban + livch + (1 | urban & dist)), - contra, + dataset(:contra), Poisson(); fast=true, progress=false, ) - @test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson) - @test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson) - - @test !MixedModels._iscomparable(lm0, gm0) - @test !MixedModels._iscomparable(gmf, fm1) - - @test MixedModels._iscomparable(gmf, gm0) - @test !MixedModels._iscomparable(gmf2, gm0) + @suppress @test_throws ArgumentError likelihoodratiotest(gmf, gm_poisson) + @suppress @test_throws ArgumentError likelihoodratiotest(gm0, gm_poisson) - @test MixedModels._isnested(gmf.mm.m, gm0.X) - @test !MixedModels._isnested(gmf2.mm.m, gm0.X) + @test !MixedModels._samefamily(gm0, gm_poisson) # this skips the linear term so that the model matrices # have the same column rank @test !MixedModels._isnested(gmf2.mm.m[:, Not(2)], gm0.X) diff --git a/test/mime.jl b/test/mime.jl index 7f740dbe9..1798745d8 100644 --- a/test/mime.jl +++ b/test/mime.jl @@ -93,17 +93,12 @@ lrt = likelihoodratiotest(fm0, fm1) end @testset "lrt" begin - @test sprint(show, mime, lrt) in (""" -| | model-dof | deviance | χ² | χ²-dof | P(>χ²) | -|:---------------------------------------- | ---------:| --------:| ---:| ------:|:------ | -| reaction ~ 1 + days + (1 \\| subj) | 4 | 1794 | | | | -| reaction ~ 1 + days + (1 + days \\| subj) | 6 | 1752 | 42 | 2 | <1e-09 | -""", """ - | | model-dof | deviance | χ² | χ²-dof | P(>χ²) | - |:---------------------------------------- | ---------:| --------:| ---:| ------:|:------ | - | reaction ~ 1 + days + (1 \\| subj) | 4 | 1794 | | | | - | reaction ~ 1 + days + (1 + days \\| subj) | 6 | 1752 | 42 | 2 | <1e-9 | - """) + @test sprint(show, mime, lrt) == """ +| | model-dof | -2 logLik | χ² | χ²-dof | P(>χ²) | +|:---------------------------------------- | ---------:| ---------:| ---:| ------:|:------ | +| reaction ~ 1 + days + (1 \\| subj) | 4 | -1794 | | | | +| reaction ~ 1 + days + (1 + days \\| subj) | 6 | -1752 | 42 | 2 | <1e-09 | +""" end @testset "blockdescription" begin @@ -214,7 +209,7 @@ rows & subj & item & fixed \\\\ # not doing the full comparison here because there's a zero-padded exponent # that will render differently on different platforms @test startswith(sprint(show, MIME("text/latex"), lrt), - "\\begin{tabular}\n{l | r | r | r | r | l}\n & model-dof & deviance & \$\\chi^2\$ & \$\\chi^2\$-dof & P(>\$\\chi^2\$) \\\\", + "\\begin{tabular}\n{l | r | r | r | r | l}\n & model-dof & -2 logLik & \$\\chi^2\$ & \$\\chi^2\$-dof & P(>\$\\chi^2\$) \\\\", ) optsum = sprint(show, MIME("text/latex"), fm0.optsum) diff --git a/test/pls.jl b/test/pls.jl index f5a4ba87e..d5d3af870 100644 --- a/test/pls.jl +++ b/test/pls.jl @@ -264,8 +264,7 @@ end @test "Diagonal" in tokens lrt = likelihoodratiotest(models(:pastes)...) - @test length(lrt.deviance) == length(lrt.formulas) == length(lrt.models) == 2 - @test only(lrt.tests.pvalues) ≈ 0.5233767965780878 atol = 0.0001 + @test last(lrt.pvalues) ≈ 0.5233767965780878 atol = 0.0001 @testset "missing variables in formula" begin ae = ArgumentError(