StructuralEquationModels · Maximilian-Stefan-Ernst · May 11, 2025 · Apr 15, 2025 · Apr 16, 2025 · Apr 16, 2025
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "StructuralEquationModels"
 uuid = "383ca8c5-e4ff-4104-b0a9-f7b279deed53"
 authors = ["Maximilian Ernst", "Aaron Peikert"]
-version = "0.4.0"
+version = "0.4.2"
 
 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"

diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 | [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://structuralequationmodels.github.io/StructuralEquationModels.jl/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://structuralequationmodels.github.io/StructuralEquationModels.jl/dev/) | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Github Action CI](https://github.com/StructuralEquationModels/StructuralEquationModels.jl/workflows/CI_extended/badge.svg)](https://github.com/StructuralEquationModels/StructuralEquationModels.jl/actions/) [![codecov](https://codecov.io/gh/StructuralEquationModels/StructuralEquationModels.jl/branch/main/graph/badge.svg?token=P2kjzpvM4V)](https://codecov.io/gh/StructuralEquationModels/StructuralEquationModels.jl) | [![DOI](https://zenodo.org/badge/228649704.svg)](https://zenodo.org/badge/latestdoi/228649704) |
 
 > [!NOTE]  
-> Check out our [preprint](https://formal-methods-mpi.github.io/pkgmanuscript/manuscript.pdf) on the package!
+> Check out our [preprint](https://doi.org/10.31234/osf.io/zwe8g_v1) on the package!
 
 # What is this Package for?
 
@@ -39,7 +39,7 @@ The package makes use of
 - SparseArrays.jl to speed up symbolic computations.
 - Optim.jl and NLopt.jl to provide a range of different Optimizers/Linesearches.
 - ProximalAlgorithms.jl for regularization.
-- FiniteDiff.jl and ForwardDiff.jl to provide gradients for user-defined loss functions.
+- FiniteDiff.jl and to provide gradient approximations for user-defined loss functions.
 
 # At the moment, we are still working on:
 - optimizing performance for big models (with hundreds of parameters)

diff --git a/docs/make.jl b/docs/make.jl
@@ -52,7 +52,6 @@ makedocs(
             "files" => "internals/files.md",
             "types" => "internals/types.md",
         ],
-        "Complementary material" => ["Mathematical appendix" => "complementary/maths.md"],
     ],
     format = Documenter.HTML(
         prettyurls = get(ENV, "CI", nothing) == "true",

diff --git a/docs/src/developer/implied.md b/docs/src/developer/implied.md
@@ -62,11 +62,8 @@ model per group and an additional model with `ImpliedEmpty` and `SemRidge` for t
 # Extended help
 
 ## Interfaces
-- `params(::RAMSymbolic) `-> Vector of parameter labels
-- `nparams(::RAMSymbolic)` -> Number of parameters
-
-## Implementation
-Subtype of `SemImplied`.
+- `param_labels(::ImpliedEmpty) `-> Vector of parameter labels
+- `nparams(::ImpliedEmpty)` -> Number of parameters
 """
 struct ImpliedEmpty{A, B, C} <: SemImplied
     hessianeval::A
@@ -78,7 +75,12 @@ end
 ### Constructors
 ############################################################################################
 
-function ImpliedEmpty(;specification, meanstruct = NoMeanStruct(), hessianeval = ExactHessian(), kwargs...)
+function ImpliedEmpty(;
+    specification,
+    meanstruct = NoMeanStruct(),
+    hessianeval = ExactHessian(),
+    kwargs...,
+)
     return ImpliedEmpty(hessianeval, meanstruct, convert(RAMMatrices, specification))
 end
 

diff --git a/docs/src/developer/loss.md b/docs/src/developer/loss.md
@@ -204,7 +204,7 @@ julia>?
 
 help?> RAM
 
-help?> SemObservedCommon
+help?> SemObservedData
 ```
 
 We see that the model implied covariance matrix can be assessed as `Σ(implied)` and the observed covariance matrix as `obs_cov(observed)`.

diff --git a/docs/src/developer/observed.md b/docs/src/developer/observed.md
@@ -28,8 +28,8 @@ nsamples(observed::MyObserved) = ...
 nobserved_vars(observed::MyObserved) = ...
 ```
 
-As always, you can add additional methods for properties that implied types and loss function want to access, for example (from the `SemObservedCommon` implementation):
+As always, you can add additional methods for properties that implied types and loss function want to access, for example (from the `SemObservedData` implementation):
 
 ```julia
-obs_cov(observed::SemObservedCommon) = observed.obs_cov
+obs_cov(observed::SemObservedData) = observed.obs_cov
 ```
diff --git a/docs/src/developer/optimizer.md b/docs/src/developer/optimizer.md
@@ -16,7 +16,7 @@ SemOptimizer{:Name}(args...; kwargs...) = SemOptimizerName(args...; kwargs...)
 
 SemOptimizerName(;
     algorithm = LBFGS(),
-    options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8),
+    options = Optim.Options(; f_reltol = 1e-10, x_abstol = 1.5e-8),
     kwargs...,
 ) = SemOptimizerName(algorithm, options)
 
@@ -37,13 +37,13 @@ options(optimizer::SemOptimizerName) = optimizer.options
 Note that your optimizer is a subtype of `SemOptimizer{:Name}`, where you can choose a `:Name` that can later be used as a keyword argument to `fit(engine = :Name)`.
 Similarly, `SemOptimizer{:Name}(args...; kwargs...) = SemOptimizerName(args...; kwargs...)` should be defined as well as a constructor that uses only keyword arguments:
 
-´´´julia
+```julia
 SemOptimizerName(;
     algorithm = LBFGS(),
-    options = Optim.Options(; f_tol = 1e-10, x_tol = 1.5e-8),
+    options = Optim.Options(; f_reltol = 1e-10, x_abstol = 1.5e-8),
     kwargs...,
 ) = SemOptimizerName(algorithm, options)
-´´´
+```
 A method for `update_observed` and additional methods might be usefull, but are not necessary.
 
 Now comes the substantive part: We need to provide a method for `fit`:

diff --git a/docs/src/internals/files.md b/docs/src/internals/files.md
@@ -10,7 +10,7 @@ Source code is in the `"src"` folder:
 - `"StructuralEquationModels.jl"` defines the module and the exported objects
 - `"types.jl"` defines all abstract types and the basic type hierarchy
 - `"objective_gradient_hessian.jl"` contains methods for computing objective, gradient and hessian values for different model types as well as generic fallback methods
-- The four folders `"observed"`, `"implied"`, `"loss"` and `"diff"` contain implementations of specific subtypes (for example, the `"loss"` folder contains a file `"ML.jl"` that implements the `SemML` loss function).
+- The folders `"observed"`, `"implied"`, and `"loss"` contain implementations of specific subtypes (for example, the `"loss"` folder contains a file `"ML.jl"` that implements the `SemML` loss function).
 - `"optimizer"` contains connections to different optimization backends (aka methods for `fit`)
     - `"optim.jl"`: connection to the `Optim.jl` package
 - `"frontend"` contains user-facing functions

diff --git a/docs/src/internals/internals.md b/docs/src/internals/internals.md
@@ -1,3 +1,3 @@
 # Internals and Design
 
-On the following pages, we document the internals and design of the package. Those informations are no prerequisite for extending the package (as decribed in the developer documentation)!, but they may be useful and hopefully interesting.
+On the following pages, we document some technical information about the package. Those informations are no prerequisite for extending the package (as decribed in the developer documentation)!, but they may be useful.
diff --git a/docs/src/performance/simulation.md b/docs/src/performance/simulation.md
@@ -1,9 +1,5 @@
 # Simulation studies
 
-!!! note "Simulation study interface"
-    We are currently working on an interface for simulation studies.
-    Until we are finished with this, this page is just a collection of tips.
-
 ## Replace observed data
 In simulation studies, a common task is fitting the same model to many different datasets.
 It would be a waste of resources to reconstruct the complete model for each dataset.

diff --git a/docs/src/tutorials/backends/nlopt.md b/docs/src/tutorials/backends/nlopt.md
@@ -1,7 +1,7 @@
 # Using NLopt.jl
 
 [`SemOptimizerNLopt`](@ref) implements the connection to `NLopt.jl`.
-It is only available if the `NLopt` package is loaded alongside `StructuralEquationModel.jl` in the running Julia session.
+It is only available if the `NLopt` package is loaded alongside `StructuralEquationModels.jl` in the running Julia session.
 It takes a bunch of arguments:
 
 ```julia

diff --git a/docs/src/tutorials/construction/build_by_parts.md b/docs/src/tutorials/construction/build_by_parts.md
@@ -47,21 +47,21 @@ partable = ParameterTable(
 Now, we construct the different parts:
 
 ```@example build
-# observed ---------------------------------------------------------------------------------
+# observed -----------------------------------------------------------------------------
 observed = SemObservedData(specification = partable, data = data)
 
-# implied ------------------------------------------------------------------------------------
+# implied ------------------------------------------------------------------------------
 implied_ram = RAM(specification = partable)
 
-# loss -------------------------------------------------------------------------------------
+# loss ---------------------------------------------------------------------------------
 ml = SemML(observed = observed)
 
 loss_ml = SemLoss(ml)
 
-# optimizer -------------------------------------------------------------------------------------
+# optimizer ----------------------------------------------------------------------------
 optimizer = SemOptimizerOptim()
 
-# model ------------------------------------------------------------------------------------
+# model --------------------------------------------------------------------------------
 
 model_ml = Sem(observed, implied_ram, loss_ml)
 

diff --git a/docs/src/tutorials/construction/outer_constructor.md b/docs/src/tutorials/construction/outer_constructor.md
@@ -14,9 +14,8 @@ Structural Equation Model
 - Loss Functions
    SemML
 - Fields
-   observed:  SemObservedCommon
-   implied:     RAM
-   optimizer: SemOptimizerOptim
+   observed:  SemObservedData
+   implied:   RAM
 ```
 
 The output of this call tells you exactly what model you just constructed (i.e. what the loss functions, observed, implied and optimizer parts are).

diff --git a/docs/src/tutorials/regularization/regularization.md b/docs/src/tutorials/regularization/regularization.md
@@ -30,13 +30,12 @@ It can be used as
 ```julia
 SemOptimizerProximal(
     algorithm = ProximalAlgorithms.PANOC(),
-    options = Dict{Symbol, Any}(),
     operator_g,
     operator_h = nothing
     )
 ```
 
-The proximal operator (aka the regularization function) can be passed as `operator_g`, available options are listed [here](https://juliafirstorder.github.io/ProximalOperators.jl/stable/functions/).
+The proximal operator (aka the regularization function) can be passed as `operator_g`.
 The available Algorithms are listed [here](https://juliafirstorder.github.io/ProximalAlgorithms.jl/stable/guide/implemented_algorithms/).
 
 ## First example - lasso

diff --git a/src/additional_functions/helper.jl b/src/additional_functions/helper.jl
@@ -14,84 +14,29 @@ function neumann_series(mat::SparseMatrixCSC; maxiter::Integer = size(mat, 1))
     return inverse
 end
 
-#=
-function make_onelement_array(A)
-    isa(A, Array) ? nothing : (A = [A])
-    return A
-end
- =#
-
-function semvec(observed, implied, loss, optimizer)
-    observed = make_onelement_array(observed)
-    implied = make_onelement_array(implied)
-    loss = make_onelement_array(loss)
-    optimizer = make_onelement_array(optimizer)
-
-    #sem_vec = Array{AbstractSem}(undef, maximum(length.([observed, implied, loss, optimizer])))
-    sem_vec = Sem.(observed, implied, loss, optimizer)
-
-    return sem_vec
-end
-
-skipmissing_mean(mat::AbstractMatrix) =
-    [mean(skipmissing(coldata)) for coldata in eachcol(mat)]
-
-function F_one_person(imp_mean, meandiff, inverse, data, logdet)
-    F = logdet
-    @. meandiff = data - imp_mean
-    F += dot(meandiff, inverse, meandiff)
-    return F
-end
-
-function remove_all_missing(data::AbstractMatrix)
-    keep = Vector{Int64}()
-    for (i, coldata) in zip(axes(data, 1), eachrow(data))
-        if any(!ismissing, coldata)
-            push!(keep, i)
-        end
-    end
-    return data[keep, :], keep
-end
-
 function batch_inv!(fun, model)
     for i in 1:size(fun.inverses, 1)
         fun.inverses[i] .= LinearAlgebra.inv!(fun.choleskys[i])
     end
 end
 
-#=
-function batch_sym_inv_update!(fun::Union{LossFunction, DiffFunction}, model)
-    M_inv = inv(fun.choleskys[1])
-    for i = 1:size(fun.inverses, 1)
-        if size(model.observed.patterns_not[i]) == 0
-            fun.inverses[i] .= M_inv
-        else
-            ind_not = model.observed.patterns_not[i]
-            ind = model.observed.patterns[i]
-
-            A = M_inv[ind_not, ind]
-            H = cholesky(M_inv[ind_not, ind_not])
-            D = H \ A
-            out = M_inv[ind, ind] - LinearAlgebra.BLAS.gemm('T', 'N', 1.0, A, D)
-            fun.inverses[i] .= out
-        end
-    end
-end =#
-
-function sparse_outer_mul!(C, A, B, ind) #computes A*S*B -> C, where ind gives the entries of S that are 1
+# computes A*S*B -> C, where ind gives the entries of S that are 1
+function sparse_outer_mul!(C, A, B, ind) 
     fill!(C, 0.0)
     for i in 1:length(ind)
         BLAS.ger!(1.0, A[:, ind[i][1]], B[ind[i][2], :], C)
     end
 end
 
-function sparse_outer_mul!(C, A, ind) #computes A*∇m, where ∇m ind gives the entries of ∇m that are 1
+# computes A*∇m, where ∇m ind gives the entries of ∇m that are 1
+function sparse_outer_mul!(C, A, ind) 
     fill!(C, 0.0)
     @views C .= sum(A[:, ind], dims = 2)
     return C
 end
 
-function sparse_outer_mul!(C, A, B::Vector, ind) #computes A*S*B -> C, where ind gives the entries of S that are 1
+# computes A*S*B -> C, where ind gives the entries of S that are 1
+function sparse_outer_mul!(C, A, B::Vector, ind) 
     fill!(C, 0.0)
     @views @inbounds for i in 1:length(ind)
         C .+= B[ind[i][2]] .* A[:, ind[i][1]]

diff --git a/src/additional_functions/simulation.jl b/src/additional_functions/simulation.jl
@@ -7,7 +7,7 @@ Return a new model with swaped observed part.
 
 # Arguments
 - `model::AbstractSemSingle`: model to swap the observed part of.
-- `kwargs`: additional keyword arguments; typically includes `data = ...`
+- `kwargs`: additional keyword arguments; typically includes `data` and `specification`
 - `observed`: Either an object of subtype of `SemObserved` or a subtype of `SemObserved`
 
 # Examples

diff --git a/src/additional_functions/start_val/start_fabin3.jl b/src/additional_functions/start_val/start_fabin3.jl
@@ -18,7 +18,7 @@ end
 # SemObservedMissing
 function start_fabin3(observed::SemObservedMissing, implied, args...; kwargs...)
     if !observed.em_model.fitted
-        em_mvn(observed; kwargs...)
+        em_mvn!(observed; kwargs...)
     end
 
     return start_fabin3(implied.ram_matrices, observed.em_model.Σ, observed.em_model.μ)
@@ -45,24 +45,6 @@ function start_fabin3(
     )
     @assert length(F_var2obs) == size(F, 1)
 
-    # check in which matrix each parameter appears
-
-    #=     in_S = length.(S_ind) .!= 0
-        in_A = length.(A_ind) .!= 0
-        A_ind_c = [linear2cartesian(ind, (n_var, n_var)) for ind in A_ind]
-        in_Λ = [any(ind[2] .∈ F_ind) for ind in A_ind_c]
-
-        if !isnothing(M)
-            in_M = length.(M_ind) .!= 0
-            in_any = in_A .| in_S .| in_M
-        else
-            in_any = in_A .| in_S
-        end
-
-        if !all(in_any)
-            @warn "Could not determine fabin3 starting values for some parameters, default to 0."
-        end =#
-
     # set undirected parameters in S
     S_indices = CartesianIndices(S)
     for j in 1:nparams(S)
@@ -79,7 +61,6 @@ function start_fabin3(
 
     # set loadings
     A_indices = CartesianIndices(A)
-    # ind_Λ = findall([is_in_Λ(ind_vec, F_ind) for ind_vec in A_ind_c])
 
     # collect latent variable indicators in A
     # maps latent parameter to the vector of dependent vars

diff --git a/src/additional_functions/start_val/start_simple.jl b/src/additional_functions/start_val/start_simple.jl
@@ -63,7 +63,6 @@ function start_simple(
     nparams(ram_matrices)
 
     start_val = zeros(n_par)
-    n_obs = nobserved_vars(ram_matrices)
     n_var = nvars(ram_matrices)
 
     C_indices = CartesianIndices((n_var, n_var))

diff --git a/src/frontend/common.jl b/src/frontend/common.jl
@@ -1,7 +1,7 @@
 # API methods supported by multiple SEM.jl types
 
 """
-    params(semobj) -> Vector{Symbol}
+    params(partable::ParameterTable) -> Vector{Symbol}
 
 Return the vector of SEM model parameter identifiers.
 """
@@ -42,7 +42,7 @@ nlatent_vars(semobj) = length(latent_vars(semobj))
 """
     param_indices(semobj)
 
-Returns a dict of parameter names and their indices in `semobj`.
+Returns a dict of parameter labels and their indices in `semobj`.
 
 # Examples
 ```julia

diff --git a/src/frontend/fit/fitmeasures/fit_measures.jl b/src/frontend/fit/fitmeasures/fit_measures.jl
@@ -14,6 +14,6 @@ end
 """
     fit_measures(sem_fit, args...)
 
-Return a default set of fit measures or the fit measures passed as `arg...`.
+Return a default set of fit measures or the fit measures passed as `args...`.
 """
 function fit_measures end
diff --git a/src/frontend/fit/fitmeasures/minus2ll.jl b/src/frontend/fit/fitmeasures/minus2ll.jl
@@ -41,7 +41,7 @@ end
 function minus2ll(observed::SemObservedMissing)
     # fit EM-based mean and cov if not yet fitted
     # FIXME EM could be very computationally expensive
-    observed.em_model.fitted || em_mvn(observed)
+    observed.em_model.fitted || em_mvn!(observed)
 
     Σ = observed.em_model.Σ
     μ = observed.em_model.μ