diff --git a/Project.toml b/Project.toml
index c0635a1e4..eaa3bba60 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "SpeciesDistributionToolkit"
 uuid = "72b53823-5c0b-4575-ad0e-8e97227ad13b"
 authors = ["Timothée Poisot "]
-version = "0.0.8"
+version = "0.0.9"
 
 [deps]
 ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
@@ -10,11 +10,13 @@ Fauxcurrences = "a2d61402-033a-4ca9-aef4-652d70cf7c9c"
 GBIF = "ee291a33-5a6c-5552-a3c8-0f29a1181037"
 GDAL = "add2ef01-049f-52c4-9ee2-e494f65e021a"
 MakieCore = "20f20a25-4f0e-4fdf-b5d1-57303727442b"
+MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 Phylopic = "c889285c-44aa-4473-b1e1-56f5d4e3ccf5"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SimpleSDMDatasets = "2c7d61d0-5c73-410d-85b2-d2e7fbbdcefa"
 SimpleSDMLayers = "2c645270-77db-11e9-22c3-0f302a89c64c"
+StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
diff --git a/src/SpeciesDistributionToolkit.jl b/src/SpeciesDistributionToolkit.jl
index e60e979c1..ae387a1b0 100644
--- a/src/SpeciesDistributionToolkit.jl
+++ b/src/SpeciesDistributionToolkit.jl
@@ -12,6 +12,9 @@ using MakieCore
 import StatsBase
 import OffsetArrays
 
+import MultivariateStats
+import StatsAPI
+
 # We make ample use of re-export
 using Reexport
 
@@ -34,6 +37,9 @@ include("integrations/gbif_phylopic.jl")
 # Plotting
 include("integrations/makie.jl")
 
+# MultivariateStats
+include("integrations/multivariatestats.jl")
+
 # Functions for IO
 include("io/geotiff.jl")
 include("io/ascii.jl")
diff --git a/src/integrations/multivariatestats.jl b/src/integrations/multivariatestats.jl
new file mode 100644
index 000000000..5b9446379
--- /dev/null
+++ b/src/integrations/multivariatestats.jl
@@ -0,0 +1,48 @@
+"""
+    _partial_array_from_layers(layers; samples=10_000)
+
+Return a `Float64` matrix with one row per sampled cell and one column per layer.
+
+The candidate cells are the unique keys found across all `layers`; up to `samples`
+of them are drawn at random, without replacement.
+
+NOTE(review): a key missing from one of the layers will presumably fail to convert
+to `Float64`; confirm that all layers share the same keys.
+"""
+function _partial_array_from_layers(
+    layers::Vector{T};
+    samples::Int=10_000,
+) where {T <: SimpleSDMLayers.SimpleSDMLayer}
+    all_keys = unique(vcat(keys.(layers)...))
+    # Sampling without replacement cannot draw more keys than there are
+    samples = min(length(all_keys), samples)
+    selected_keys = StatsBase.sample(all_keys, samples; replace=false)
+    X = Matrix{Float64}(undef, length(selected_keys), length(layers))
+    for i in axes(layers, 1)
+        for j in axes(selected_keys, 1)
+            X[j, i] = layers[i][selected_keys[j]]
+        end
+    end
+    return X
+end
+
+"""
+    StatsAPI.fit(MultivariateStats.Whitening, X; samples=10_000, kwargs...)
+
+Fit a whitening transformation on values sampled from the layers in `X`.
+
+At most `samples` cells are used; the remaining `kwargs` are forwarded to the
+matrix method of `StatsAPI.fit`.
+
+NOTE(review): the sampled matrix has cells as rows and layers as columns; confirm
+this matches the orientation `MultivariateStats` expects (or pass `dims`).
+"""
+function StatsAPI.fit(
+    ::Type{MultivariateStats.Whitening},
+    X::Vector{T};
+    samples::Int=10_000,
+    kwargs...,
+) where {T <: SimpleSDMLayers.SimpleSDMLayer}
+    Y = SpeciesDistributionToolkit._partial_array_from_layers(X; samples=samples)
+    return StatsAPI.fit(MultivariateStats.Whitening, Y; kwargs...)
+end
\ No newline at end of file