From 9bbe4c33717c45478f9eafe4f57460f555e1b9d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Remi=C5=A1?= Date: Thu, 21 Aug 2025 13:43:50 +0300 Subject: [PATCH 1/4] Added Gustafson-Kessel clustering to genfis.jl --- src/genfis.jl | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/src/genfis.jl b/src/genfis.jl index 30446de..e7f5a9d 100644 --- a/src/genfis.jl +++ b/src/genfis.jl @@ -54,3 +54,104 @@ function fuzzy_cmeans(X::Matrix{T}, N::Int; m = 2.0, maxiter = 100, end return C, U end + + +using LinearAlgebra, Statistics + +""" +Performs fuzzy clustering on the data `X` using `C` clusters. + +### Input + +- `X` -- ``d × N`` matrix of data, each column is a data point +- `C` -- number of clusters used. + +### Keyword arguments + +- `α` -- exponent of the fuzzy membership function, default `2.0` +- `ρ` -- ``1 × C`` vector of normalization factors for cluster sizes, default `ones(C)` +- `maxiter` -- maximum number of iterations, default `100` +- `tol` -- absolute error for stopping condition. Stop if ``|Eₖ - Eₖ₊₁|≤tol``, where ``Eₖ`` + is the cost function value at the ``k``:th iteration. +- `stabilize` -- applies covariance matrix stabilization proposed by Babuska et. al., default `true` +- `γ` -- regularization coefficient, only used when stabilize is true, default `0.1` +- `β` -- condition number threshold, only used when stabilize is true, default `10^15` +### Output + +- `V` -- ``d × N``matrix of centers, each column is the center of a cluster. +- `W` -- ``N × C`` matrix of membership degrees, ``Wᵢⱼ`` tells has the membership degree of + the ``i``th point to the ``j``th cluster. +""" +function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), + maxiter=100, tol=1e-5, stabilize=true, γ=0.1, β=10^15) where {T <: Real} + + α > 1 || throw(ArgumentError("α must be greater than 1")) + size(ρ, 1) == C || throw(ArgumentError("length of ρ must equal to C")) + + N = size(X, 2) + d = size(X, 1) + W = rand(float(T), N, C) + W ./= sum(W, dims=2) + distances = zeros(N, C) + e = 2 / (α - 1) + J = Inf + P0_det = stabilize ? det(cov(X')) ^(1/d) : 0 + + V = Matrix{Float64}(undef, d, C) + + for iter in 1:maxiter + pow_W = W .^ α + W_sum = sum(pow_W, dims=1) + V .= (X * pow_W) ./ W_sum + for (i, vi) in enumerate(eachcol(V)) + P = zeros(d, d) + for(j, xj) in enumerate(eachcol(X)) + sub = xj - vi + P += pow_W[j, i] .* sub * sub' + end + + P ./= W_sum[1, i] + stabilize && _recalculate_cov_matrix(P, γ, β, P0_det) + A = (ρ[i] * det(P))^(1/d) * inv(P) + + for(j, xj) in enumerate(eachcol(X)) + sub = xj - vi + distances[j, i] = sub' * A * sub + end + end + + J_new = 0 + for (i, di) in enumerate(eachrow(distances)) + idx = findfirst(==(0), di) + + if idx === nothing + for (j, dij) in enumerate(di) + denom = sum((dij ./ di[k]) .^ e for k in 1:C) + W[i, j] = 1/denom + J_new += W[i, j] * dij + end + else + result = zeros(N) + result[idx] = 1 + W[i, :] .= result + end + end + + abs(J - J_new) < tol && break + J = J_new + end + + return V, W +end + +function _recalculate_cov_matrix(P::Matrix{Float64}, γ::Number, + β::Int, P0_det::Float64) + temp_P = (1 - γ) * P + γ * P0_det * I(size(P, 1)) + eig = eigen(Symmetric(temp_P)) + eig_vals = eig.values + max_val = maximum(eig_vals) + + eig_vals[max_val ./ eig_vals .> β] .= max_val / β + + P .= eig.vectors * diagm(eig_vals) * eig.vectors' +end \ No newline at end of file From 22b635e9f5cb904430c17c489f20313dff9d2aad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Remi=C5=A1?= Date: Wed, 27 Aug 2025 15:59:05 +0300 Subject: [PATCH 2/4] Created tests for Gustafson-Kessel --- src/genfis.jl | 3 ++- test/test_genfis.jl | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/genfis.jl b/src/genfis.jl index e7f5a9d..fad0e8c 100644 --- a/src/genfis.jl +++ b/src/genfis.jl @@ -87,6 +87,7 @@ function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), α > 1 || throw(ArgumentError("α must be greater than 1")) size(ρ, 1) == C || throw(ArgumentError("length of ρ must equal to C")) + γ >= 0 && γ <= 1 || throw(ArgumentError("γ must be within the range [0, 1]")) N = size(X, 2) d = size(X, 1) @@ -99,7 +100,7 @@ function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), V = Matrix{Float64}(undef, d, C) - for iter in 1:maxiter + @inbounds for iter in 1:maxiter pow_W = W .^ α W_sum = sum(pow_W, dims=1) V .= (X * pow_W) ./ W_sum diff --git a/test/test_genfis.jl b/test/test_genfis.jl index 9838eea..315eb7e 100644 --- a/test/test_genfis.jl +++ b/test/test_genfis.jl @@ -1,4 +1,9 @@ -using FuzzyLogic, Test +using FuzzyLogic, Distributions, Random, Test, LinearAlgebra + +function generate_cluster(μ::Vector{Float64}, σ::Matrix{Float64}, n::Int) + dist = MvNormal(μ, σ) + return rand(dist, n) +end @testset "fuzzy c-means" begin X = [-3 -3 -3 -2 -2 -2 -1 0 1 2 2 2 3 3 3; @@ -7,3 +12,31 @@ using FuzzyLogic, Test @test sortslices(C; dims = 2)≈[-2.02767 2.02767; 0 0] atol=1e-3 @test_throws ArgumentError fuzzy_cmeans(X, 3; m = 1) end + +@testset "Gustafson-Kessel" begin + Random.seed!(5) + μ1 = [-2; 1.0] + μ2 = [6.0; 3] + X1 = generate_cluster(μ1, diagm(fill(0.25, 2)), 200) + + θ = π / 4 + U = [cos(θ) -sin(θ); sin(θ) cos(θ)] + D = Diagonal([5, 0.25]) + + Σ = U * D * U' + + X2 = generate_cluster(fill(0.0, 2), Σ, 300) + X3 = generate_cluster(-μ1, diagm(fill(0.5, 2)), 200) + X4 = generate_cluster(μ2, diagm(fill(0.1, 2)), 200) + + X = hcat(X1, X2, X3, X4) + + Random.seed!() + (C, U) = gustafson_kessel(X, 4; α=2) + + @test sortslices(C, dims=2)≈[-1.98861 -0.08146 2.00787 5.94698; 1.04687 -0.03456 -0.95882 3.02226] atol=1e-3 + @test_throws ArgumentError gustafson_kessel(X, 4; α=1) + @test_throws ArgumentError gustafson_kessel(X, 4; ρ=[1, 1, 1]) + @test_throws ArgumentError gustafson_kessel(X, 4, γ=2) + +end \ No newline at end of file From 5368b936c7e39018b4758931cbcd0faa2ef3f315 Mon Sep 17 00:00:00 2001 From: tomre Date: Fri, 29 Aug 2025 13:53:21 +0300 Subject: [PATCH 3/4] Added LinearAlgebra and Statistics library dependencies --- Project.toml | 4 ++++ docs/src/changelog.md | 1 + src/FuzzyLogic.jl | 2 +- src/genfis.jl | 11 ++++------- test/test_genfis.jl | 2 +- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Project.toml b/Project.toml index 3ab2cb7..724a4b7 100644 --- a/Project.toml +++ b/Project.toml @@ -7,20 +7,24 @@ version = "0.1.3" Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" LightXML = "9c8b4983-aa76-5018-a973-4c85ecc9e179" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" PEG = "12d937ae-5f68-53be-93c9-3a6f997a20a8" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] Aqua = "0.6" Dictionaries = "0.3" DocStringExtensions = "0.9" LightXML = "0.9" +LinearAlgebra = "1.11.0" MacroTools = "0.5" PEG = "1.0" RecipesBase = "1.0" Reexport = "1.0" +Statistics = "1.11.1" julia = "1.6" [extras] diff --git a/docs/src/changelog.md b/docs/src/changelog.md index 61780bb..6dfd8ca 100644 --- a/docs/src/changelog.md +++ b/docs/src/changelog.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## Unreleased - ![](https://img.shields.io/badge/BREAKING-red.svg) use a different type parameter for each field of membership functions. This allows membership functions to support mixed-type inputs like `GaussianMF(0, 0.1)`. +- ![](https://img.shields.io/badge/new%20feature-green.svg) added Gustafson-Kessel clustering algorithm. ## v0.1.3 -- 2024-09-18 diff --git a/src/FuzzyLogic.jl b/src/FuzzyLogic.jl index 1538858..2bb6036 100644 --- a/src/FuzzyLogic.jl +++ b/src/FuzzyLogic.jl @@ -1,6 +1,6 @@ module FuzzyLogic -using Dictionaries, Reexport +using Dictionaries, Reexport, LinearAlgebra, Statistics include("docstrings.jl") include("intervals.jl") diff --git a/src/genfis.jl b/src/genfis.jl index fad0e8c..9c063fa 100644 --- a/src/genfis.jl +++ b/src/genfis.jl @@ -55,9 +55,6 @@ function fuzzy_cmeans(X::Matrix{T}, N::Int; m = 2.0, maxiter = 100, return C, U end - -using LinearAlgebra, Statistics - """ Performs fuzzy clustering on the data `X` using `C` clusters. @@ -96,9 +93,9 @@ function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), distances = zeros(N, C) e = 2 / (α - 1) J = Inf - P0_det = stabilize ? det(cov(X')) ^(1/d) : 0 + P0_det = stabilize ? det(cov(X')) ^(1/d) : zero(float(T)) - V = Matrix{Float64}(undef, d, C) + V = Matrix{float(T)}(undef, d, C) @inbounds for iter in 1:maxiter pow_W = W .^ α @@ -106,7 +103,7 @@ function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), V .= (X * pow_W) ./ W_sum for (i, vi) in enumerate(eachcol(V)) P = zeros(d, d) - for(j, xj) in enumerate(eachcol(X)) + for (j, xj) in enumerate(eachcol(X)) sub = xj - vi P += pow_W[j, i] .* sub * sub' end @@ -115,7 +112,7 @@ function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), stabilize && _recalculate_cov_matrix(P, γ, β, P0_det) A = (ρ[i] * det(P))^(1/d) * inv(P) - for(j, xj) in enumerate(eachcol(X)) + for (j, xj) in enumerate(eachcol(X)) sub = xj - vi distances[j, i] = sub' * A * sub end diff --git a/test/test_genfis.jl b/test/test_genfis.jl index 315eb7e..03673f1 100644 --- a/test/test_genfis.jl +++ b/test/test_genfis.jl @@ -1,4 +1,4 @@ -using FuzzyLogic, Distributions, Random, Test, LinearAlgebra +using FuzzyLogic, Distributions, Random, Test function generate_cluster(μ::Vector{Float64}, σ::Matrix{Float64}, n::Int) dist = MvNormal(μ, σ) From 57dcf55d121003a347368d83cb99b928192fd327 Mon Sep 17 00:00:00 2001 From: tomre Date: Mon, 1 Sep 2025 18:26:42 +0300 Subject: [PATCH 4/4] Implemented working unit test for Gustafson-Kessel --- src/FuzzyLogic.jl | 2 +- src/genfis.jl | 11 ++++++----- test/test_genfis.jl | 23 +++++++++++------------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/FuzzyLogic.jl b/src/FuzzyLogic.jl index 2bb6036..5cfd177 100644 --- a/src/FuzzyLogic.jl +++ b/src/FuzzyLogic.jl @@ -28,7 +28,7 @@ export DifferenceSigmoidMF, LinearMF, GeneralizedBellMF, GaussianMF, ProductSigm KarnikMendelDefuzzifier, EKMDefuzzifier, IASCDefuzzifier, EIASCDefuzzifier, @mamfis, MamdaniFuzzySystem, @sugfis, SugenoFuzzySystem, set, LinearSugenoOutput, ConstantSugenoOutput, - fuzzy_cmeans, + fuzzy_cmeans, gustafson_kessel, compilefis, readfis diff --git a/src/genfis.jl b/src/genfis.jl index 9c063fa..4b8cf94 100644 --- a/src/genfis.jl +++ b/src/genfis.jl @@ -1,3 +1,4 @@ + """ Performs fuzzy clustering on th data `X` using `N` clusters. @@ -79,7 +80,7 @@ Performs fuzzy clustering on the data `X` using `C` clusters. - `W` -- ``N × C`` matrix of membership degrees, ``Wᵢⱼ`` tells has the membership degree of the ``i``th point to the ``j``th cluster. """ -function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), +function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), maxiter=100, tol=1e-5, stabilize=true, γ=0.1, β=10^15) where {T <: Real} α > 1 || throw(ArgumentError("α must be greater than 1")) @@ -94,7 +95,7 @@ function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), e = 2 / (α - 1) J = Inf P0_det = stabilize ? det(cov(X')) ^(1/d) : zero(float(T)) - + V = Matrix{float(T)}(undef, d, C) @inbounds for iter in 1:maxiter @@ -107,7 +108,7 @@ function gustafson_kessel(X::Matrix{T}, C::Int; α=2.0, ρ = ones(C), sub = xj - vi P += pow_W[j, i] .* sub * sub' end - + P ./= W_sum[1, i] stabilize && _recalculate_cov_matrix(P, γ, β, P0_det) A = (ρ[i] * det(P))^(1/d) * inv(P) @@ -149,7 +150,7 @@ function _recalculate_cov_matrix(P::Matrix{Float64}, γ::Number, eig_vals = eig.values max_val = maximum(eig_vals) - eig_vals[max_val ./ eig_vals .> β] .= max_val / β + eig_vals[max_val ./ eig_vals .> β] .= max_val / β P .= eig.vectors * diagm(eig_vals) * eig.vectors' -end \ No newline at end of file +end diff --git a/test/test_genfis.jl b/test/test_genfis.jl index 03673f1..f56e121 100644 --- a/test/test_genfis.jl +++ b/test/test_genfis.jl @@ -1,8 +1,7 @@ -using FuzzyLogic, Distributions, Random, Test +using FuzzyLogic, Random, LinearAlgebra, Test function generate_cluster(μ::Vector{Float64}, σ::Matrix{Float64}, n::Int) - dist = MvNormal(μ, σ) - return rand(dist, n) + return cholesky(Symmetric(σ)).L * randn(2, n) .+ μ end @testset "fuzzy c-means" begin @@ -22,9 +21,9 @@ end θ = π / 4 U = [cos(θ) -sin(θ); sin(θ) cos(θ)] D = Diagonal([5, 0.25]) - + Σ = U * D * U' - + X2 = generate_cluster(fill(0.0, 2), Σ, 300) X3 = generate_cluster(-μ1, diagm(fill(0.5, 2)), 200) X4 = generate_cluster(μ2, diagm(fill(0.1, 2)), 200) @@ -32,11 +31,11 @@ end X = hcat(X1, X2, X3, X4) Random.seed!() - (C, U) = gustafson_kessel(X, 4; α=2) + (C, U) = gustafson_kessel(X, 4; α = 2) - @test sortslices(C, dims=2)≈[-1.98861 -0.08146 2.00787 5.94698; 1.04687 -0.03456 -0.95882 3.02226] atol=1e-3 - @test_throws ArgumentError gustafson_kessel(X, 4; α=1) - @test_throws ArgumentError gustafson_kessel(X, 4; ρ=[1, 1, 1]) - @test_throws ArgumentError gustafson_kessel(X, 4, γ=2) - -end \ No newline at end of file + @test sortslices( + C, dims = 2)≈[-1.98861 -0.08146 2.00787 5.94698; 1.04687 -0.03456 -0.95882 3.02226] atol=1e-3 + @test_throws ArgumentError gustafson_kessel(X, 4; α = 1) + @test_throws ArgumentError gustafson_kessel(X, 4; ρ = [1, 1, 1]) + @test_throws ArgumentError gustafson_kessel(X, 4, γ = 2) +end