Skip to content

Commit 1121733

Browse files
authored
Add smoothing cubic splines (#103)
* Implement smoothing cubic splines * Add an example * Mention smoothing splines in README * Add tests * Formatting [skip ci] * Update comment [skip ci]
1 parent e86b3c5 commit 1121733

File tree

9 files changed

+239
-6
lines changed

9 files changed

+239
-6
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ This package provides:
1717

1818
- evaluation of splines and their derivatives and integrals;
1919

20-
- spline interpolations and function approximation;
20+
- spline interpolations, smoothing splines and function approximation;
2121

2222
- basis recombination, for generating bases satisfying homogeneous boundary
2323
conditions using linear combinations of B-splines.

docs/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1111

1212
[compat]
1313
Documenter = "1"
14-
Makie = "0.21"
14+
Makie = "0.22"
1515

1616
[extras]
1717
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"

docs/src/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ This package provides:
1212
- evaluation of [splines](@ref Splines-api) and their derivatives and
1313
integrals;
1414

15-
- [spline interpolations](@ref interpolation-api) and
16-
[function approximation](@ref function-approximation-api);
15+
- [spline interpolations](@ref interpolation-example), [smoothing splines](@ref smoothing-example) and
16+
[function approximation](@ref function-approximation-example);
1717

1818
- [basis recombination](@ref basis-recombination-api), for generating bases
1919
satisfying homogeneous boundary conditions using linear combinations of

docs/src/interpolation.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
CurrentModule = BSplineKit.SplineInterpolations
55
```
66

7-
High-level interpolation of gridded data using B-splines.
7+
High-level interpolation and fitting of gridded data using B-splines.
88

99
## Functions
1010

1111
```@docs
1212
interpolate
1313
interpolate!
14+
fit
1415
```
1516

1617
## Types

examples/interpolation.jl

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,58 @@ current_figure() # hide
8686
# Clearly, the spurious oscillations are strongly suppressed near the
8787
# boundaries.
8888

89+
# ## [Smoothing cubic splines](@id smoothing-example)
90+
#
91+
# One can use [smoothing splines](https://en.wikipedia.org/wiki/Smoothing_spline)
92+
# to fit noisy data.
93+
# A smoothing spline is a curve which passes close to the input data, while avoiding strong
94+
# fluctuations due to possible noise.
95+
# The smoothign strength is controlled by a regularisation parameter ``λ``.
96+
# Setting ``λ = 0`` corresponds to a regular interpolation (the obtained spline passes
97+
# through all the points), while increasing ``λ`` leads to a smoother curve which roughly
98+
# approximates the data.
99+
#
100+
# Given a set of data points ``(x_i, y_i)``, the idea is to construct a spline ``S(x)`` that
101+
# minimises:
102+
#
103+
# ```math
104+
# ∑_{i = 1}^N w_i |y_i - S(x_i)|^2 + λ ∫_{x_1}^{x_N} \left[ S''(x) \right]^2 \, \mathrm{d}x
105+
# ```
106+
#
107+
# Here ``w_i`` are optional weights that may be used to give "priority" to certain data
108+
# points.
109+
#
110+
# Note that only cubic splines (order ``k = 4``) are currently supported.
111+
112+
rng = MersenneTwister(42)
113+
Ndata = 20
114+
xs = sort!(rand(rng, Ndata))
115+
xs[begin] = 0; xs[end] = 1; # not strictly necessary; just to set the data limits
116+
ys = cospi.(2 .* xs) .+ 0.04 .* randn(rng, Ndata);
117+
118+
# Create smoothing spline from data:
119+
120+
λ = 1e-3
121+
S_fit = fit(xs, ys, λ)
122+
123+
# If we want the spline to pass very near a single data point, we can assign a
124+
# larger weight to that point:
125+
126+
weights = fill!(similar(xs), 1)
127+
weights[12] = 100 # larger weight to point i = 12
128+
S_fit_weight = fit(xs, ys, λ; weights)
129+
130+
# Plot results and compare with natural cubic spline interpolation:
131+
132+
S_interp = interpolate(xs, ys, BSplineOrder(4), Natural())
133+
134+
scatter(xs, ys; label = "Data", color = :black)
135+
lines!(0..1, S_interp; label = "Interpolation", linewidth = 2)
136+
lines!(0..1, S_fit; label = "Fit (λ = )", linewidth = 2)
137+
lines!(0..1, S_fit_weight; label = "Fit (λ = ) with weight", linestyle = :dash, linewidth = 2)
138+
axislegend(position = (0.5, 1))
139+
current_figure() # hide
140+
89141
# ## [Extrapolations](@id extrapolation-example)
90142
#
91143
# One can use extrapolation to evaluate splines outside of their domain of definition.

src/SplineInterpolations/SplineInterpolations.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module SplineInterpolations
33
using ..BSplines
44
using ..Collocation
55
using ..Collocation: CyclicTridiagonalMatrix, IdentityMatrix
6+
using ..DifferentialOps: Derivative
67
using ..Splines
78

89
using BandedMatrices
@@ -21,7 +22,9 @@ using ..Recombinations:
2122
using ..BoundaryConditions
2223

2324
export
24-
SplineInterpolation, spline, interpolate, interpolate!
25+
SplineInterpolation, spline, interpolate, interpolate!, fit
26+
27+
include("smoothing.jl")
2528

2629
"""
2730
SplineInterpolation

src/SplineInterpolations/smoothing.jl

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
@doc raw"""
2+
fit(xs, ys, λ::Real, [BSplineOrder(4)]; [weights = nothing])
3+
4+
Fit a cubic smoothing spline to the given data.
5+
6+
Returns a natural cubic spline which roughly passes through the data (points `(xs[i], ys[i])`)
7+
given some smoothing parameter ``λ``.
8+
Note that ``λ = 0`` means no smoothing and the results are equivalent to
9+
all the data.
10+
11+
One can optionally pass a `weights` vector if one desires to give different weights to
12+
different data points (e.g. if one wants the curve to pass as close as possible to a
13+
specific point). By default all weights are ``w_i = 1``.
14+
15+
More precisely, the returned spline ``S(x)`` minimises:
16+
17+
```math
18+
∑_{i = 1}^N w_i |y_i - S(x_i)|^2 + λ ∫_{x_1}^{x_N} \left[ S''(x) \right]^2 \, \mathrm{d}x
19+
```
20+
21+
Only cubic splines (`BSplineOrder(4)`) are currently supported.
22+
23+
# Examples
24+
25+
```jldoctest smoothing_spline
26+
julia> xs = (0:0.01:1).^2;
27+
28+
julia> ys = @. cospi(2 * xs) + 0.1 * sinpi(200 * xs); # smooth + highly fluctuating components
29+
30+
julia> λ = 0.001; # smoothing parameter
31+
32+
julia> S = fit(xs, ys, λ)
33+
101-element Spline{Float64}:
34+
basis: 101-element RecombinedBSplineBasis of order 4, domain [0.0, 1.0], BCs {left => (D{2},), right => (D{2},)}
35+
order: 4
36+
knots: [0.0, 0.0, 0.0, 0.0, 0.0001, 0.0004, 0.0009, 0.0016, 0.0025, 0.0036 … 0.8836, 0.9025, 0.9216, 0.9409, 0.9604, 0.9801, 1.0, 1.0, 1.0, 1.0]
37+
coefficients: [0.946872, 0.631018, 1.05101, 1.04986, 1.04825, 1.04618, 1.04366, 1.04067, 1.03722, 1.03331 … 0.437844, 0.534546, 0.627651, 0.716043, 0.798813, 0.875733, 0.947428, 1.01524, 0.721199, 0.954231]
38+
```
39+
"""
40+
function fit end
41+
42+
function fit(
43+
xs::AbstractVector, ys::AbstractVector, λ::Real, order::BSplineOrder{4};
44+
weights::Union{Nothing, AbstractVector} = nothing,
45+
)
46+
λ 0 || throw(DomainError(λ, "the smoothing parameter λ must be non-negative"))
47+
eachindex(xs) == eachindex(ys) || throw(DimensionMismatch("x and y vectors must have the same length"))
48+
N = length(xs)
49+
cs = similar(xs)
50+
51+
T = eltype(cs)
52+
53+
# Create natural cubic B-spline basis with knots = input points
54+
B = BSplineBasis(order, copy(xs))
55+
R = RecombinedBSplineBasis(B, Natural())
56+
57+
# Compute collocation matrices for derivatives 0 and 2
58+
A = BandedMatrix{T}(undef, (N, N), (1, 1)) # 3 bands are enough
59+
D = similar(A)
60+
collocation_matrix!(A, R, xs, Derivative(0))
61+
collocation_matrix!(D, R, xs, Derivative(2))
62+
63+
# Matrix containing grid steps (banded + symmetric, so we don't compute the lower part)
64+
Δ_upper = BandedMatrix{T}(undef, (N, N), (0, 1))
65+
fill!(Δ_upper, 0)
66+
@inbounds for i axes(Δ_upper, 1)[2:end-1]
67+
# Δ_upper[i, i - 1] = xs[i] - xs[i - 1] # this one is obtained by symmetry
68+
Δ_upper[i, i] = 2 * (xs[i + 1] - xs[i - 1])
69+
Δ_upper[i, i + 1] = xs[i + 1] - xs[i]
70+
end
71+
Δ = Hermitian(Δ_upper) # symmetric matrix with 3 bands
72+
73+
# The integral of the squared second derivative is (H * cs) ⋅ (D * cs) / 6.
74+
H = Δ * D
75+
76+
# Directly construct LHS matrix
77+
# M = Hermitian((H'D + D'H) * (λ / 6) + 2 * A' * W * A) # usually positive definite
78+
79+
# Construct LHS matrix trying to reduce computations
80+
B = H' * D # this matrix has 5 bands
81+
buf_1 = (B .+ B') .*/ 6) # = (H'D + D'H) * (λ / 6)
82+
buf_2 = if weights === nothing
83+
A' * A
84+
else
85+
A' * Diagonal(weights) * A # = A' * W * A
86+
end
87+
88+
@. buf_1 = buf_1 + 2 * buf_2 # (H'D + D'H) * (λ / 6) + 2 * A' * W * A
89+
M = Hermitian(buf_1) # the matrix is actually symmetric (usually positive definite)
90+
F = cholesky!(M) # factorise matrix (assuming posdef)
91+
92+
# Directly construct RHS
93+
# z = 2 * A' * (W * ys) # RHS
94+
# ldiv!(cs, F, z)
95+
96+
# Construct RHS trying to reduce allocations
97+
zs = copy(ys)
98+
if weights !== nothing
99+
eachindex(weights) == eachindex(xs) || throw(DimensionMismatch("the `weights` vector must have the same length as the data"))
100+
lmul!(Diagonal(weights), zs) # zs = W * ys
101+
end
102+
mul!(cs, A', zs) # cs = A' * (W * ys)
103+
lmul!(2, cs) # cs = 2 * A' * (W * ys)
104+
105+
ldiv!(F, cs) # solve linear system
106+
107+
Spline(R, cs)
108+
end
109+
110+
fit(x, y, λ; kws...) = fit(x, y, λ, BSplineOrder(4); kws...)

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ include("splines.jl")
2828
include("periodic.jl")
2929
include("natural.jl")
3030
include("interpolation.jl")
31+
include("smoothing.jl")
3132
include("extrapolation.jl")
3233
include("approximation.jl")
3334
include("recombination.jl")

test/smoothing.jl

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Test smoothing splines
2+
3+
using BSplineKit
4+
using QuadGK: quadgk
5+
using Test
6+
7+
# Returns the integral of |S''(x)| (the "curvature") over the whole spline.
8+
function total_curvature(S::Spline)
9+
ts = knots(S)
10+
k = order(S)
11+
xs = @view ts[(begin - 1 + k):(end + 1 - k)] # exclude repeated knots at the boundaries
12+
@assert length(xs) == length(S)
13+
S″ = Derivative(2) * S
14+
curv, err = quadgk(xs) do x
15+
abs2(S″(x))
16+
end
17+
curv
18+
end
19+
20+
function distance_from_data(S::Spline, xs, ys)
21+
dist = zero(eltype(ys))
22+
for i in eachindex(xs, ys)
23+
dist += abs2(ys[i] - S(xs[i]))
24+
end
25+
sqrt(dist / length(xs))
26+
end
27+
28+
@testset "Smoothing cubic splines" begin
29+
xs = (0:0.01:1).^2
30+
ys = @. cospi(2 * xs) + 0.1 * sinpi(200 * xs) # smooth + highly fluctuating components
31+
32+
# Check that smoothing with λ = 0 is equivalent to interpolation
33+
@testset "Fit λ = 0" begin
34+
λ = 0
35+
S = @inferred fit(xs, ys, λ)
36+
S_interp = spline(interpolate(xs, ys, BSplineOrder(4), Natural()))
37+
@test coefficients(S) coefficients(S_interp)
38+
end
39+
40+
@testset "Smoothing" begin
41+
λs = [0.0, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
42+
curvatures = similar(λs)
43+
distances = similar(λs)
44+
for i in eachindex(λs)
45+
S = @inferred fit(xs, ys, λs[i])
46+
curvatures[i] = total_curvature(S)
47+
distances[i] = distance_from_data(S, xs, ys)
48+
end
49+
@test issorted(curvatures; rev = true) # in decreasing order (small λ => large curvature)
50+
@test issorted(distances) # in increasing order (large λ => large distance from data)
51+
end
52+
53+
@testset "Weights" begin
54+
λ = 1e-3
55+
weights = fill!(similar(xs), 1)
56+
S = fit(xs, ys, λ)
57+
Sw = @inferred fit(xs, ys, λ; weights) # equivalent to the default (all weights are 1)
58+
@test coefficients(S) == coefficients(Sw)
59+
# Now give more weight to point i = 3
60+
weights[3] = 1000
61+
Sw = fit(xs, ys, λ; weights)
62+
@test abs(Sw(xs[3]) - ys[3]) < abs(S(xs[3]) - ys[3]) # the new curve is closer to the data point i = 3
63+
@test total_curvature(Sw) > total_curvature(S) # since we give more importance to data fitting (basically, the sum of weights is larger)
64+
@test distance_from_data(Sw, xs, ys) < distance_from_data(S, xs, ys)
65+
end
66+
end

0 commit comments

Comments
 (0)