Skip to content

Add kw arg to normalize kernel in distance weights. #791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
251 changes: 251 additions & 0 deletions libpysal/_kernels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
"""
Kernel weight functions for spatial or statistical analysis.

This module provides a collection of kernel functions used to compute
weights based on distances and a given bandwidth. These functions are
commonly used in kernel density estimation, geographically weighted
regression, and other localized modeling techniques.

Available kernel types:
- 'triangular'
- 'parabolic'
- 'gaussian'
- 'bisquare'
- 'cosine'
- 'boxcar' / 'discrete'
- 'exponential'
- 'identity' (returns raw distances)


Notes
-----
Unless otherwise stated, the kernel functions follow the definitions in
Anselin, L. (2024) An Introduction to Spatial Data Science with GeoDa:
Volume 1 Exploring Spatial Data. CRC Press. p. 230.
"""

import numpy as np

def _triangular(distances, bandwidth):
"""
Triangular kernel.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter.

Returns
-------
weights : ndarray
Weights computed using the triangular kernel.
"""
u = np.clip(distances / bandwidth, 0, 1)
return 1 - u


def _parabolic(distances, bandwidth):
"""
Parabolic (Epanechnikov) kernel.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter.

Returns
-------
weights : ndarray
Weights computed using the parabolic kernel.
"""
u = np.clip(distances / bandwidth, 0, 1)
return 0.75 * (1 - u**2)


def _gaussian(distances, bandwidth):
"""
Gaussian kernel.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter.

Returns
-------
weights : ndarray
Weights computed using the Gaussian kernel.
"""
u = distances / bandwidth
exponent_term = -0.5 * (u ** 2)
c = 1 / np.sqrt(2 * np.pi)
return c * np.exp(exponent_term)


def _bisquare(distances, bandwidth):
"""
Bisquare (or biweight) kernel.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter.

Returns
-------
weights : ndarray
Weights computed using the bisquare kernel.
"""
u = np.clip(distances / bandwidth, 0, 1)
return (15 / 16) * (1 - u**2) ** 2


def _cosine(distances, bandwidth):
"""
Cosine kernel.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter.

Returns
-------
weights : ndarray
Weights computed using the cosine kernel.

Notes
-----
Source: Silverman, B.W. (1986). Density Estimation for Statistics and
Data Analysis.

"""
u = np.clip(distances / bandwidth, 0, 1)
return (np.pi / 4) * np.cos(np.pi / 2 * u)


def _exponential(distances, bandwidth):
"""
Exponential kernel.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter.

Returns
-------
weights : ndarray
Weights computed using the exponential kernel.

Notes
-----
TODO: source
"""
u = distances / bandwidth
return np.exp(-u)


def _boxcar(distances, bandwidth):
"""
Boxcar (uniform) kernel.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter.

Returns
-------
weights : ndarray
Weights of 1 for distances < bandwidth, 0 otherwise.

Notes
-----
TODO: source
"""
r = (distances < bandwidth).astype(int)
return r


def _identity(distances, _):
"""
Identity function (returns the input distances).

Parameters
----------
distances : array-like
Input distances.
_ : any
Ignored.

Returns
-------
distances : ndarray
Unchanged input distances.
"""
return distances


# Dispatcher: maps each supported kernel name to the function implementing it.
# Every function takes ``(distances, bandwidth)``.

_kernel_functions = {
    "triangular": _triangular,
    "parabolic": _parabolic,
    "gaussian": _gaussian,
    "bisquare": _bisquare,
    "cosine": _cosine,
    "boxcar": _boxcar,
    # "discrete" is an alias for the boxcar kernel.
    "discrete": _boxcar,
    "exponential": _exponential,
    "identity": _identity,
    # A kernel of None also maps to the identity (distances returned as-is).
    None: _identity,
}


def _kernel(distances, bandwidth, kernel='gaussian'):
"""
Compute kernel weights given distances and bandwidth.

Parameters
----------
distances : array-like
Input distances.
bandwidth : float
Bandwidth parameter controlling the kernel shape.
kernel : str or callable, optional
The kernel to use. Can be one of the predefined kernel names
or a custom function of the form `f(distances, bandwidth)`.

Returns
-------
weights : ndarray
Computed kernel weights.

Raises
------
ValueError
If a string kernel name is provided and is not recognized.
"""
if callable(kernel):
k = kernel(distances, bandwidth)
else:
kernel = kernel.lower()
if kernel not in _kernel_functions:
raise ValueError(f"Unknown kernel: {kernel!r}. Choose from {list(_kernel_functions)}.")
k = _kernel_functions[kernel](distances, bandwidth)

return k
60 changes: 4 additions & 56 deletions libpysal/graph/_kernel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy
import pandas
from scipy import optimize, sparse, spatial, stats
from libpysal._kernels import _kernel_functions

from ._utils import (
CoplanarError,
Expand All @@ -22,59 +23,6 @@
_VALID_GEOMETRY_TYPES = ["Point"]


def _triangular(distances, bandwidth):
u = numpy.clip(distances / bandwidth, 0, 1)
return 1 - u


def _parabolic(distances, bandwidth):
u = numpy.clip(distances / bandwidth, 0, 1)
return 0.75 * (1 - u**2)


def _gaussian(distances, bandwidth):
u = distances / bandwidth
return numpy.exp(-((u / 2) ** 2)) / (numpy.sqrt(2 * numpy.pi))


def _bisquare(distances, bandwidth):
u = numpy.clip(distances / bandwidth, 0, 1)
return (15 / 16) * (1 - u**2) ** 2


def _cosine(distances, bandwidth):
u = numpy.clip(distances / bandwidth, 0, 1)
return (numpy.pi / 4) * numpy.cos(numpy.pi / 2 * u)


def _exponential(distances, bandwidth):
u = distances / bandwidth
return numpy.exp(-u)


def _boxcar(distances, bandwidth):
r = (distances < bandwidth).astype(int)
return r


def _identity(distances, _):
return distances


_kernel_functions = {
"triangular": _triangular,
"parabolic": _parabolic,
"gaussian": _gaussian,
"bisquare": _bisquare,
"cosine": _cosine,
"boxcar": _boxcar,
"discrete": _boxcar,
"exponential": _exponential,
"identity": _identity,
None: _identity,
}


def _kernel(
coordinates,
bandwidth=None,
Expand Down Expand Up @@ -140,9 +88,9 @@ def _kernel(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)
else:
assert coordinates.shape[0] == coordinates.shape[1], (
"coordinates should represent a distance matrix if metric='precomputed'"
)
assert (
coordinates.shape[0] == coordinates.shape[1]
), "coordinates should represent a distance matrix if metric='precomputed'"
if ids is None:
ids = numpy.arange(coordinates.shape[0])

Expand Down
40 changes: 20 additions & 20 deletions libpysal/graph/tests/test_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,26 +237,26 @@ def test_kernel_precompute(self):
g = graph.Graph.build_kernel(distmat, metric="precomputed")
expected = np.array(
[
0.126,
0.266,
0.174,
0.071,
0.126,
0.329,
0.311,
0.291,
0.266,
0.329,
0.31,
0.205,
0.174,
0.311,
0.31,
0.339,
0.071,
0.291,
0.205,
0.339,
0.04,
0.177,
0.076,
0.013,
0.04,
0.271,
0.242,
0.212,
0.177,
0.271,
0.241,
0.105,
0.076,
0.242,
0.241,
0.288,
0.013,
0.212,
0.105,
0.288,
]
)

Expand Down
4 changes: 2 additions & 2 deletions libpysal/graph/tests/test_kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ def test_kernels(kernel, grocs):
assert weight.mean() == pytest.approx(0.10312196315841769)
assert weight.max() == pytest.approx(0.749881829575671)
elif kernel == "gaussian":
assert weight.mean() == pytest.approx(0.19932294761630429)
assert weight.max() == pytest.approx(0.3989265663183409)
assert weight.mean() == pytest.approx(0.13787969156713978)
assert weight.max() == pytest.approx(0.39891085285421685)
elif kernel == "bisquare":
assert weight.mean() == pytest.approx(0.09084085210598618)
assert weight.max() == pytest.approx(0.9372045972129259)
Expand Down
Loading