
Commit 0207814

Using precomputed kernels
Until now, kernel matrices were computed inside SEFR. They can now be precomputed once in LinearBoost, avoiding redundant kernel computation across the SEFR estimators.
1 parent cf6c43f commit 0207814
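
To make the idea concrete, here is a minimal sketch of the flow this commit introduces, assuming the classifier forwards its kernel parameters to scikit-learn's pairwise_kernels; the data and parameter values below are illustrative, not taken from the repository:

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels

# Hypothetical scaled training data; in LinearBoost this is X after the scaler.
X_train = np.random.default_rng(0).normal(size=(100, 5))

# Computed once, up front, instead of once per SEFR estimator.
K_train = pairwise_kernels(X_train, metric="rbf", gamma=0.1)

# Every boosting round then trains on the same (n_samples, n_samples) matrix:
#     SEFR(kernel="precomputed").fit(K_train, y, sample_weight=w)
print(K_train.shape)  # (100, 100)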

2 files changed, 164 additions, 34 deletions


src/linearboost/linear_boost.py

Lines changed: 81 additions & 6 deletions
@@ -26,6 +26,7 @@
 import numpy as np
 from sklearn.base import clone
 from sklearn.ensemble import AdaBoostClassifier
+from sklearn.metrics.pairwise import pairwise_kernels
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import (
     MaxAbsScaler,
@@ -95,8 +96,9 @@ def _boost(self, iboost, X, y, sample_weight, random_state):
         iboost : int
             The index of the current boost iteration.
 
-        X : {array-like} of shape (n_samples, n_features)
-            The training input samples.
+        X : {array-like} of shape (n_samples, n_features) or (n_samples, n_samples)
+            The training input samples. For kernel methods, this will be a
+            precomputed kernel matrix.
 
         y : array-like of shape (n_samples,)
             The target values (class labels).
@@ -375,6 +377,14 @@ class LinearBoostClassifier(_DenseAdaBoostClassifier):
     scaler_ : transformer
         The scaler instance used to transform the data.
 
+    X_fit_ : ndarray of shape (n_samples, n_features)
+        The training data after scaling, stored when kernel != 'linear'
+        for prediction purposes.
+
+    K_train_ : ndarray of shape (n_samples, n_samples)
+        The precomputed kernel matrix on training data, stored when
+        kernel != 'linear'.
+
     Notes
     -----
     This classifier only supports binary classification tasks.
@@ -426,8 +436,14 @@ def __init__(
         degree=3,
         coef0=1,
     ):
+        # Create SEFR estimator with 'precomputed' kernel if we're using kernels
+        if kernel == "linear":
+            base_estimator = SEFR(kernel="linear")
+        else:
+            base_estimator = SEFR(kernel="precomputed")
+
         super().__init__(
-            estimator=SEFR(kernel=kernel, gamma=gamma, degree=degree, coef0=coef0),
+            estimator=base_estimator,
             n_estimators=n_estimators,
             learning_rate=learning_rate,
         )
@@ -489,6 +505,37 @@ def _check_X_y(self, X, y) -> tuple[np.ndarray, np.ndarray]:
 
         return X, y
 
+    def _get_kernel_matrix(self, X, Y=None):
+        """Compute kernel matrix between X and Y.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples_X, n_features)
+            Input samples.
+        Y : array-like of shape (n_samples_Y, n_features), default=None
+            Input samples. If None, use X.
+
+        Returns
+        -------
+        K : ndarray of shape (n_samples_X, n_samples_Y)
+            Kernel matrix.
+        """
+        if Y is None:
+            Y = X
+
+        if callable(self.kernel):
+            return self.kernel(X, Y)
+        else:
+            return pairwise_kernels(
+                X,
+                Y,
+                metric=self.kernel,
+                filter_params=True,
+                gamma=self.gamma,
+                degree=self.degree,
+                coef0=self.coef0,
+            )
+
     def fit(self, X, y, sample_weight=None) -> Self:
         """Build a LinearBoost classifier from the training set (X, y).
 
@@ -515,6 +562,7 @@ def fit(self, X, y, sample_weight=None) -> Self:
         if self.scaler not in _scalers:
             raise ValueError('Invalid scaler provided; got "%s".' % self.scaler)
 
+        # Apply scaling
         if self.scaler == "minmax":
             self.scaler_ = clone(_scalers["minmax"])
         else:
@@ -538,10 +586,20 @@ def fit(self, X, y, sample_weight=None) -> Self:
             X_transformed = X_transformed[nonzero_mask]
             y = y[nonzero_mask]
             sample_weight = sample_weight[nonzero_mask]
+
         X_transformed, y = self._check_X_y(X_transformed, y)
         self.classes_ = np.unique(y)
         self.n_classes_ = self.classes_.shape[0]
 
+        # Store training data for kernel computation during prediction
+        if self.kernel != "linear":
+            self.X_fit_ = X_transformed
+            # Precompute kernel matrix ONCE for all estimators
+            self.K_train_ = self._get_kernel_matrix(X_transformed)
+            training_data = self.K_train_
+        else:
+            training_data = X_transformed
+
         if self.class_weight is not None:
             if isinstance(self.class_weight, str) and self.class_weight != "balanced":
                 raise ValueError(
@@ -566,7 +624,8 @@ def fit(self, X, y, sample_weight=None) -> Self:
                 category=FutureWarning,
                 message=".*parameter 'algorithm' is deprecated.*",
             )
-            return super().fit(X_transformed, y, sample_weight)
+            # Pass the precomputed kernel matrix (or raw features for linear)
+            return super().fit(training_data, y, sample_weight)
 
     @staticmethod
     def _samme_proba(estimator, n_classes, X):
@@ -590,6 +649,15 @@ def _samme_proba(estimator, n_classes, X):
         )
 
     def _boost(self, iboost, X, y, sample_weight, random_state):
+        """
+        Implement a single boost using precomputed kernel matrix or raw features.
+
+        Parameters
+        ----------
+        X : ndarray
+            For kernel methods, this is the precomputed kernel matrix.
+            For linear methods, this is the raw feature matrix.
+        """
         estimator = self._make_estimator(random_state=random_state)
         estimator.fit(X, y, sample_weight=sample_weight)
 
@@ -668,13 +736,20 @@ class in ``classes_``, respectively.
         check_is_fitted(self)
         X_transformed = self.scaler_.transform(X)
 
+        if self.kernel == "linear":
+            # For linear kernel, pass raw features
+            test_data = X_transformed
+        else:
+            # For kernel methods, compute kernel matrix between test and training data
+            test_data = self._get_kernel_matrix(X_transformed, self.X_fit_)
+
         if self.algorithm == "SAMME.R":
             # Proper SAMME.R decision function
             classes = self.classes_
             n_classes = len(classes)
 
             pred = sum(
-                self._samme_proba(estimator, n_classes, X_transformed)
+                self._samme_proba(estimator, n_classes, test_data)
                 for estimator in self.estimators_
             )
             pred /= self.estimator_weights_.sum()
@@ -685,7 +760,7 @@ class in ``classes_``, respectively.
 
         else:
             # Standard SAMME algorithm from AdaBoostClassifier (discrete)
-            return super().decision_function(X_transformed)
+            return super().decision_function(test_data)
 
     def predict(self, X):
         """Predict classes for X.

src/linearboost/sefr.py

Lines changed: 83 additions & 28 deletions
@@ -44,13 +44,14 @@ class SEFR(LinearClassifierMixin, BaseEstimator):
         Specifies if a constant (a.k.a. bias or intercept) should be
         added to the decision function.
 
-    kernel : {'linear', 'poly', 'rbf', 'sigmoid'} or callable, default='linear'
+    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='linear'
         Specifies the kernel type to be used in the algorithm.
         If a callable is given, it is used to pre-compute the kernel matrix.
+        If 'precomputed', X is assumed to be a kernel matrix.
 
     gamma : float, default=None
         Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. If None, then it is
-        set to 1.0 / n_features.
+        set to 1.0 / n_features. Ignored when kernel='precomputed'.
 
     degree : int, default=3
         Degree for 'poly' kernels. Ignored by other kernels.
@@ -80,7 +81,7 @@ class SEFR(LinearClassifierMixin, BaseEstimator):
         has feature names that are all strings.
 
     X_fit_ : ndarray of shape (n_samples, n_features)
-        The training data, stored when a kernel is used.
+        The training data, stored when a kernel is used (except for 'precomputed').
 
     Notes
     -----
@@ -100,7 +101,10 @@ class SEFR(LinearClassifierMixin, BaseEstimator):
 
     _parameter_constraints: dict = {
         "fit_intercept": ["boolean"],
-        "kernel": [StrOptions({"linear", "poly", "rbf", "sigmoid"}), callable],
+        "kernel": [
+            StrOptions({"linear", "poly", "rbf", "sigmoid", "precomputed"}),
+            callable,
+        ],
         "gamma": [Interval(Real, 0, None, closed="left"), None],
         "degree": [Interval(Integral, 1, None, closed="left"), None],
         "coef0": [Real, None],
@@ -144,28 +148,58 @@ def _more_tags(self) -> dict[str, bool]:
         }
 
     def _check_X(self, X) -> np.ndarray:
-        X = validate_data(
-            self,
-            X,
-            dtype="numeric",
-            force_all_finite=True,
-            reset=False,
-        )
-        if X.shape[1] != self.n_features_in_:
-            raise ValueError(
-                "Expected input with %d features, got %d instead."
-                % (self.n_features_in_, X.shape[1])
+        if self.kernel == "precomputed":
+            X = validate_data(
+                self,
+                X,
+                dtype="numeric",
+                force_all_finite=True,
+                reset=False,
+            )
+            # For precomputed kernels during prediction, X should be (n_test_samples, n_train_samples)
+            if hasattr(self, "n_features_in_") and X.shape[1] != self.n_features_in_:
+                raise ValueError(
+                    f"Precomputed kernel matrix should have {self.n_features_in_} columns "
+                    f"(number of training samples), got {X.shape[1]}."
+                )
+        else:
+            X = validate_data(
+                self,
+                X,
+                dtype="numeric",
+                force_all_finite=True,
+                reset=False,
             )
+            if hasattr(self, "n_features_in_") and X.shape[1] != self.n_features_in_:
+                raise ValueError(
+                    "Expected input with %d features, got %d instead."
+                    % (self.n_features_in_, X.shape[1])
+                )
         return X
 
     def _check_X_y(self, X, y) -> tuple[np.ndarray, np.ndarray]:
-        X, y = check_X_y(
-            X,
-            y,
-            dtype="numeric",
-            force_all_finite=True,
-            estimator=self,
-        )
+        if self.kernel == "precomputed":
+            # For precomputed kernels, X should be a square kernel matrix
+            X, y = check_X_y(
+                X,
+                y,
+                dtype="numeric",
+                force_all_finite=True,
+                estimator=self,
+            )
+            if X.shape[0] != X.shape[1]:
+                raise ValueError(
+                    f"Precomputed kernel matrix should be square, got shape {X.shape}."
+                )
+        else:
+            X, y = check_X_y(
+                X,
+                y,
+                dtype="numeric",
+                force_all_finite=True,
+                estimator=self,
+            )
+
         check_classification_targets(y)
 
         if np.unique(y).shape[0] == 1:
@@ -180,6 +214,10 @@ def _check_X_y(self, X, y) -> tuple[np.ndarray, np.ndarray]:
         return X, y
 
     def _get_kernel_matrix(self, X, Y=None):
+        if self.kernel == "precomputed":
+            # X is already a kernel matrix
+            return X
+
         if Y is None:
             Y = self.X_fit_
 
@@ -203,9 +241,10 @@ def fit(self, X, y, sample_weight=None) -> Self:
 
         Parameters
         ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+        X : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples, n_samples)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.
+           If kernel='precomputed', X should be a square kernel matrix.
 
        y : array-like of shape (n_samples,)
            Target vector relative to X.
@@ -219,15 +258,25 @@ def fit(self, X, y, sample_weight=None) -> Self:
         self
             Fitted estimator.
         """
-        _check_n_features(self, X=X, reset=True)
-        _check_feature_names(self, X=X, reset=True)
+        if self.kernel == "precomputed":
+            _check_n_features(self, X=X, reset=True)
+            _check_feature_names(self, X=X, reset=True)
+        else:
+            _check_n_features(self, X=X, reset=True)
+            _check_feature_names(self, X=X, reset=True)
 
         X, y = self._check_X_y(X, y)
-        self.X_fit_ = X
+
+        # Store training data only for non-precomputed kernels
+        if self.kernel != "precomputed":
+            self.X_fit_ = X
+
         self.classes_, y_ = np.unique(y, return_inverse=True)
 
         if self.kernel == "linear":
             K = X
+        elif self.kernel == "precomputed":
+            K = X  # X is already the kernel matrix
         else:
             K = self._get_kernel_matrix(X)
 
@@ -277,10 +326,14 @@ def fit(self, X, y, sample_weight=None) -> Self:
     def decision_function(self, X):
         check_is_fitted(self)
         X = self._check_X(X)
+
         if self.kernel == "linear":
             K = X
+        elif self.kernel == "precomputed":
+            K = X  # X is already a kernel matrix
         else:
             K = self._get_kernel_matrix(X)
+
         return (
             safe_sparse_dot(K, self.coef_.T, dense_output=True) + self.intercept_
         ).ravel()
@@ -294,9 +347,10 @@ def predict_proba(self, X):
 
         Parameters
         ----------
-        X : array-like of shape (n_samples, n_features)
+        X : array-like of shape (n_samples, n_features) or (n_samples, n_train_samples)
            Vector to be scored, where `n_samples` is the number of samples and
            `n_features` is the number of features.
+           If kernel='precomputed', X should have shape (n_samples, n_train_samples).
 
        Returns
        -------
@@ -324,9 +378,10 @@ def predict_log_proba(self, X):
 
         Parameters
         ----------
-        X : array-like of shape (n_samples, n_features)
+        X : array-like of shape (n_samples, n_features) or (n_samples, n_train_samples)
            Vector to be scored, where `n_samples` is the number of samples and
            `n_features` is the number of features.
+           If kernel='precomputed', X should have shape (n_samples, n_train_samples).
 
        Returns
        -------
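
The sefr.py side of the change defines the 'precomputed' contract: fit() expects a square kernel matrix over the training samples, and the scoring methods expect a matrix with one column per training sample. A small sketch of that contract, with synthetic data and an import path assumed from the file location:

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels
from linearboost.sefr import SEFR  # assumed import path

rng = np.random.default_rng(0)
X_train = rng.normal(size=(50, 4))
X_test = rng.normal(size=(10, 4))
y_train = (X_train[:, 0] > 0).astype(int)

K_train = pairwise_kernels(X_train, metric="rbf", gamma=0.25)          # square: (50, 50)
K_test = pairwise_kernels(X_test, X_train, metric="rbf", gamma=0.25)   # (n_test, n_train): (10, 50)

sefr = SEFR(kernel="precomputed").fit(K_train, y_train)
print(sefr.predict_proba(K_test).shape)  # (10, 2)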
