Skip to content

Commit 451c968

Browse files
committed
Merge branch 'dev'
2 parents 356e3e4 + 93c7804 commit 451c968

File tree

3 files changed

+32
-4
lines changed

3 files changed

+32
-4
lines changed

docs/source/content/examples/pool-based_sampling.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"\n",
99
"## Overview\n",
1010
"\n",
11-
"In this example, the we apply an `ActiveLearner` onto the iris dataset using pool-based sampling. In this setting, we assume a small set of labeled data $\\mathcal{L}$ and a large set of unlabeled data $\\mathcal{U}$ such that $\\left| \\mathcal{L} \\right| \\ll \\left| \\mathcal{U} \\right|$. In his review of the active learning literature, Settles covers a high-level overview of the general pool-based sampling algorithm:\n",
11+
"In this example, we apply an `ActiveLearner` onto the iris dataset using pool-based sampling. In this setting, we assume a small set of labeled data $\\mathcal{L}$ and a large set of unlabeled data $\\mathcal{U}$ such that $\\left| \\mathcal{L} \\right| \\ll \\left| \\mathcal{U} \\right|$. In his review of the active learning literature, Settles covers a high-level overview of the general pool-based sampling algorithm:\n",
1212
"\n",
1313
"> Queries are selectively drawn from the pool, which is usually assumed to be closed (i.e., static or non-changing), although this is not strictly necessary. Typically, instances are queried in a greedy fashion, according to an informativeness measure used to evaluate all instances in the pool (or, perhaps if $\\mathcal{U}$ is very large, some subsample thereof).\n",
1414
"\n",

modAL/models/base.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class BaseLearner(ABC, BaseEstimator):
3030
for instance, modAL.uncertainty.uncertainty_sampling.
3131
X_training: Initial training samples, if available.
3232
y_training: Initial training labels corresponding to initial training samples.
33+
force_all_finite: When True, forces all values of the data to be finite.
34+
When False, accepts np.nan and np.inf values.
3335
bootstrap_init: If initial training data is available, bootstrapping can be done during the first training.
3436
Useful when building Committee models with bagging.
3537
**fit_kwargs: keyword arguments.
@@ -47,6 +49,7 @@ def __init__(self,
4749
X_training: Optional[modALinput] = None,
4850
y_training: Optional[modALinput] = None,
4951
bootstrap_init: bool = False,
52+
force_all_finite: bool = True,
5053
**fit_kwargs
5154
) -> None:
5255
assert callable(query_strategy), 'query_strategy must be callable'
@@ -59,6 +62,9 @@ def __init__(self,
5962
if X_training is not None:
6063
self._fit_to_known(bootstrap=bootstrap_init, **fit_kwargs)
6164

65+
assert isinstance(force_all_finite, bool), 'force_all_finite must be a bool'
66+
self.force_all_finite = force_all_finite
67+
6268
def _add_training_data(self, X: modALinput, y: modALinput) -> None:
6369
"""
6470
Adds the new data and label to the known data, but does not retrain the model.
@@ -71,7 +77,8 @@ def _add_training_data(self, X: modALinput, y: modALinput) -> None:
7177
If the classifier has been fitted, the features in X have to agree with the training samples which the
7278
classifier has seen.
7379
"""
74-
check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
80+
check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
81+
force_all_finite=self.force_all_finite)
7582

7683
if self.X_training is None:
7784
self.X_training = X
@@ -117,7 +124,8 @@ def _fit_on_new(self, X: modALinput, y: modALinput, bootstrap: bool = False, **f
117124
Returns:
118125
self
119126
"""
120-
check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
127+
check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
128+
force_all_finite=self.force_all_finite)
121129

122130
if not bootstrap:
123131
self.estimator.fit(X, y, **fit_kwargs)
@@ -146,7 +154,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg
146154
Returns:
147155
self
148156
"""
149-
check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
157+
check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
158+
force_all_finite=self.force_all_finite)
150159
self.X_training, self.y_training = X, y
151160
return self._fit_to_known(bootstrap=bootstrap, **fit_kwargs)
152161

tests/core_tests.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,25 @@ def test_teach(self):
734734

735735
learner.teach(X, y, bootstrap=bootstrap, only_new=only_new)
736736

737+
def test_nan(self):
738+
X_training_nan = np.ones(shape=(10, 2)) * np.nan
739+
X_training_inf = np.ones(shape=(10, 2)) * np.inf
740+
y_training = np.random.randint(0, 2, size=10)
741+
742+
learner = modAL.models.learners.ActiveLearner(
743+
X_training=X_training_nan, y_training=y_training,
744+
estimator=mock.MockEstimator(),
745+
force_all_finite=False
746+
)
747+
learner.teach(X_training_nan, y_training)
748+
749+
learner = modAL.models.learners.ActiveLearner(
750+
X_training=X_training_inf, y_training=y_training,
751+
estimator=mock.MockEstimator(),
752+
force_all_finite=False
753+
)
754+
learner.teach(X_training_inf, y_training)
755+
737756
def test_keras(self):
738757
pass
739758

0 commit comments

Comments (0)