
Commit 99a1ebe

Author: GitHub Actions (committed)
Arlind Kadra: Merge pull request #173 from ravinkohli/fix-fit_pipeline
1 parent a803edd commit 99a1ebe

29 files changed: +1424 -727 lines changed
@@ -0,0 +1,54 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Fit a single configuration\n*Auto-PyTorch* searches for the best combination of machine learning algorithms\nand their hyper-parameter configuration for a given task.\n\nThis example shows how one can fit one of these pipelines, both with a user-defined\nconfiguration and with one randomly sampled from the configuration space.\nThe pipelines that Auto-PyTorch fits are compatible with the Scikit-Learn API. You can\nget further documentation about Scikit-Learn models here: `<https://scikit-learn.org/stable/getting_started.html>`_\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.metrics\nimport sklearn.model_selection\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import HoldoutValTypes\n\n\nif __name__ == '__main__':\n    ############################################################################\n    # Data Loading\n    # ============\n\n    X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True)\n    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n        X, y, test_size=0.5, random_state=3\n    )\n\n    ############################################################################\n    # Define an estimator\n    # ============================\n\n    # Search for a good configuration\n    estimator = TabularClassificationTask(\n        resampling_strategy=HoldoutValTypes.holdout_validation,\n        resampling_strategy_args={'val_share': 0.33}\n    )\n\n    ############################################################################\n    # Get a configuration of the pipeline for the current dataset\n    # ===============================================================\n\n    dataset = estimator.get_dataset(X_train=X_train,\n                                    y_train=y_train,\n                                    X_test=X_test,\n                                    y_test=y_test)\n    configuration = estimator.get_search_space(dataset).get_default_configuration()\n\n    ###########################################################################\n    # Fit the configuration\n    # ==================================\n\n    pipeline, run_info, run_value, dataset = estimator.fit_pipeline(X_train=X_train, y_train=y_train,\n                                                                    dataset_name='kr-vs-kp',\n                                                                    X_test=X_test, y_test=y_test,\n                                                                    disable_file_output=False,\n                                                                    configuration=configuration\n                                                                    )\n\n    # This object complies with the Scikit-Learn Pipeline API.\n    # https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html\n    print(pipeline.named_steps)\n\n    # The fit_pipeline call also returns a named tuple with the pipeline constraints\n    print(run_info)\n\n    # The fit_pipeline call also returns a named tuple with the train/test performance\n    print(run_value)\n\n    print(\"Passed Configuration:\", pipeline.config)\n    print(\"Network:\", pipeline.named_steps['network'].network)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
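The example in this commit fits the default configuration of the search space. Its description also mentions fitting a randomly sampled configuration; a minimal sketch of how that could look, assuming the object returned by estimator.get_search_space(dataset) is a standard ConfigSpace ConfigurationSpace (which exposes seed() and sample_configuration()). This sketch is not part of the commit:

# Sketch (assumption, not part of this commit): fit a randomly sampled
# configuration instead of the default one, reusing the estimator, dataset
# and data splits defined in the example above.
search_space = estimator.get_search_space(dataset)
search_space.seed(5)  # fix the random draw for reproducibility
random_configuration = search_space.sample_configuration()

pipeline, run_info, run_value, dataset = estimator.fit_pipeline(
    X_train=X_train, y_train=y_train,
    X_test=X_test, y_test=y_test,
    dataset_name='kr-vs-kp',
    disable_file_output=False,
    configuration=random_configuration,
)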
@@ -0,0 +1,85 @@
# -*- encoding: utf-8 -*-
"""
==========================
Fit a single configuration
==========================
*Auto-PyTorch* searches for the best combination of machine learning algorithms
and their hyper-parameter configuration for a given task.

This example shows how one can fit one of these pipelines, both with a user-defined
configuration and with one randomly sampled from the configuration space.
The pipelines that Auto-PyTorch fits are compatible with the Scikit-Learn API. You can
get further documentation about Scikit-Learn models here: `<https://scikit-learn.org/stable/getting_started.html>`_
"""
import os
import tempfile as tmp
import warnings

os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes


if __name__ == '__main__':
    ############################################################################
    # Data Loading
    # ============

    X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.5, random_state=3
    )

    ############################################################################
    # Define an estimator
    # ============================

    # Search for a good configuration
    estimator = TabularClassificationTask(
        resampling_strategy=HoldoutValTypes.holdout_validation,
        resampling_strategy_args={'val_share': 0.33}
    )

    ############################################################################
    # Get a configuration of the pipeline for the current dataset
    # ===============================================================

    dataset = estimator.get_dataset(X_train=X_train,
                                    y_train=y_train,
                                    X_test=X_test,
                                    y_test=y_test)
    configuration = estimator.get_search_space(dataset).get_default_configuration()

    ###########################################################################
    # Fit the configuration
    # ==================================

    pipeline, run_info, run_value, dataset = estimator.fit_pipeline(X_train=X_train, y_train=y_train,
                                                                    dataset_name='kr-vs-kp',
                                                                    X_test=X_test, y_test=y_test,
                                                                    disable_file_output=False,
                                                                    configuration=configuration
                                                                    )

    # This object complies with the Scikit-Learn Pipeline API.
    # https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
    print(pipeline.named_steps)

    # The fit_pipeline call also returns a named tuple with the pipeline constraints
    print(run_info)

    # The fit_pipeline call also returns a named tuple with the train/test performance
    print(run_value)

    print("Passed Configuration:", pipeline.config)
    print("Network:", pipeline.named_steps['network'].network)
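Since the example states that the returned object complies with the Scikit-Learn Pipeline API, a natural follow-up is to score it on the held-out test split. A minimal sketch under that assumption; calling predict() on the raw test frame and getting predictions back in the original label space are assumptions here, not something the commit demonstrates:

    # Sketch (assumption, not part of this commit): evaluate the fitted pipeline
    # on the held-out test split, assuming an sklearn-style predict() that accepts
    # the raw frame and returns predictions in the original label space.
    y_pred = pipeline.predict(X_test)
    print("Test accuracy:", sklearn.metrics.accuracy_score(y_test, y_pred))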