Skip to content

Commit

Permalink
Merge pull request #122 from perib/new_search_space_def
Browse files Browse the repository at this point in the history
New search space def
  • Loading branch information
perib authored Mar 27, 2024
2 parents ef2a9a1 + 68378bc commit d5a27cc
Show file tree
Hide file tree
Showing 74 changed files with 10,182 additions and 5,922 deletions.
1,084 changes: 151 additions & 933 deletions Tutorial/1_Estimators_Overview.ipynb

Large diffs are not rendered by default.

478 changes: 0 additions & 478 deletions Tutorial/2_Defining_Search_Space_(config_dicts).ipynb

This file was deleted.

4,904 changes: 4,904 additions & 0 deletions Tutorial/2_Search_Spaces.ipynb

Large diffs are not rendered by default.

1,244 changes: 1,244 additions & 0 deletions Tutorial/3_Feature_Set_Selector.ipynb

Large diffs are not rendered by default.

1,147 changes: 0 additions & 1,147 deletions Tutorial/3_Genetic_Feature_Set_Selectors.ipynb

This file was deleted.

130 changes: 69 additions & 61 deletions Tutorial/4_Symbolic_Regression_and_Classification.ipynb

Large diffs are not rendered by default.

590 changes: 590 additions & 0 deletions Tutorial/5_Genetic_Feature_Selection.ipynb

Large diffs are not rendered by default.

121 changes: 0 additions & 121 deletions Tutorial/5_GraphPipeline.ipynb

This file was deleted.

121 changes: 121 additions & 0 deletions Tutorial/6_GraphPipeline.ipynb

Large diffs are not rendered by default.

107 changes: 102 additions & 5 deletions Tutorial/7_dask_parallelization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,27 @@
" scorer = sklearn.metrics.get_scorer('roc_auc_ovr')\n",
" X, y = sklearn.datasets.load_digits(return_X_y=True)\n",
" X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n",
" est = tpot2.TPOTEstimatorSteadyState( n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
" \n",
" graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
" est = tpot2.TPOTEstimator(\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" )\n",
" \n",
" \n",
" est.fit(X_train, y_train)\n",
" print(scorer(est, X_test, y_test))"
]
Expand Down Expand Up @@ -106,7 +126,27 @@
"X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n",
"\n",
"\n",
"est = tpot2.TPOTEstimatorSteadyState( n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"est = tpot2.TPOTEstimator(\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" n_jobs=10,\n",
" memory_limit=\"4GB\"\n",
")\n",
"\n",
"est.fit(X_train, y_train)\n",
"print(scorer(est, X_test, y_test))"
]
Expand Down Expand Up @@ -214,7 +254,27 @@
}
],
"source": [
"est = tpot2.TPOTEstimatorSteadyState( client=client, classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"est = tpot2.TPOTEstimator(\n",
" client = client,\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
")\n",
"\n",
"\n",
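"# NOTE(review): the commented-out TPOTClassifier call below is NOT equivalent to the estimator above (it differs at least in population_size and verbose, and uses n_jobs/memory_limit instead of client); kept for reference only - update or remove.\n",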
"# est = tpot2.TPOTClassifier(population_size= 8, generations=5, n_jobs=4, memory_limit=\"4GB\", verbose=1)\n",
"est.fit(X_train, y_train)\n",
Expand Down Expand Up @@ -283,7 +343,25 @@
" threads_per_worker=1,\n",
" memory_limit='4GB',\n",
") as cluster, Client(cluster) as client:\n",
" est = tpot2.TPOTEstimatorSteadyState(client=client, n_jobs=10,memory_limit=\"4GB\", classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
" graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
" est = tpot2.TPOTEstimator(\n",
" client = client,\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" )\n",
" est.fit(X_train, y_train)\n",
" print(scorer(est, X_test, y_test))"
]
Expand Down Expand Up @@ -349,7 +427,26 @@
" X, y = sklearn.datasets.load_digits(return_X_y=True)\n",
" X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)\n",
"\n",
" est = tpot2.TPOTEstimatorSteadyState( client=client, classification=True, max_eval_time_seconds=60, max_time_seconds=120, scorers=['roc_auc_ovr'], scorers_weights=[1], verbose=1)\n",
" graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
" est = tpot2.TPOTEstimator(\n",
" client = client,\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" population_size= 10,\n",
" generations = 5,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
" )\n",
" est.fit(X_train, y_train)\n",
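" # NOTE(review): the commented-out TPOTClassifier call below is NOT equivalent to the estimator above (it differs at least in population_size and verbose, and uses n_jobs/memory_limit instead of client); kept for reference only - update or remove.\n",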
" # est = tpot2.TPOTClassifier(population_size= 8, generations=5, n_jobs=4, memory_limit=\"4GB\", verbose=1)\n",
" est.fit(X_train, y_train)\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,26 +186,33 @@
"\n",
"X, y = sklearn.datasets.load_iris(return_X_y=True)\n",
"\n",
"est = tpot2.TPOTEstimator( \n",
" generations=5,\n",
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" n_jobs=32,\n",
" cv=2,\n",
" max_eval_time_seconds=30,\n",
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"est = tpot2.TPOTEstimator(\n",
" scorers = [\"roc_auc\"],\n",
" scorers_weights = [1],\n",
" classification = True,\n",
" cv = 5,\n",
" search_space = graph_search_space,\n",
" generations = 50,\n",
" max_eval_time_seconds = 60*5,\n",
" verbose = 2,\n",
"\n",
"\n",
" population_size=population_size,\n",
" initial_population_size=initial_population_size,\n",
" population_scaling = population_scaling,\n",
" generations_until_end_population = generations_until_end_population,\n",
" \n",
" budget_range = budget_range,\n",
" generations_until_end_budget=generations_until_end_budget,\n",
" )\n",
"\n",
" population_size=population_size,\n",
" initial_population_size=initial_population_size,\n",
" population_scaling = population_scaling,\n",
" generations_until_end_population = generations_until_end_population,\n",
" \n",
" budget_range = budget_range,\n",
" generations_until_end_budget=generations_until_end_budget,\n",
" verbose=0)\n",
"\n",
"\n",
"start = time.time()\n",
Expand Down Expand Up @@ -296,14 +303,20 @@
}
],
"source": [
"graph_search_space = tpot2.search_spaces.pipelines.GraphPipeline(\n",
" root_search_space= tpot2.config.get_search_space([\"KNeighborsClassifier\", \"LogisticRegression\", \"DecisionTreeClassifier\"]),\n",
" leaf_search_space = tpot2.config.get_search_space(\"selectors\"), \n",
" inner_search_space = tpot2.config.get_search_space([\"transformers\"]),\n",
" max_size = 10,\n",
" )\n",
"\n",
"\n",
"est = tpot2.TPOTEstimator( \n",
" generations=5,\n",
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" search_space = graph_search_space,\n",
" n_jobs=32,\n",
" cv=cv,\n",
" \n",
Expand Down Expand Up @@ -369,14 +382,15 @@
}
],
"source": [
"\n",
"\n",
"\n",
"est = tpot2.TPOTEstimator( \n",
" generations=5,\n",
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" search_space = graph_search_space,\n",
" n_jobs=32,\n",
" cv=cv,\n",
"\n",
Expand Down Expand Up @@ -447,9 +461,7 @@
" scorers=['roc_auc_ovr'],\n",
" scorers_weights=[1],\n",
" classification=True,\n",
" root_config_dict=\"classifiers\",\n",
" inner_config_dict= [\"transformers\"],\n",
" leaf_config_dict=\"selectors\",\n",
" search_space = graph_search_space,\n",
" n_jobs=32,\n",
" cv=cv,\n",
"\n",
Expand Down
File renamed without changes.
85 changes: 85 additions & 0 deletions Tutorial/Example_Search_Spaces/imputation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from ConfigSpace import ConfigurationSpace\n",
"from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal\n",
"\n",
"simple_imputer = ConfigurationSpace(\n",
" space = {\n",
" 'strategy' : Categorical('strategy', [['mean','median',], ['most_frequent'] ]),\n",
" 'add_indicator' : Categorical('add_indicator', [True, False]), \n",
" }\n",
")\n",
"\n",
"simple_imputer.sample_configuration()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Configuration(values={\n",
" '2': 2,\n",
" 'a': 2,\n",
"})"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from ConfigSpace import ConfigurationSpace, EqualsCondition\n",
"import ConfigSpace\n",
"\n",
"cs = ConfigurationSpace({\n",
"\n",
" \"1\": [1,2,3],\n",
" \"2\": ConfigSpace.Constant(\"2\", 2),\n",
"\n",
" \"a\": [1, 2, 3],\n",
"\n",
"})\n",
"\n",
"cond = EqualsCondition(cs['1'], cs['a'], 1)\n",
"cond2 = EqualsCondition(cs['2'], cs['a'], 2)\n",
"\n",
"cs.add_condition(cond)\n",
"cs.add_condition(cond2)\n",
"\n",
"cs.sample_configuration()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "tpot2env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def calculate_version():

setup(
name='TPOT2',
python_requires='<3.12', #for configspace compatibility
version=package_version,
author='Pedro Ribeiro',
packages=find_packages(),
Expand Down Expand Up @@ -48,6 +49,7 @@ def calculate_version():
'dask-ml>=2022.5.27',
'dask-jobqueue>=0.8.1',
'func_timeout>=4.3.5',
'configspace>=0.7.1',
],
extras_require={
'skrebate': ['skrebate>=0.3.4'],
Expand Down
4 changes: 3 additions & 1 deletion tpot2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
#TODO: are all the imports in the init files done correctly?
#TODO clean up import organization

from .individual import BaseIndividual

from .graphsklearn import GraphPipeline
from .population import Population

from . import builtin_modules
from . import utils
from . import config
from . import individual_representations
from . import search_spaces
from . import evolvers
from . import objectives
from . import selectors
Expand Down
22 changes: 1 addition & 21 deletions tpot2/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1 @@
#TODO: make configuration dictionaries optionally based on strings?
from .classifiers import make_classifier_config_dictionary
from .transformers import make_transformer_config_dictionary
from .regressors import make_regressor_config_dictionary
from .selectors import make_selector_config_dictionary
from .special_configs import make_arithmetic_transformer_config_dictionary, make_FSS_config_dictionary, make_passthrough_config_dictionary
from .autoqtl_builtins import make_FeatureEncodingFrequencySelector_config_dictionary, make_genetic_encoders_config_dictionary
from .hyperparametersuggestor import *

try:
from .classifiers_sklearnex import make_sklearnex_classifier_config_dictionary
from .regressors_sklearnex import make_sklearnex_regressor_config_dictionary
except ModuleNotFoundError: #if optional packages are not installed
pass

try:
from .mdr_configs import make_skrebate_config_dictionary, make_MDR_config_dictionary, make_ContinuousMDR_config_dictionary
except: #if optional packages are not installed
pass

from .classifiers import *
from .get_configspace import get_search_space
Loading

0 comments on commit d5a27cc

Please sign in to comment.