Skip to content

Commit

Permalink
Merge pull request #137 from perib/new_search_space_def
Browse files Browse the repository at this point in the history
New search space def
  • Loading branch information
perib authored Jun 19, 2024
2 parents fedc90d + 5425ee4 commit bbef3e4
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 11 deletions.
2 changes: 1 addition & 1 deletion tpot2/builtin_modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .column_one_hot_encoder import ColumnOneHotEncoder
from .arithmetictransformer import ArithmeticTransformer
from .arithmetictransformer import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
from .passthrough import Passthrough
from .passthrough import Passthrough, SkipTransformer
from .imputer import ColumnSimpleImputer
from .estimatortransformer import EstimatorTransformer
from .passkbinsdiscretizer import PassKBinsDiscretizer
12 changes: 12 additions & 0 deletions tpot2/builtin_modules/passthrough.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class Passthrough(TransformerMixin,BaseEstimator):

Expand All @@ -7,3 +8,14 @@ def fit(self, X=None, y=None):

def transform(self, X):
return X


class SkipTransformer(TransformerMixin,BaseEstimator):
    """Transformer that discards every input feature.

    ``fit`` is a no-op and ``transform`` returns an array with one row per
    input sample and zero columns.
    """

    def fit(self, X=None, y=None):
        """Do nothing and return ``self``; there is no state to learn."""
        return self

    def transform(self, X):
        """Return an empty ``(n_samples, 0)`` float array.

        Parameters
        ----------
        X : array-like
            Input data; only the length of its first axis is used.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(n_samples, 0)``.
        """
        # np.shape uses X.shape when it exists and otherwise falls back to
        # np.asarray(X).shape, so plain (nested) lists are accepted as well.
        n_samples = np.shape(X)[0]
        # Same number of rows as X, but no columns.
        return np.empty((n_samples, 0))

13 changes: 9 additions & 4 deletions tpot2/config/get_configspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder
from tpot2.builtin_modules import ZeroCount, ColumnOneHotEncoder, PassKBinsDiscretizer
from tpot2.builtin_modules import Passthrough
from tpot2.builtin_modules import Passthrough, SkipTransformer
from sklearn.linear_model import SGDClassifier, LogisticRegression, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, ElasticNetCV, PassiveAggressiveClassifier, ARDRegression
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, ExtraTreesRegressor, ExtraTreesClassifier, AdaBoostRegressor, AdaBoostClassifier, GradientBoostingRegressor,RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
Expand All @@ -45,7 +45,7 @@
from sklearn.feature_selection import f_classif, f_regression #TODO create a selectomixin using these?
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier

from sklearn.impute import SimpleImputer

all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,
AdaBoostClassifier,MLPRegressor,
Expand All @@ -54,8 +54,9 @@
PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis,
DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder,
GaussianProcessClassifier, BaggingClassifier,LGBMRegressor,
Passthrough,
Passthrough,SkipTransformer,
PassKBinsDiscretizer,
SimpleImputer,
]


Expand Down Expand Up @@ -123,7 +124,7 @@
"all_transformers" : ["transformers", "scalers"],

"arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"],
"imputers": [],
"imputers": ["SimpleImputer"],
"skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"],
"genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"],

Expand All @@ -135,6 +136,8 @@

def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_state=None):
match name:
case "SimpleImputer":
return imputers.simple_imputer_cs

#autoqtl_builtins.py
case "FeatureEncodingFrequencySelector":
Expand All @@ -152,6 +155,8 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st

case "Passthrough":
return {}
case "SkipTransformer":
return {}

#classifiers.py
case "LinearDiscriminantAnalysis":
Expand Down
2 changes: 1 addition & 1 deletion tpot2/config/imputers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ConfigSpace import ConfigurationSpace
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal

simple_imputer = ConfigurationSpace(
simple_imputer_cs = ConfigurationSpace(
space = {
'strategy' : Categorical('strategy', ['mean','median', 'most_frequent', ]),
'add_indicator' : Categorical('add_indicator', [True, False]),
Expand Down
27 changes: 23 additions & 4 deletions tpot2/search_spaces/pipelines/sequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ class SequentialPipelineIndividual(SklearnIndividual):
# takes in a list of search spaces. each space is a list of SklearnIndividualGenerators.
# Produces a sequential pipeline with one step per search space. Each step in the pipeline corresponds to the search space provided at the same index.

def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -> None:
def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None, rng=None) -> None:
super().__init__()
self.search_spaces = search_spaces
self.memory = memory
self.pipeline = []

for space in self.search_spaces:
Expand All @@ -25,6 +26,14 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -
#TODO, mutate all steps or just one?
def mutate(self, rng=None):
    """Mutate one randomly selected step of the pipeline.

    Parameters
    ----------
    rng : None, int, or numpy.random.Generator
        Seed or generator used to pick the step; it is also handed to the
        chosen step's ``mutate``. An existing ``Generator`` is reused as-is,
        so callers that pass a seeded generator get reproducible mutations.

    Returns
    -------
    The value returned by the chosen step's ``mutate`` call.
    """
    # Honour the caller's rng instead of always creating a fresh, unseeded
    # generator (which made seeded runs non-reproducible).
    rng = np.random.default_rng(rng)

    # Alternative considered: mutate every step independently with
    # probability 0.5 and return True if any step reported a mutation.

    step = rng.choice(self.pipeline)
    return step.mutate(rng)

Expand Down Expand Up @@ -102,6 +111,15 @@ def _crossover_swap_segment(self, other, rng):
def _crossover_inner_step(self, other, rng):
rng = np.random.default_rng()

# crossover_success = False
# for idx in range(len(self.pipeline)):
# if rng.random() < 0.5:
# if self.pipeline[idx].crossover(other.pipeline[idx], rng):
# crossover_success = True

# return crossover_success


crossover_success = False
for idx in range(len(self.pipeline)):
if rng.random() < 0.5:
Expand All @@ -111,7 +129,7 @@ def _crossover_inner_step(self, other, rng):
return crossover_success

def export_pipeline(self):
    """Build an sklearn pipeline from the exported form of every step.

    Each element of ``self.pipeline`` is exported in order and the results
    are chained with ``sklearn.pipeline.make_pipeline``. ``self.memory`` is
    forwarded as the pipeline's ``memory`` argument.
    """
    exported_steps = [step.export_pipeline() for step in self.pipeline]
    return sklearn.pipeline.make_pipeline(*exported_steps, memory=self.memory)

def unique_id(self):
l = [step.unique_id() for step in self.pipeline]
Expand All @@ -122,12 +140,13 @@ def unique_id(self):


class SequentialPipeline(SklearnIndividualGenerator):
    def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None) -> None:
        """
        Takes in a list of search spaces and produces sequential pipelines with one step per search space.
        Each step in the pipeline corresponds to the search space provided at the same index.

        Parameters
        ----------
        search_spaces : List[SklearnIndividualGenerator]
            One search space per pipeline step, in pipeline order.
        memory : optional, default None
            Stored and passed unchanged to every generated
            SequentialPipelineIndividual.
        """

        self.search_spaces = search_spaces
        self.memory = memory

    def generate(self, rng=None):
        """Create a new SequentialPipelineIndividual from the stored search spaces, memory setting and the given rng."""
        return SequentialPipelineIndividual(self.search_spaces, memory=self.memory, rng=rng)
2 changes: 1 addition & 1 deletion tpot2/search_spaces/pipelines/union.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _crossover(self, other, rng=None):
#swap a random step in the pipeline with the corresponding step in the other pipeline
rng = np.random.default_rng()

cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
cx_funcs = [self._crossover_inner_step]
rng.shuffle(cx_funcs)
for cx_func in cx_funcs:
if cx_func(other, rng):
Expand Down

0 comments on commit bbef3e4

Please sign in to comment.