nnetsauce
The public API exported by the package's top-level `__init__.py`:

```python
from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepClassifier import LazyDeepClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.classical import ClassicalMTS
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

__all__ = [
    "AdaBoostClassifier",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "GLMClassifier",
    "GLMRegressor",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MedianVotingRegressor",
    "MTS",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RegressorUpdater",
    "ClassifierUpdater",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "SubSampler",
]
```
AdaBoostClassifier

```python
class AdaBoostClassifier(Boosting, ClassifierMixin):
    # Docstring (Parameters, Attributes, Examples) is rendered below.

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    # fit(), predict() and predict_proba() are documented and listed
    # individually below.
```
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls the compromise between the l1 and l2 norms of the weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
during training
direct_link: boolean
indicates whether the original predictors are included (True) in the model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of AdaBoost method: 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class='ovr',
                         random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed=123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(y_test, preds))
```
```python
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit Boosting model to training data (X, y); full docstring rendered below."""

    assert mx.is_factor(y), "y must contain only integers"

    assert self.method in (
        "SAMME",
        "SAMME.R",
    ), "`method` must be either 'SAMME' or 'SAMME.R'"

    assert (self.reg_lambda <= 1) & (
        self.reg_lambda >= 0
    ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

    assert (self.reg_alpha <= 1) & (
        self.reg_alpha >= 0
    ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

    # training
    n, p = X.shape
    self.n_classes = len(np.unique(y))
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    if sample_weight is None:
        w_m = np.repeat(1.0 / n, n)
    else:
        w_m = np.asarray(sample_weight)

    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    if self.verbose == 1:
        pbar = Progbar(self.n_estimators)

    if self.method == "SAMME":
        err_m = 1e6
        err_bound = 1 - 1 / self.n_classes
        self.alpha_.append(1.0)
        x_range_n = range(n)

        for m in range(self.n_estimators):
            preds = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict(X)

            self.base_learners_.update({m: deepcopy(base_learner)})

            cond = [y[i] != preds[i] for i in x_range_n]

            err_m = max(
                sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
                2.220446049250313e-16,
            )  # sum(w_m) == 1

            if self.reg_lambda > 0:
                err_m += self.reg_lambda * (
                    (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
                    + self.reg_alpha * sum([abs(x) for x in w_m])
                )

            err_m = min(err_m, err_bound)

            alpha_m = self.learning_rate * log(
                (self.n_classes - 1) * (1 - err_m) / err_m
            )

            self.alpha_.append(alpha_m)

            w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

            sum_w_m = sum(w_m_temp)

            w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)
        self.classes_ = np.unique(y)

        return self

    if self.method == "SAMME.R":
        Y = mo.one_hot_encode2(y, self.n_classes)

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)  # (N, 1)
        else:
            w_m = np.asarray(sample_weight)

        for m in range(self.n_estimators):
            probs = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict_proba(X)

            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            self.base_learners_.update({m: deepcopy(base_learner)})

            w_m *= np.exp(
                -1.0
                * self.learning_rate
                * (1.0 - 1.0 / self.n_classes)
                * xlogy(Y, probs).sum(axis=1)
            )

            w_m /= np.sum(w_m)

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)
        self.classes_ = np.unique(y)

        return self
```
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
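For intuition, here is a minimal NumPy sketch of one discrete-SAMME iteration, mirroring the weight update performed in the source listed above; the labels, predictions, weights and learning rate are made-up illustrative values, not nnetsauce defaults or API.

```python
import numpy as np

# Illustrative values only: 6 samples, 3 classes, learning_rate = 0.1
y = np.array([0, 1, 2, 0, 1, 2])       # true labels
preds = np.array([0, 1, 1, 0, 2, 2])   # base learner predictions at iteration m
w_m = np.repeat(1.0 / 6, 6)            # current observation weights (sum to 1)
n_classes, learning_rate = 3, 0.1

miss = (y != preds).astype(float)                        # misclassification indicator
err_m = max(np.sum(w_m * miss), 2.220446049250313e-16)   # weighted error, kept > 0
err_m = min(err_m, 1 - 1 / n_classes)                    # bounded by the random-guess rate
alpha_m = learning_rate * np.log((n_classes - 1) * (1 - err_m) / err_m)  # learner weight

w_m = np.exp(alpha_m * miss)           # misclassified samples get weight exp(alpha_m)
w_m /= w_m.sum()                       # renormalize so the weights sum to 1
print(round(alpha_m, 4), np.round(w_m, 4))
```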
```python
def predict(self, X, **kwargs):
    """Predict test data X (full docstring rendered below)."""
    return self.predict_proba(X, **kwargs).argmax(axis=1)
```
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
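As the source above shows, `predict` is simply the column-wise argmax of `predict_proba`. A tiny sketch with made-up probabilities; the extra mapping through `classes_` (an assumption for illustration) is only needed when the labels are not already 0, 1, ..., n_classes - 1.

```python
import numpy as np

proba = np.array([[0.7, 0.2, 0.1],    # made-up probabilities for 3 samples
                  [0.1, 0.3, 0.6],
                  [0.2, 0.5, 0.3]])
classes_ = np.array([0, 1, 2])        # classes_ as stored by fit (np.unique(y))

col_idx = proba.argmax(axis=1)        # what predict() returns
labels = classes_[col_idx]            # map back to the original labels if needed
print(col_idx, labels)                # [0 2 1] [0 2 1]
```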
```python
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X (full docstring rendered below)."""

    n_iter = len(self.base_learners_)

    if self.method == "SAMME":
        ensemble_learner = np.zeros((X.shape[0], self.n_classes))

        for idx, base_learner in self.base_learners_.items():
            preds = base_learner.predict(X, **kwargs)

            ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                preds, self.n_classes
            )

        expit_ensemble_learner = expit(ensemble_learner)
        sum_ensemble = expit_ensemble_learner.sum(axis=1)
        return expit_ensemble_learner / sum_ensemble[:, None]

    # if self.method == "SAMME.R":
    ensemble_learner = 0

    for idx, base_learner in self.base_learners_.items():
        probs = base_learner.predict_proba(X, **kwargs)

        np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

        log_preds_proba = np.log(probs)

        ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None]

    ensemble_learner *= self.n_classes - 1

    expit_ensemble_learner = expit(ensemble_learner)
    sum_ensemble = expit_ensemble_learner.sum(axis=1)
    return expit_ensemble_learner / sum_ensemble[:, None]
```
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
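A condensed NumPy/SciPy sketch of the SAMME.R aggregation performed in the source above (the per-learner probabilities are made up): centered log-probabilities are summed over the base learners, squashed with `expit`, then renormalized row-wise.

```python
import numpy as np
from scipy.special import expit

probs_list = [                                       # made-up outputs of 2 base learners
    np.array([[0.6, 0.3, 0.1], [0.2, 0.2, 0.6]]),
    np.array([[0.5, 0.4, 0.1], [0.1, 0.3, 0.6]]),
]
n_classes = 3

ensemble = 0.0
for probs in probs_list:
    log_p = np.log(np.clip(probs, 2.220446049250313e-16, 1.0))
    ensemble += log_p - log_p.mean(axis=1)[:, None]  # centered log-probabilities

ensemble *= n_classes - 1
scores = expit(ensemble)                             # squash to (0, 1)
print(scores / scores.sum(axis=1)[:, None])          # rows sum to 1
```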
Base

```python
class Base(BaseEstimator):
    # Docstring (parameters) is rendered below.

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # input checks -----

        sys_platform = platform.system()

        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
            warnings.warn("No GPU/TPU computing on Windows yet, backend set to 'cpu'")
            backend = "cpu"

        assert activation_name in (
            "relu",
            "tanh",
            "sigmoid",
            "prelu",
            "elu",
        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid', 'prelu', 'elu')"

        assert nodes_sim in (
            "sobol",
            "hammersley",
            "uniform",
            "halton",
        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"

        assert type_clust in (
            "kmeans",
            "gmm",
        ), "'type_clust' must be in ('kmeans', 'gmm')"

        assert (len(type_scaling) == 3) & all(
            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
            for i in range(len(type_scaling))
        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"

        assert (col_sample >= 0) & (
            col_sample <= 1
        ), "'col_sample' must be comprised between 0 and 1 (both included)"

        assert backend in (
            "cpu",
            "gpu",
            "tpu",
        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"

        self.n_hidden_features = n_hidden_features
        self.activation_name = activation_name
        self.a = a
        self.nodes_sim = nodes_sim
        self.bias = bias
        self.seed = seed
        self.backend = backend
        self.dropout = dropout
        self.direct_link = direct_link
        self.cluster_encode = cluster_encode
        self.type_clust = type_clust
        self.type_scaling = type_scaling
        self.col_sample = col_sample
        self.row_sample = row_sample
        self.n_clusters = n_clusters
        if isinstance(self, RegressorMixin):
            self.type_fit = "regression"
        elif isinstance(self, ClassifierMixin):
            self.type_fit = "classification"
        self.subsampler_ = None
        self.index_col_ = None
        self.index_row_ = True
        self.clustering_obj_ = None
        self.clustering_scaler_ = None
        self.nn_scaler_ = None
        self.scaler_ = None
        self.encoder_ = None
        self.W_ = None
        self.X_ = None
        self.y_ = None
        self.y_mean_ = None
        self.beta_ = None

        # activation function -----
        if sys_platform in ("Linux", "Darwin"):
            activation_options = {
                "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
                "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
                "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
                "prelu": partial(ac.prelu, a=a),
                "elu": (
                    partial(ac.elu, a=a)
                    if (self.backend == "cpu")
                    else partial(jnn.elu, a=a)
                ),
            }
        else:  # on Windows currently, no JAX
            activation_options = {
                "relu": (ac.relu if (self.backend == "cpu") else NotImplementedError),
                "tanh": (np.tanh if (self.backend == "cpu") else NotImplementedError),
                "sigmoid": (
                    ac.sigmoid if (self.backend == "cpu") else NotImplementedError
                ),
                "prelu": partial(ac.prelu, a=a),
                "elu": (
                    partial(ac.elu, a=a)
                    if (self.backend == "cpu")
                    else NotImplementedError
                ),
            }
        self.activation_func = activation_options[activation_name]

    # "preprocessing" methods to be inherited -----
    # encode_clusters(), create_layer(), cook_training_set() and cook_test_set()
    # are documented and listed individually below; cross_val_score() follows here.

    def cross_val_score(
        self,
        X,
        y,
        cv=5,
        scoring="accuracy",
        random_state=42,
        n_jobs=-1,
        epsilon=0.5,
        penalized=True,
        objective="abs",
        **kwargs
    ):
        """
        Penalized cross-validation score for a model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            cv: int
                Number of folds

            scoring: str
                Scoring metric

            random_state: int
                Random state

            n_jobs: int
                Number of jobs to run in parallel

            epsilon: float
                Penalty parameter

            penalized: bool
                Whether to obtain penalized cross-validation score or not

            objective: str
                'abs': Minimize the absolute difference between cross-validation score and validation score
                'relative': Minimize the relative difference between cross-validation score and validation score

        Returns:

            A namedtuple with the following fields:
                - cv_score: float
                    cross-validation score
                - val_score: float
                    validation score
                - penalized_score: float
                    penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score)
                    If a higher scoring metric is better, minimize the function result.
                    If a lower scoring metric is better, maximize the function result.
        """
        if scoring == "accuracy":
            scoring_func = accuracy_score
        elif scoring == "balanced_accuracy":
            scoring_func = balanced_accuracy_score
        elif scoring == "f1":
            scoring_func = f1_score
        elif scoring == "roc_auc":
            scoring_func = roc_auc_score
        elif scoring == "r2":
            scoring_func = r2_score
        elif scoring == "mse":
            scoring_func = mean_squared_error
        elif scoring == "mae":
            scoring_func = mean_absolute_error
        elif scoring == "mape":
            scoring_func = mean_absolute_percentage_error
        elif scoring == "rmse":

            def scoring_func(y_true, y_pred):
                return np.sqrt(mean_squared_error(y_true, y_pred))

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=random_state
        )

        res = cross_val_score(
            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
        )  # cross-validation error

        if penalized == False:
            return res

        DescribeResult = namedtuple(
            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
        )

        numerator = res.mean()

        # Evaluate on the (cv+1)-th fold
        preds_val = self.fit(X_train, y_train).predict(X_val)
        try:
            denominator = scoring(y_val, preds_val)  # validation error
        except Exception as e:
            denominator = scoring_func(y_val, preds_val)

        # if higher is better
        if objective == "abs":
            penalized_score = np.abs(numerator - denominator) + epsilon * (
                1 / denominator + 1 / numerator
            )
        elif objective == "relative":
            ratio = numerator / denominator
            penalized_score = np.abs(ratio - 1) + epsilon * (
                1 / denominator + 1 / numerator
            )

        return DescribeResult(
            cv_score=numerator,
            val_score=denominator,
            penalized_score=penalized_score,
        )
```
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
during training
direct_link: boolean
indicates whether the original features are included (True) in the model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust'), or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
```python
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
    """Create new covariates with kmeans or GMM clustering (full docstring rendered below)."""

    np.random.seed(self.seed)

    if X is None:
        X = self.X_

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    if predict is False:  # encode training set

        # scale input data before clustering
        self.clustering_scaler_, scaled_X = mo.scale_covariates(
            X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
        )

        self.clustering_obj_, X_clustered = mo.cluster_covariates(
            scaled_X,
            self.n_clusters,
            self.seed,
            type_clust=self.type_clust,
            **kwargs
        )

        if self.cluster_encode == True:
            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
                np.float16
            )

        return X_clustered.astype(np.float16)

    # if predict == True, encode test set
    X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))

    if self.cluster_encode == True:
        return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)

    return X_clustered.astype(np.float16)
```
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded: {array-like}
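A minimal sketch of the behaviour described above, assuming k-means and one-hot encoding: the scaler and the clustering model are fitted on the training set (predict=False) and reused as-is on test data (predict=True). StandardScaler and KMeans stand in for the internal mo.scale_covariates / mo.cluster_covariates helpers, which are not shown on this page.

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X_train, X_test = rng.normal(size=(80, 3)), rng.normal(size=(20, 3))
n_clusters = 2

# training set (predict=False): fit the scaler and the clustering model, then one-hot encode
scaler = StandardScaler().fit(X_train)
km = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(scaler.transform(X_train))
train_clusters = np.eye(n_clusters)[km.labels_]

# test set (predict=True): reuse the fitted scaler and clustering model
test_clusters = np.eye(n_clusters)[km.predict(scaler.transform(X_test))]
print(train_clusters.shape, test_clusters.shape)  # (80, 2) (20, 2)
```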
```python
def create_layer(self, scaled_X, W=None):
    """Create hidden layer (full docstring rendered below)."""

    n_features = scaled_X.shape[1]

    if self.bias is False:  # no bias term in the hidden layer
        if W is None:
            if self.nodes_sim == "sobol":
                self.W_ = generate_sobol(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            elif self.nodes_sim == "hammersley":
                self.W_ = generate_hammersley(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            elif self.nodes_sim == "uniform":
                self.W_ = generate_uniform(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            else:
                self.W_ = generate_halton(
                    n_dims=n_features,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )

            assert (
                scaled_X.shape[1] == self.W_.shape[0]
            ), "check dimensions of covariates X and matrix W"

            return mo.dropout(
                x=self.activation_func(
                    mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
                ),
                drop_prob=self.dropout,
                seed=self.seed,
            )

        # W is not None
        assert (
            scaled_X.shape[1] == W.shape[0]
        ), "check dimensions of covariates X and matrix W"

        return mo.dropout(
            x=self.activation_func(
                mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
            ),
            drop_prob=self.dropout,
            seed=self.seed,
        )

    # with bias term in the hidden layer
    if W is None:
        n_features_1 = n_features + 1

        if self.nodes_sim == "sobol":
            self.W_ = generate_sobol(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )
        elif self.nodes_sim == "hammersley":
            self.W_ = generate_hammersley(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )
        elif self.nodes_sim == "uniform":
            self.W_ = generate_uniform(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )
        else:
            self.W_ = generate_halton(
                n_dims=n_features_1,
                n_points=self.n_hidden_features,
                seed=self.seed,
            )

        return mo.dropout(
            x=self.activation_func(
                mo.safe_sparse_dot(
                    a=mo.cbind(
                        np.ones(scaled_X.shape[0]),
                        scaled_X,
                        backend=self.backend,
                    ),
                    b=self.W_,
                    backend=self.backend,
                )
            ),
            drop_prob=self.dropout,
            seed=self.seed,
        )

    # W is not None
    return mo.dropout(
        x=self.activation_func(
            mo.safe_sparse_dot(
                a=mo.cbind(
                    np.ones(scaled_X.shape[0]),
                    scaled_X,
                    backend=self.backend,
                ),
                b=W,
                backend=self.backend,
            )
        ),
        drop_prob=self.dropout,
        seed=self.seed,
    )
```
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
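A small NumPy sketch of what the hidden layer computes, assuming the 'relu' activation, a bias column, and plain uniform draws standing in for the quasi-random node simulations; the dropout mask shown here is a simplified stand-in for the internal mo.dropout helper.

```python
import numpy as np

rng = np.random.default_rng(123)
scaled_X = rng.normal(size=(10, 4))          # already-scaled inputs
n_hidden_features, dropout, bias = 6, 0.1, True

# weights W: uniform draws standing in for the 'sobol'/'hammersley'/'halton' sequences
n_dims = scaled_X.shape[1] + (1 if bias else 0)
W = rng.uniform(size=(n_dims, n_hidden_features))

# optional bias column of ones, then the activation function
inputs = np.hstack([np.ones((scaled_X.shape[0], 1)), scaled_X]) if bias else scaled_X
layer = np.maximum(inputs @ W, 0)            # 'relu' activation

# dropout: randomly zero a fraction of the hidden activations (training-time regularization)
mask = rng.uniform(size=layer.shape) >= dropout
print((layer * mask).shape)                  # (10, 6)
```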
```python
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
    """Create new hidden features for training set, with hidden layer, center the response (full docstring rendered below)."""

    # either X and y are stored or not
    # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
    if self.n_hidden_features > 0:  # has a hidden layer
        assert (
            len(self.type_scaling) >= 2
        ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"

    if X is None:

        if self.col_sample == 1:
            input_X = self.X_
        else:
            n_features = self.X_.shape[1]
            new_n_features = int(np.ceil(n_features * self.col_sample))
            assert (
                new_n_features >= 1
            ), "check class attribute 'col_sample' and the number of covariates provided for X"
            np.random.seed(self.seed)
            index_col = np.random.choice(
                range(n_features), size=new_n_features, replace=False
            )
            self.index_col_ = index_col
            input_X = self.X_[:, self.index_col_]

    else:  # X is not None # keep X vs self.X_

        if isinstance(X, pd.DataFrame):
            X = copy.deepcopy(X.values.astype(float))

        if self.col_sample == 1:
            input_X = X
        else:
            n_features = X.shape[1]
            new_n_features = int(np.ceil(n_features * self.col_sample))
            assert (
                new_n_features >= 1
            ), "check class attribute 'col_sample' and the number of covariates provided for X"
            np.random.seed(self.seed)
            index_col = np.random.choice(
                range(n_features), size=new_n_features, replace=False
            )
            self.index_col_ = index_col
            input_X = X[:, self.index_col_]

    if self.n_clusters <= 0:
        # data without any clustering: self.n_clusters is None -----

        if self.n_hidden_features > 0:  # with hidden layer
            self.nn_scaler_, scaled_X = mo.scale_covariates(
                input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
            )
            Phi_X = (
                self.create_layer(scaled_X)
                if W is None
                else self.create_layer(scaled_X, W=W)
            )
            Z = (
                mo.cbind(input_X, Phi_X, backend=self.backend)
                if self.direct_link is True
                else Phi_X
            )
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )
        else:  # no hidden layer
            Z = input_X
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )

    else:
        # data with clustering: self.n_clusters is not None ----- # keep

        augmented_X = mo.cbind(
            input_X,
            self.encode_clusters(input_X, **kwargs),
            backend=self.backend,
        )

        if self.n_hidden_features > 0:  # with hidden layer
            self.nn_scaler_, scaled_X = mo.scale_covariates(
                augmented_X,
                choice=self.type_scaling[1],
                scaler=self.nn_scaler_,
            )
            Phi_X = (
                self.create_layer(scaled_X)
                if W is None
                else self.create_layer(scaled_X, W=W)
            )
            Z = (
                mo.cbind(augmented_X, Phi_X, backend=self.backend)
                if self.direct_link is True
                else Phi_X
            )
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )
        else:  # no hidden layer
            Z = augmented_X
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )

    # Returning model inputs -----
    if mx.is_factor(y) is False:  # regression
        # center y
        if y is None:
            self.y_mean_, centered_y = mo.center_response(self.y_)
        else:
            self.y_mean_, centered_y = mo.center_response(y)

        # y is subsampled
        if self.row_sample < 1:
            n, p = Z.shape

            self.subsampler_ = (
                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
                if y is None
                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
            )

            self.index_row_ = self.subsampler_.subsample()

            n_row_sample = len(self.index_row_)
            # regression
            return (
                centered_y[self.index_row_].reshape(n_row_sample),
                self.scaler_.transform(
                    Z[self.index_row_, :].reshape(n_row_sample, p)
                ),
            )
        # y is not subsampled
        # regression
        return (centered_y, self.scaler_.transform(Z))

    # classification
    # y is subsampled
    if self.row_sample < 1:
        n, p = Z.shape

        self.subsampler_ = (
            SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
            if y is None
            else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
        )

        self.index_row_ = self.subsampler_.subsample()

        n_row_sample = len(self.index_row_)
        # classification
        return (
            y[self.index_row_].reshape(n_row_sample),
            self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
        )
    # y is not subsampled
    # classification
    return (y, self.scaler_.transform(Z))
```
Create new hidden features for the training set using the hidden layer, and center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
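For intuition, the construction described above can be sketched with plain NumPy: scale the inputs, build hidden-layer features, optionally concatenate them with the original features (direct link), scale the result and center the response. This is only an illustrative sketch, not the class's actual code path; `W_hidden` and the ReLU activation stand in for the simulated hidden-layer weights and the chosen activation, and clustering, column/row subsampling and backend handling are omitted.

```python
import numpy as np

rng = np.random.default_rng(123)
X = rng.normal(size=(10, 3))                 # training inputs
y = rng.normal(size=10)                      # response

W_hidden = rng.uniform(size=(3, 5))          # stand-in for the simulated hidden-layer weights
X_scaled = (X - X.mean(axis=0)) / X.std(axis=0)   # 'std' scaling of the inputs
Phi_X = np.maximum(X_scaled @ W_hidden, 0.0)      # hidden-layer features (ReLU)
Z = np.hstack([X, Phi_X])                    # direct link: original features + hidden features
Z_scaled = (Z - Z.mean(axis=0)) / Z.std(axis=0)   # scaled model matrix
centered_y = y - y.mean()                    # centered response
```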
640 def cook_test_set(self, X, **kwargs): 641 """Transform data from test set, with hidden layer. 642 643 Parameters: 644 645 X: {array-like}, shape = [n_samples, n_features] 646 Training vectors, where n_samples is the number 647 of samples and n_features is the number of features 648 649 **kwargs: additional parameters to be passed to self.encode_cluster 650 651 Returns: 652 653 Transformed test set : {array-like} 654 """ 655 656 if isinstance(X, pd.DataFrame): 657 X = copy.deepcopy(X.values.astype(float)) 658 659 if len(X.shape) == 1: 660 X = X.reshape(1, -1) 661 662 if ( 663 self.n_clusters == 0 664 ): # data without clustering: self.n_clusters is None ----- 665 if self.n_hidden_features > 0: 666 # if hidden layer 667 scaled_X = ( 668 self.nn_scaler_.transform(X) 669 if (self.col_sample == 1) 670 else self.nn_scaler_.transform(X[:, self.index_col_]) 671 ) 672 Phi_X = self.create_layer(scaled_X, self.W_) 673 if self.direct_link == True: 674 return self.scaler_.transform( 675 mo.cbind(scaled_X, Phi_X, backend=self.backend) 676 ) 677 # when self.direct_link == False 678 return self.scaler_.transform(Phi_X) 679 # if no hidden layer # self.n_hidden_features == 0 680 return self.scaler_.transform(X) 681 682 # data with clustering: self.n_clusters > 0 ----- 683 if self.col_sample == 1: 684 predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs) 685 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 686 else: 687 predicted_clusters = self.encode_clusters( 688 X=X[:, self.index_col_], predict=True, **kwargs 689 ) 690 augmented_X = mo.cbind( 691 X[:, self.index_col_], predicted_clusters, backend=self.backend 692 ) 693 694 if self.n_hidden_features > 0: # if hidden layer 695 scaled_X = self.nn_scaler_.transform(augmented_X) 696 Phi_X = self.create_layer(scaled_X, self.W_) 697 if self.direct_link == True: 698 return self.scaler_.transform( 699 mo.cbind(augmented_X, Phi_X, backend=self.backend) 700 ) 701 return self.scaler_.transform(Phi_X) 702 703 # if no hidden layer 704 return self.scaler_.transform(augmented_X)
Transform test set data using the hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test samples, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_clusters
Returns:
Transformed test set: {array-like}
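The essential point of the test-set transformation is that it reuses everything fitted on the training set: the scalers, the hidden-layer weights and, when relevant, the clustering model and the selected columns. A hedged sketch of that idea using scikit-learn scalers (the names below are illustrative, not the class's internal attributes):

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(123)
X_train, X_new = rng.normal(size=(20, 3)), rng.normal(size=(4, 3))
W_hidden = rng.uniform(size=(3, 5))          # stand-in for the simulated hidden-layer weights

nn_scaler = StandardScaler().fit(X_train)    # fitted on the training set only
Phi_train = np.maximum(nn_scaler.transform(X_train) @ W_hidden, 0.0)
scaler = StandardScaler().fit(np.hstack([X_train, Phi_train]))

# at test time, reuse nn_scaler, W_hidden and scaler as-is
Phi_new = np.maximum(nn_scaler.transform(X_new) @ W_hidden, 0.0)
Z_new = scaler.transform(np.hstack([X_new, Phi_new]))
```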
15class BaseRegressor(Base, RegressorMixin): 16 """Random Vector Functional Link Network regression without shrinkage 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 31 'uniform' 32 33 bias: boolean 34 indicates if the hidden layer contains a bias term (True) or 35 not (False) 36 37 dropout: float 38 regularization parameter; (random) percentage of nodes dropped out 39 of the training 40 41 direct_link: boolean 42 indicates if the original features are included (True) in model's 43 fitting or not (False) 44 45 n_clusters: int 46 number of clusters for type_clust='kmeans' or type_clust='gmm' 47 clustering (could be 0: no clustering) 48 49 cluster_encode: bool 50 defines how the variable containing clusters is treated (default is one-hot); 51 if `False`, then labels are used, without one-hot encoding 52 53 type_clust: str 54 type of clustering method: currently k-means ('kmeans') or Gaussian 55 Mixture Model ('gmm') 56 57 type_scaling: a tuple of 3 strings 58 scaling methods for inputs, hidden layer, and clustering respectively 59 (and when relevant). 60 Currently available: standardization ('std') or MinMax scaling ('minmax') 61 62 col_sample: float 63 percentage of features randomly chosen for training 64 65 row_sample: float 66 percentage of rows chosen for training, by stratified bootstrapping 67 68 seed: int 69 reproducibility seed for nodes_sim=='uniform', clustering and dropout 70 71 backend: str 72 "cpu" or "gpu" or "tpu" 73 74 Attributes: 75 76 beta_: vector 77 regression coefficients 78 79 GCV_: float 80 Generalized Cross-Validation error 81 82 """ 83 84 # construct the object ----- 85 86 def __init__( 87 self, 88 n_hidden_features=5, 89 activation_name="relu", 90 a=0.01, 91 nodes_sim="sobol", 92 bias=True, 93 dropout=0, 94 direct_link=True, 95 n_clusters=2, 96 cluster_encode=True, 97 type_clust="kmeans", 98 type_scaling=("std", "std", "std"), 99 col_sample=1, 100 row_sample=1, 101 seed=123, 102 backend="cpu", 103 ): 104 super().__init__( 105 n_hidden_features=n_hidden_features, 106 activation_name=activation_name, 107 a=a, 108 nodes_sim=nodes_sim, 109 bias=bias, 110 dropout=dropout, 111 direct_link=direct_link, 112 n_clusters=n_clusters, 113 cluster_encode=cluster_encode, 114 type_clust=type_clust, 115 type_scaling=type_scaling, 116 col_sample=col_sample, 117 row_sample=row_sample, 118 seed=seed, 119 backend=backend, 120 ) 121 122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend) 144 145 self.beta_ = fit_obj["beta_hat"] 146 147 self.GCV_ = fit_obj["GCV"] 148 149 return self 150 151 def predict(self, X, **kwargs): 152 """Predict test data X. 
153 154 Parameters: 155 156 X: {array-like}, shape = [n_samples, n_features] 157 Training vectors, where n_samples is the number 158 of samples and n_features is the number of features 159 160 **kwargs: additional parameters to be passed to self.cook_test_set 161 162 Returns: 163 164 model predictions: {array-like} 165 """ 166 167 if len(X.shape) == 1: 168 n_features = X.shape[0] 169 new_X = mo.rbind( 170 X.reshape(1, n_features), 171 np.ones(n_features).reshape(1, n_features), 172 ) 173 174 return ( 175 self.y_mean_ 176 + mo.safe_sparse_dot( 177 a=self.cook_test_set(new_X, **kwargs), 178 b=self.beta_, 179 backend=self.backend, 180 ) 181 )[0] 182 183 return self.y_mean_ + mo.safe_sparse_dot( 184 a=self.cook_test_set(X, **kwargs), 185 b=self.beta_, 186 backend=self.backend, 187 )
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
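A minimal usage sketch on scikit-learn's diabetes data; the hyperparameter values are arbitrary illustrations rather than tuned settings:

```python
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.BaseRegressor(n_hidden_features=10, direct_link=True,
                       n_clusters=2, type_clust="kmeans", seed=123)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(np.sqrt(np.mean((y_test - preds) ** 2)))   # out-of-sample RMSE
print(reg.GCV_)                                  # Generalized Cross-Validation error
```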
```python
def fit(self, X, y, **kwargs):
    """Fit BaseRegressor to training data (X, y)

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to self.cook_training_set

    Returns:

        self: object
    """

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend)

    self.beta_ = fit_obj["beta_hat"]

    self.GCV_ = fit_obj["GCV"]

    return self
```
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
151 def predict(self, X, **kwargs): 152 """Predict test data X. 153 154 Parameters: 155 156 X: {array-like}, shape = [n_samples, n_features] 157 Training vectors, where n_samples is the number 158 of samples and n_features is the number of features 159 160 **kwargs: additional parameters to be passed to self.cook_test_set 161 162 Returns: 163 164 model predictions: {array-like} 165 """ 166 167 if len(X.shape) == 1: 168 n_features = X.shape[0] 169 new_X = mo.rbind( 170 X.reshape(1, n_features), 171 np.ones(n_features).reshape(1, n_features), 172 ) 173 174 return ( 175 self.y_mean_ 176 + mo.safe_sparse_dot( 177 a=self.cook_test_set(new_X, **kwargs), 178 b=self.beta_, 179 backend=self.backend, 180 ) 181 )[0] 182 183 return self.y_mean_ + mo.safe_sparse_dot( 184 a=self.cook_test_set(X, **kwargs), 185 b=self.beta_, 186 backend=self.backend, 187 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
New samples to predict on, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
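As the source above suggests, `predict` also accepts a single observation passed as a 1-D array. A small self-contained sketch on synthetic data (settings are illustrative):

```python
import nnetsauce as ns
import numpy as np

rng = np.random.default_rng(123)
X = rng.normal(size=(50, 4))
y = X @ np.array([1.0, -1.0, 0.5, 0.0]) + 0.1 * rng.normal(size=50)

reg = ns.BaseRegressor(n_hidden_features=5, seed=123).fit(X, y)
print(reg.predict(X[0]))     # 1-D input: a single observation
print(reg.predict(X[:3]))    # 2-D input: an array of predictions
```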
15class BayesianRVFLRegressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with one prior 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s: float 61 std. dev. of regression parameters in Bayesian Ridge Regression 62 63 sigma: float 64 std. dev. of residuals in Bayesian Ridge Regression 65 66 return_std: boolean 67 if True, uncertainty around predictions is evaluated 68 69 backend: str 70 "cpu" or "gpu" or "tpu" 71 72 Attributes: 73 74 beta_: array-like 75 regression''s coefficients 76 77 Sigma_: array-like 78 covariance of the distribution of fitted parameters 79 80 GCV_: float 81 Generalized cross-validation error 82 83 y_mean_: float 84 average response 85 86 Examples: 87 88 ```python 89 TBD 90 ``` 91 92 """ 93 94 # construct the object ----- 95 96 def __init__( 97 self, 98 n_hidden_features=5, 99 activation_name="relu", 100 a=0.01, 101 nodes_sim="sobol", 102 bias=True, 103 dropout=0, 104 direct_link=True, 105 n_clusters=2, 106 cluster_encode=True, 107 type_clust="kmeans", 108 type_scaling=("std", "std", "std"), 109 seed=123, 110 s=0.1, 111 sigma=0.05, 112 return_std=True, 113 backend="cpu", 114 ): 115 super().__init__( 116 n_hidden_features=n_hidden_features, 117 activation_name=activation_name, 118 a=a, 119 nodes_sim=nodes_sim, 120 bias=bias, 121 dropout=dropout, 122 direct_link=direct_link, 123 n_clusters=n_clusters, 124 cluster_encode=cluster_encode, 125 type_clust=type_clust, 126 type_scaling=type_scaling, 127 seed=seed, 128 backend=backend, 129 ) 130 self.s = s 131 self.sigma = sigma 132 self.beta_ = None 133 self.Sigma_ = None 134 self.GCV_ = None 135 self.return_std = return_std 136 137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 
148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self 178 179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
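Since the example above is still marked TBD, here is a hedged sketch of what usage could look like (hyperparameter values are arbitrary illustrations):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

reg = ns.BayesianRVFLRegressor(n_hidden_features=10, s=0.1, sigma=0.05, seed=123)
reg.fit(X_train, y_train)
point_preds = reg.predict(X_test)                             # point predictions only
mean_preds, std_preds = reg.predict(X_test, return_std=True)  # with uncertainty
```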
```python
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFLRegressor to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
            self.cook_training_set

    Returns:

        self: object

    """

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    fit_obj = lmf.beta_Sigma_hat_rvfl(
        X=scaled_Z,
        y=centered_y,
        s=self.s,
        sigma=self.sigma,
        fit_intercept=False,
        return_cov=self.return_std,
        backend=self.backend,
    )

    self.beta_ = fit_obj["beta_hat"]

    if self.return_std == True:
        self.Sigma_ = fit_obj["Sigma_hat"]

    self.GCV_ = fit_obj["GCV"]

    return self
```
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
New samples to predict on, where n_samples is the number
of samples and n_features is the number of features.
return_std: boolean; if True, the standard deviation of the predictions is returned
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
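When `return_std=True`, the returned (mean, standard deviation) pair can be turned into an approximate prediction interval under a Gaussian assumption. A hedged, self-contained sketch on synthetic data:

```python
import nnetsauce as ns
import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(42)
X = rng.normal(size=(60, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.normal(size=60)

reg = ns.BayesianRVFLRegressor(seed=42).fit(X[:50], y[:50])
mean_, std_ = reg.predict(X[50:], return_std=True)
z = norm.ppf(0.975)                                  # ~1.96 for a 95% interval
lower, upper = mean_ - z * std_, mean_ + z * std_
```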
15class BayesianRVFL2Regressor(Base, RegressorMixin): 16 """Bayesian Random Vector Functional Link Network regression with two priors 17 18 Parameters: 19 20 n_hidden_features: int 21 number of nodes in the hidden layer 22 23 activation_name: str 24 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 25 26 a: float 27 hyperparameter for 'prelu' or 'elu' activation function 28 29 nodes_sim: str 30 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform' 31 32 bias: boolean 33 indicates if the hidden layer contains a bias term (True) or not (False) 34 35 dropout: float 36 regularization parameter; (random) percentage of nodes dropped out 37 of the training 38 39 direct_link: boolean 40 indicates if the original features are included (True) in model''s fitting or not (False) 41 42 n_clusters: int 43 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering) 44 45 cluster_encode: bool 46 defines how the variable containing clusters is treated (default is one-hot) 47 if `False`, then labels are used, without one-hot encoding 48 49 type_clust: str 50 type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm') 51 52 type_scaling: a tuple of 3 strings 53 scaling methods for inputs, hidden layer, and clustering respectively 54 (and when relevant). 55 Currently available: standardization ('std') or MinMax scaling ('minmax') 56 57 seed: int 58 reproducibility seed for nodes_sim=='uniform' 59 60 s1: float 61 std. dev. of init. regression parameters in Bayesian Ridge Regression 62 63 s2: float 64 std. dev. of augmented regression parameters in Bayesian Ridge Regression 65 66 sigma: float 67 std. dev. of residuals in Bayesian Ridge Regression 68 69 return_std: boolean 70 if True, uncertainty around predictions is evaluated 71 72 backend: str 73 "cpu" or "gpu" or "tpu" 74 75 Attributes: 76 77 beta_: array-like 78 regression''s coefficients 79 80 Sigma_: array-like 81 covariance of the distribution of fitted parameters 82 83 GCV_: float 84 Generalized cross-validation error 85 86 y_mean_: float 87 average response 88 89 Examples: 90 91 ```python 92 TBD 93 ``` 94 95 """ 96 97 # construct the object ----- 98 99 def __init__( 100 self, 101 n_hidden_features=5, 102 activation_name="relu", 103 a=0.01, 104 nodes_sim="sobol", 105 bias=True, 106 dropout=0, 107 direct_link=True, 108 n_clusters=0, 109 cluster_encode=True, 110 type_clust="kmeans", 111 type_scaling=("std", "std", "std"), 112 seed=123, 113 s1=0.1, 114 s2=0.1, 115 sigma=0.05, 116 return_std=True, 117 backend="cpu", 118 ): 119 super().__init__( 120 n_hidden_features=n_hidden_features, 121 activation_name=activation_name, 122 a=a, 123 nodes_sim=nodes_sim, 124 bias=bias, 125 dropout=dropout, 126 direct_link=direct_link, 127 n_clusters=n_clusters, 128 cluster_encode=cluster_encode, 129 type_clust=type_clust, 130 type_scaling=type_scaling, 131 seed=seed, 132 backend=backend, 133 ) 134 135 self.s1 = s1 136 self.s2 = s2 137 self.sigma = sigma 138 self.beta_ = None 139 self.Sigma_ = None 140 self.GCV_ = None 141 self.return_std = return_std 142 143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 
159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self 204 205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in the model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
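As above, the example is marked TBD; a hedged usage sketch could look as follows (the values of `s1`, `s2` and `sigma` are purely illustrative):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# s1: prior std. dev. for the original-feature coefficients,
# s2: prior std. dev. for the hidden-feature coefficients
reg = ns.BayesianRVFL2Regressor(n_hidden_features=10, s1=0.1, s2=0.2, sigma=0.05, seed=123)
reg.fit(X_train, y_train)
mean_preds, std_preds = reg.predict(X_test, return_std=True)
```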
143 def fit(self, X, y, **kwargs): 144 """Fit BayesianRVFL2Regressor to training data (X, y) 145 146 Parameters: 147 148 X: {array-like}, shape = [n_samples, n_features] 149 Training vectors, where n_samples is the number 150 of samples and n_features is the number of features 151 152 y: array-like, shape = [n_samples] 153 Target values 154 155 **kwargs: additional parameters to be passed to 156 self.cook_training_set 157 158 Returns: 159 160 self: object 161 162 """ 163 164 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 165 166 n, p = X.shape 167 q = self.n_hidden_features 168 169 if self.direct_link == True: 170 r = p + self.n_clusters 171 172 block11 = (self.s1**2) * np.eye(r) 173 block12 = np.zeros((r, q)) 174 block21 = np.zeros((q, r)) 175 block22 = (self.s2**2) * np.eye(q) 176 177 Sigma_prior = mo.rbind( 178 x=mo.cbind(x=block11, y=block12, backend=self.backend), 179 y=mo.cbind(x=block21, y=block22, backend=self.backend), 180 backend=self.backend, 181 ) 182 183 else: 184 Sigma_prior = (self.s2**2) * np.eye(q) 185 186 fit_obj = lmf.beta_Sigma_hat_rvfl2( 187 X=scaled_Z, 188 y=centered_y, 189 Sigma=Sigma_prior, 190 sigma=self.sigma, 191 fit_intercept=False, 192 return_cov=self.return_std, 193 backend=self.backend, 194 ) 195 196 self.beta_ = fit_obj["beta_hat"] 197 198 if self.return_std == True: 199 self.Sigma_ = fit_obj["Sigma_hat"] 200 201 self.GCV_ = fit_obj["GCV"] 202 203 return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
205 def predict(self, X, return_std=False, **kwargs): 206 """Predict test data X. 207 208 Parameters: 209 210 X: {array-like}, shape = [n_samples, n_features] 211 Training vectors, where n_samples is the number 212 of samples and n_features is the number of features. 213 214 return_std: {boolean}, standard dev. is returned or not 215 216 **kwargs: additional parameters to be passed to 217 self.cook_test_set 218 219 Returns: 220 221 model predictions: {array-like} 222 223 """ 224 225 if len(X.shape) == 1: # one observation in the test set only 226 n_features = X.shape[0] 227 new_X = mo.rbind( 228 x=X.reshape(1, n_features), 229 y=np.ones(n_features).reshape(1, n_features), 230 backend=self.backend, 231 ) 232 233 self.return_std = return_std 234 235 if self.return_std == False: 236 if len(X.shape) == 1: 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 self.cook_test_set(new_X, **kwargs), 241 self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 self.cook_test_set(X, **kwargs), 248 self.beta_, 249 backend=self.backend, 250 ) 251 252 else: # confidence interval required for preds? 253 if len(X.shape) == 1: 254 Z = self.cook_test_set(new_X, **kwargs) 255 256 pred_obj = lmf.beta_Sigma_hat_rvfl2( 257 X_star=Z, 258 return_cov=self.return_std, 259 beta_hat_=self.beta_, 260 Sigma_hat_=self.Sigma_, 261 backend=self.backend, 262 ) 263 264 return ( 265 self.y_mean_ + pred_obj["preds"][0], 266 pred_obj["preds_std"][0], 267 ) 268 269 Z = self.cook_test_set(X, **kwargs) 270 271 pred_obj = lmf.beta_Sigma_hat_rvfl2( 272 X_star=Z, 273 return_cov=self.return_std, 274 beta_hat_=self.beta_, 275 Sigma_hat_=self.Sigma_, 276 backend=self.backend, 277 ) 278 279 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
New samples to predict on, where n_samples is the number
of samples and n_features is the number of features.
return_std: boolean; if True, the standard deviation of the predictions is returned
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
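A small self-contained sketch of the two prediction modes, point predictions versus predictions with standard deviations, on synthetic data (illustrative settings only):

```python
import nnetsauce as ns
import numpy as np

rng = np.random.default_rng(7)
X = rng.normal(size=(40, 3))
y = X.sum(axis=1) + 0.1 * rng.normal(size=40)

reg = ns.BayesianRVFL2Regressor(seed=7).fit(X[:30], y[:30])
print(reg.GCV_)                                      # generalized cross-validation error
point_preds = reg.predict(X[30:])                    # point predictions only
mean_, std_ = reg.predict(X[30:], return_std=True)   # mean and std. dev. of predictions
```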
42class ClassicalMTS(Base): 43 """Multivariate time series (FactorMTS) forecasting with Factor models 44 45 Parameters: 46 47 model: type of model: str. 48 currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta' 49 50 Attributes: 51 52 df_: data frame 53 the input data frame, in case a data.frame is provided to `fit` 54 55 level_: int 56 level of confidence for prediction intervals (default is 95) 57 58 Examples: 59 See examples/classical_mts_timeseries.py 60 """ 61 62 # construct the object ----- 63 64 def __init__(self, model="VAR"): 65 66 self.model = model 67 if self.model == "VAR": 68 self.obj = VAR 69 elif self.model == "VECM": 70 self.obj = VECM 71 elif self.model == "ARIMA": 72 self.obj = ARIMA 73 elif self.model == "ETS": 74 self.obj = ExponentialSmoothing 75 elif self.model == "Theta": 76 self.obj = ThetaModel 77 else: 78 raise ValueError("model not recognized") 79 self.n_series = None 80 self.replications = None 81 self.mean_ = None 82 self.upper_ = None 83 self.lower_ = None 84 self.output_dates_ = None 85 self.alpha_ = None 86 self.df_ = None 87 self.residuals_ = [] 88 self.sims_ = None 89 self.level_ = None 90 91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 120 else: 121 self.series_names = "series0" 122 123 else: # input data set is a DataFrame or Series with column names 124 125 X_index = None 126 if X.index is not None and len(X.shape) > 1: 127 X_index = X.index 128 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 129 if X_index is not None: 130 try: 131 X.index = X_index 132 except Exception: 133 pass 134 if isinstance(X, pd.DataFrame): 135 self.series_names = X.columns.tolist() 136 else: 137 self.series_names = X.name 138 139 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 140 self.df_ = X 141 X = X.values 142 self.df_.columns = self.series_names 143 self.input_dates = ts.compute_input_dates(self.df_) 144 else: 145 self.df_ = pd.DataFrame(X, columns=self.series_names) 146 147 if self.model == "Theta": 148 self.obj = self.obj(self.df_, **kwargs).fit() 149 else: 150 self.obj = self.obj(X, **kwargs).fit(**kwargs) 151 152 return self 153 154 def predict(self, h=5, level=95, **kwargs): 155 """Forecast all the time series, h steps ahead 156 157 Parameters: 158 159 h: {integer} 160 Forecasting horizon 161 162 **kwargs: additional parameters to be passed to 163 self.cook_test_set 164 165 Returns: 166 167 model predictions for horizon = h: {array-like} 168 169 """ 170 171 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 172 173 self.level_ = level 174 175 self.lower_ = None # do not remove (/!\) 176 177 self.upper_ = None # do not remove (/!\) 178 179 self.sims_ = None # do not remove (/!\) 180 181 self.level_ = level 182 183 
self.alpha_ = 100 - level 184 185 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 186 187 # Named tuple for forecast results 188 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 189 190 if self.model == "VAR": 191 mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval( 192 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 193 ) 194 195 elif self.model == "VECM": 196 forecast_result = self.obj.predict(steps=h) 197 mean_forecast = forecast_result 198 lower_bound, upper_bound = self._compute_confidence_intervals( 199 forecast_result, alpha=self.alpha_ / 100, **kwargs 200 ) 201 202 elif self.model == "ARIMA": 203 forecast_result = self.obj.get_forecast(steps=h) 204 mean_forecast = forecast_result.predicted_mean 205 lower_bound = forecast_result.conf_int()[:, 0] 206 upper_bound = forecast_result.conf_int()[:, 1] 207 208 elif self.model == "ETS": 209 forecast_result = self.obj.forecast(steps=h) 210 residuals = self.obj.resid 211 std_errors = np.std(residuals) 212 mean_forecast = forecast_result 213 lower_bound = forecast_result - pi_multiplier * std_errors 214 upper_bound = forecast_result + pi_multiplier * std_errors 215 216 elif self.model == "Theta": 217 try: 218 mean_forecast = self.obj.forecast(steps=h).values 219 forecast_result = self.obj.prediction_intervals( 220 steps=h, alpha=self.alpha_ / 100, **kwargs 221 ) 222 lower_bound = forecast_result["lower"].values 223 upper_bound = forecast_result["upper"].values 224 except Exception: 225 mean_forecast = self.obj.forecast(steps=h) 226 forecast_result = self.obj.prediction_intervals( 227 steps=h, alpha=self.alpha_ / 100, **kwargs 228 ) 229 lower_bound = forecast_result["lower"] 230 upper_bound = forecast_result["upper"] 231 232 else: 233 234 raise ValueError("model not recognized") 235 236 try: 237 self.mean_ = pd.DataFrame( 238 mean_forecast, 239 columns=self.series_names, 240 index=self.output_dates_, 241 ) 242 self.lower_ = pd.DataFrame( 243 lower_bound, columns=self.series_names, index=self.output_dates_ 244 ) 245 self.upper_ = pd.DataFrame( 246 upper_bound, columns=self.series_names, index=self.output_dates_ 247 ) 248 except Exception: 249 self.mean_ = pd.Series( 250 mean_forecast, name=self.series_names, index=self.output_dates_ 251 ) 252 self.lower_ = pd.Series( 253 lower_bound, name=self.series_names, index=self.output_dates_ 254 ) 255 self.upper_ = pd.Series( 256 upper_bound, name=self.series_names, index=self.output_dates_ 257 ) 258 259 return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_) 260 261 def _compute_confidence_intervals(self, forecast_result, alpha): 262 """ 263 Compute confidence intervals for VECM forecasts. 264 Uses the covariance of residuals to approximate the confidence intervals. 
265 """ 266 residuals = self.obj.resid 267 cov_matrix = np.cov(residuals.T) # Covariance matrix of residuals 268 std_errors = np.sqrt(np.diag(cov_matrix)) # Standard errors 269 270 z_value = norm.ppf(1 - alpha / 2) # Z-score for the given alpha level 271 lower_bound = forecast_result - z_value * std_errors 272 upper_bound = forecast_result + z_value * std_errors 273 274 return lower_bound, upper_bound 275 276 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 277 """Train on training_index, score on testing_index.""" 278 279 assert ( 280 bool(set(training_index).intersection(set(testing_index))) == False 281 ), "Non-overlapping 'training_index' and 'testing_index' required" 282 283 # Dimensions 284 try: 285 # multivariate time series 286 n, p = X.shape 287 except: 288 # univariate time series 289 n = X.shape[0] 290 p = 1 291 292 # Training and testing sets 293 if p > 1: 294 X_train = X[training_index, :] 295 X_test = X[testing_index, :] 296 else: 297 X_train = X[training_index] 298 X_test = X[testing_index] 299 300 # Horizon 301 h = len(testing_index) 302 assert ( 303 len(training_index) + h 304 ) <= n, "Please check lengths of training and testing windows" 305 306 # Fit and predict 307 self.fit(X_train, **kwargs) 308 preds = self.predict(h=h, **kwargs) 309 310 if scoring is None: 311 scoring = "neg_root_mean_squared_error" 312 313 # check inputs 314 assert scoring in ( 315 "explained_variance", 316 "neg_mean_absolute_error", 317 "neg_mean_squared_error", 318 "neg_root_mean_squared_error", 319 "neg_mean_squared_log_error", 320 "neg_median_absolute_error", 321 "r2", 322 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 323 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 324 'neg_median_absolute_error', 'r2')" 325 326 scoring_options = { 327 "explained_variance": skm2.explained_variance_score, 328 "neg_mean_absolute_error": skm2.mean_absolute_error, 329 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 330 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 331 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 332 "neg_median_absolute_error": skm2.median_absolute_error, 333 "r2": skm2.r2_score, 334 } 335 336 # if p > 1: 337 # return tuple( 338 # [ 339 # scoring_options[scoring]( 340 # X_test[:, i], preds[:, i]#, **kwargs 341 # ) 342 # for i in range(p) 343 # ] 344 # ) 345 # else: 346 return scoring_options[scoring](X_test, preds) 347 348 def plot(self, series=None, type_axis="dates", type_plot="pi"): 349 """Plot time series forecast 350 351 Parameters: 352 353 series: {integer} or {string} 354 series index or name 355 356 """ 357 358 assert all( 359 [ 360 self.mean_ is not None, 361 self.lower_ is not None, 362 self.upper_ is not None, 363 self.output_dates_ is not None, 364 ] 365 ), "model forecasting must be obtained first (with predict)" 366 367 if series is None: 368 assert ( 369 self.n_series == 1 370 ), "please specify series index or name (n_series > 1)" 371 series = 0 372 373 if isinstance(series, str): 374 assert ( 375 series in self.series_names 376 ), f"series {series} doesn't exist in the input dataset" 377 series_idx = self.df_.columns.get_loc(series) 378 else: 379 assert isinstance(series, int) and ( 380 0 <= series < self.n_series 381 ), f"check series index (< {self.n_series})" 382 series_idx = series 383 384 if isinstance(self.df_, pd.DataFrame): 385 y_all = list(self.df_.iloc[:, series_idx]) + list( 386 self.mean_.iloc[:, series_idx] 387 
) 388 y_test = list(self.mean_.iloc[:, series_idx]) 389 else: 390 y_all = list(self.df_.values) + list(self.mean_.values) 391 y_test = list(self.mean_.values) 392 n_points_all = len(y_all) 393 n_points_train = self.df_.shape[0] 394 395 if type_axis == "numeric": 396 x_all = [i for i in range(n_points_all)] 397 x_test = [i for i in range(n_points_train, n_points_all)] 398 399 if type_axis == "dates": # use dates 400 x_all = np.concatenate( 401 (self.input_dates.values, self.output_dates_.values), axis=None 402 ) 403 x_test = self.output_dates_.values 404 405 if type_plot == "pi": 406 fig, ax = plt.subplots() 407 ax.plot(x_all, y_all, "-") 408 ax.plot(x_test, y_test, "-", color="orange") 409 try: 410 ax.fill_between( 411 x_test, 412 self.lower_.iloc[:, series_idx], 413 self.upper_.iloc[:, series_idx], 414 alpha=0.2, 415 color="orange", 416 ) 417 except Exception: 418 ax.fill_between( 419 x_test, 420 self.lower_.values, 421 self.upper_.values, 422 alpha=0.2, 423 color="orange", 424 ) 425 if self.replications is None: 426 if self.n_series > 1: 427 plt.title( 428 f"prediction intervals for {series}", 429 loc="left", 430 fontsize=12, 431 fontweight=0, 432 color="black", 433 ) 434 else: 435 plt.title( 436 f"prediction intervals for input time series", 437 loc="left", 438 fontsize=12, 439 fontweight=0, 440 color="black", 441 ) 442 plt.show() 443 else: # self.replications is not None 444 if self.n_series > 1: 445 plt.title( 446 f"prediction intervals for {self.replications} simulations of {series}", 447 loc="left", 448 fontsize=12, 449 fontweight=0, 450 color="black", 451 ) 452 else: 453 plt.title( 454 f"prediction intervals for {self.replications} simulations of input time series", 455 loc="left", 456 fontsize=12, 457 fontweight=0, 458 color="black", 459 ) 460 plt.show() 461 462 if type_plot == "spaghetti": 463 palette = plt.get_cmap("Set1") 464 sims_ix = getsims(self.sims_, series_idx) 465 plt.plot(x_all, y_all, "-") 466 for col_ix in range( 467 sims_ix.shape[1] 468 ): # avoid this when there are thousands of simulations 469 plt.plot( 470 x_test, 471 sims_ix[:, col_ix], 472 "-", 473 color=palette(col_ix), 474 linewidth=1, 475 alpha=0.9, 476 ) 477 plt.plot(x_all, y_all, "-", color="black") 478 plt.plot(x_test, y_test, "-", color="blue") 479 # Add titles 480 if self.n_series > 1: 481 plt.title( 482 f"{self.replications} simulations of {series}", 483 loc="left", 484 fontsize=12, 485 fontweight=0, 486 color="black", 487 ) 488 else: 489 plt.title( 490 f"{self.replications} simulations of input time series", 491 loc="left", 492 fontsize=12, 493 fontweight=0, 494 color="black", 495 ) 496 plt.xlabel("Time") 497 plt.ylabel("Values") 498 # Show the graph 499 plt.show() 500 501 def cross_val_score( 502 self, 503 X, 504 scoring="root_mean_squared_error", 505 n_jobs=None, 506 verbose=0, 507 xreg=None, 508 initial_window=5, 509 horizon=3, 510 fixed_window=False, 511 show_progress=True, 512 level=95, 513 **kwargs, 514 ): 515 """Evaluate a score by time series cross-validation. 516 517 Parameters: 518 519 X: {array-like, sparse matrix} of shape (n_samples, n_features) 520 The data to fit. 521 522 scoring: str or a function 523 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 524 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 525 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 526 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 527 528 n_jobs: int, default=None 529 Number of jobs to run in parallel. 
530 531 verbose: int, default=0 532 The verbosity level. 533 534 xreg: array-like, optional (default=None) 535 Additional (external) regressors to be passed to `fit` 536 xreg must be in 'increasing' order (most recent observations last) 537 538 initial_window: int 539 initial number of consecutive values in each training set sample 540 541 horizon: int 542 number of consecutive values in test set sample 543 544 fixed_window: boolean 545 if False, all training samples start at index 0, and the training 546 window's size is increasing. 547 if True, the training window's size is fixed, and the window is 548 rolling forward 549 550 show_progress: boolean 551 if True, a progress bar is printed 552 553 **kwargs: dict 554 additional parameters to be passed to `fit` and `predict` 555 556 Returns: 557 558 A tuple: descriptive statistics or errors and raw errors 559 560 """ 561 tscv = TimeSeriesSplit() 562 563 tscv_obj = tscv.split( 564 X, 565 initial_window=initial_window, 566 horizon=horizon, 567 fixed_window=fixed_window, 568 ) 569 570 if isinstance(scoring, str): 571 572 assert scoring in ( 573 "root_mean_squared_error", 574 "mean_squared_error", 575 "mean_error", 576 "mean_absolute_error", 577 "mean_percentage_error", 578 "mean_absolute_percentage_error", 579 "winkler_score", 580 "coverage", 581 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 582 583 def err_func(X_test, X_pred, scoring): 584 if (self.replications is not None) or ( 585 self.type_pi == "gaussian" 586 ): # probabilistic 587 if scoring == "winkler_score": 588 return winkler_score(X_pred, X_test, level=level) 589 elif scoring == "coverage": 590 return coverage(X_pred, X_test, level=level) 591 else: 592 return mean_errors( 593 pred=X_pred.mean, actual=X_test, scoring=scoring 594 ) 595 else: # not probabilistic 596 return mean_errors(pred=X_pred, actual=X_test, scoring=scoring) 597 598 else: # isinstance(scoring, str) = False 599 600 err_func = scoring 601 602 errors = [] 603 604 train_indices = [] 605 606 test_indices = [] 607 608 for train_index, test_index in tscv_obj: 609 train_indices.append(train_index) 610 test_indices.append(test_index) 611 612 if show_progress is True: 613 iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices)) 614 else: 615 iterator = zip(train_indices, test_indices) 616 617 for train_index, test_index in iterator: 618 619 if verbose == 1: 620 print(f"TRAIN: {train_index}") 621 print(f"TEST: {test_index}") 622 623 if isinstance(X, pd.DataFrame): 624 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 625 X_test = X.iloc[test_index, :] 626 else: 627 self.fit(X[train_index, :], xreg=xreg, **kwargs) 628 X_test = X[test_index, :] 629 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 630 631 errors.append(err_func(X_test, X_pred, scoring)) 632 633 res = np.asarray(errors) 634 635 return res, describe(res)
Multivariate time series forecasting with classical statistical models (VAR, VECM, ARIMA, ETS, Theta)
Parameters:
model: str
type of model; currently 'VAR', 'VECM', 'ARIMA', 'ETS' or 'Theta'
Attributes:
df_: data frame
the input data frame, in case a DataFrame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
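A minimal sketch in the spirit of the example file mentioned above; the monthly data below are synthetic and purely illustrative:

```python
import nnetsauce as ns
import numpy as np
import pandas as pd

rng = np.random.default_rng(123)
dates = pd.date_range("2020-01-01", periods=60, freq="MS")
x1 = np.cumsum(rng.normal(size=60))
x2 = 0.5 * x1 + rng.normal(size=60)
df = pd.DataFrame({"x1": x1, "x2": x2}, index=dates)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(df)
res = obj.predict(h=5)          # namedtuple with fields mean, lower, upper
print(res.mean)
```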
91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 120 else: 121 self.series_names = "series0" 122 123 else: # input data set is a DataFrame or Series with column names 124 125 X_index = None 126 if X.index is not None and len(X.shape) > 1: 127 X_index = X.index 128 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 129 if X_index is not None: 130 try: 131 X.index = X_index 132 except Exception: 133 pass 134 if isinstance(X, pd.DataFrame): 135 self.series_names = X.columns.tolist() 136 else: 137 self.series_names = X.name 138 139 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 140 self.df_ = X 141 X = X.values 142 self.df_.columns = self.series_names 143 self.input_dates = ts.compute_input_dates(self.df_) 144 else: 145 self.df_ = pd.DataFrame(X, columns=self.series_names) 146 147 if self.model == "Theta": 148 self.obj = self.obj(self.df_, **kwargs).fit() 149 else: 150 self.obj = self.obj(X, **kwargs).fit(**kwargs) 151 152 return self
Fit the chosen classical model (VAR, VECM, ARIMA, ETS or Theta) to training data X
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: additional parameters passed to the underlying statsmodels model's constructor and `fit` method
Returns:
self: object
154 def predict(self, h=5, level=95, **kwargs): 155 """Forecast all the time series, h steps ahead 156 157 Parameters: 158 159 h: {integer} 160 Forecasting horizon 161 162 **kwargs: additional parameters to be passed to 163 self.cook_test_set 164 165 Returns: 166 167 model predictions for horizon = h: {array-like} 168 169 """ 170 171 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 172 173 self.level_ = level 174 175 self.lower_ = None # do not remove (/!\) 176 177 self.upper_ = None # do not remove (/!\) 178 179 self.sims_ = None # do not remove (/!\) 180 181 self.level_ = level 182 183 self.alpha_ = 100 - level 184 185 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 186 187 # Named tuple for forecast results 188 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 189 190 if self.model == "VAR": 191 mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval( 192 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 193 ) 194 195 elif self.model == "VECM": 196 forecast_result = self.obj.predict(steps=h) 197 mean_forecast = forecast_result 198 lower_bound, upper_bound = self._compute_confidence_intervals( 199 forecast_result, alpha=self.alpha_ / 100, **kwargs 200 ) 201 202 elif self.model == "ARIMA": 203 forecast_result = self.obj.get_forecast(steps=h) 204 mean_forecast = forecast_result.predicted_mean 205 lower_bound = forecast_result.conf_int()[:, 0] 206 upper_bound = forecast_result.conf_int()[:, 1] 207 208 elif self.model == "ETS": 209 forecast_result = self.obj.forecast(steps=h) 210 residuals = self.obj.resid 211 std_errors = np.std(residuals) 212 mean_forecast = forecast_result 213 lower_bound = forecast_result - pi_multiplier * std_errors 214 upper_bound = forecast_result + pi_multiplier * std_errors 215 216 elif self.model == "Theta": 217 try: 218 mean_forecast = self.obj.forecast(steps=h).values 219 forecast_result = self.obj.prediction_intervals( 220 steps=h, alpha=self.alpha_ / 100, **kwargs 221 ) 222 lower_bound = forecast_result["lower"].values 223 upper_bound = forecast_result["upper"].values 224 except Exception: 225 mean_forecast = self.obj.forecast(steps=h) 226 forecast_result = self.obj.prediction_intervals( 227 steps=h, alpha=self.alpha_ / 100, **kwargs 228 ) 229 lower_bound = forecast_result["lower"] 230 upper_bound = forecast_result["upper"] 231 232 else: 233 234 raise ValueError("model not recognized") 235 236 try: 237 self.mean_ = pd.DataFrame( 238 mean_forecast, 239 columns=self.series_names, 240 index=self.output_dates_, 241 ) 242 self.lower_ = pd.DataFrame( 243 lower_bound, columns=self.series_names, index=self.output_dates_ 244 ) 245 self.upper_ = pd.DataFrame( 246 upper_bound, columns=self.series_names, index=self.output_dates_ 247 ) 248 except Exception: 249 self.mean_ = pd.Series( 250 mean_forecast, name=self.series_names, index=self.output_dates_ 251 ) 252 self.lower_ = pd.Series( 253 lower_bound, name=self.series_names, index=self.output_dates_ 254 ) 255 self.upper_ = pd.Series( 256 upper_bound, name=self.series_names, index=self.output_dates_ 257 ) 258 259 return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)
Forecast all the time series, h steps ahead

Parameters:

    h: {integer}
        Forecasting horizon

    **kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

    model predictions for horizon = h: {array-like}
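Continuing the hedged sketch shown after the `fit` documentation above: `predict` returns a named tuple with `mean`, `lower` and `upper` fields, as constructed in the source listed before this section.

```python
# res is a DescribeResult named tuple (mean, lower, upper)
res = obj.predict(h=5, level=95)
print(res.mean)   # point forecasts, one column per series
print(res.lower)  # lower bounds of the 95% prediction intervals
print(res.upper)  # upper bounds of the 95% prediction intervals
```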
    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
        """Train on training_index, score on testing_index."""

        assert not set(training_index).intersection(
            set(testing_index)
        ), "Non-overlapping 'training_index' and 'testing_index' required"

        # Dimensions
        try:
            # multivariate time series
            n, p = X.shape
        except ValueError:
            # univariate time series
            n = X.shape[0]
            p = 1

        # Training and testing sets
        if p > 1:
            X_train = X[training_index, :]
            X_test = X[testing_index, :]
        else:
            X_train = X[training_index]
            X_test = X[testing_index]

        # Horizon
        h = len(testing_index)
        assert (
            len(training_index) + h
        ) <= n, "Please check lengths of training and testing windows"

        # Fit and predict
        self.fit(X_train, **kwargs)
        preds = self.predict(h=h, **kwargs)

        if scoring is None:
            scoring = "neg_root_mean_squared_error"

        # check inputs
        assert scoring in (
            "explained_variance",
            "neg_mean_absolute_error",
            "neg_mean_squared_error",
            "neg_root_mean_squared_error",
            "neg_mean_squared_log_error",
            "neg_median_absolute_error",
            "r2",
        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
            'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
            'neg_median_absolute_error', 'r2')"

        scoring_options = {
            "explained_variance": skm2.explained_variance_score,
            "neg_mean_absolute_error": skm2.mean_absolute_error,
            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
            "neg_median_absolute_error": skm2.median_absolute_error,
            "r2": skm2.r2_score,
        }

        # if p > 1:
        #     return tuple(
        #         [
        #             scoring_options[scoring](
        #                 X_test[:, i], preds[:, i]  # , **kwargs
        #             )
        #             for i in range(p)
        #         ]
        #     )
        # else:
        return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
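A hedged sketch of `score`, reusing `df` and `obj` from the sketch above: the source indexes `X[training_index, :]`, which suggests NumPy array input, and the two index sets must not overlap and must fit within the series length.

```python
# Hedged sketch: train on the first 26 observations, score on the last 10
import numpy as np

training_index = np.arange(0, 26)
testing_index = np.arange(26, 36)

err = obj.score(
    df.values,  # array input, as suggested by the indexing inside score
    training_index=training_index,
    testing_index=testing_index,
    scoring="neg_root_mean_squared_error",
)
print(err)
```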
class CustomClassifier(Custom, ClassifierMixin):
    """Custom Classification model

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        level: float
            confidence level for prediction sets. Default is None.

        pi_method: str
            method for constructing the prediction sets: 'icp', 'tcp' if level is not None.
            Default is 'icp'.
        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly

    ```python
    import nnetsauce as ns
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_digits
    from time import time

    digits = load_digits()
    X = digits.data
    y = digits.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=123)

    # layer 1 (base layer) ----
    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

    start = time()

    layer1_regr.fit(X_train, y_train)

    # Accuracy in layer 1
    print(layer1_regr.score(X_test, y_test))

    # layer 2 using layer 1 ----
    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                                      direct_link=True, bias=True,
                                      nodes_sim='uniform', activation_name='relu',
                                      n_clusters=2, seed=123)
    layer2_regr.fit(X_train, y_train)

    # Accuracy in layer 2
    print(layer2_regr.score(X_test, y_test))

    # layer 3 using layer 2 ----
    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                                      direct_link=True, bias=True, dropout=0.7,
                                      nodes_sim='uniform', activation_name='relu',
                                      n_clusters=2, seed=123)
    layer3_regr.fit(X_train, y_train)

    # Accuracy in layer 3
    print(layer3_regr.score(X_test, y_test))

    print(f"Elapsed {time() - start}")
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method="icp",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        if self.level is not None:
            self.obj = PredictionSet(self.obj, level=self.level, method=self.pi_method)

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.
            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        if self.level is not None:
            self.obj = PredictionSet(
                obj=self.obj, method=self.pi_method, level=self.level
            )

        # if sample_weights, else: (must use self.row_index)
        if sample_weight is not None:
            self.obj.fit(
                scaled_Z,
                output_y,
                sample_weight=sample_weight[self.index_row_].ravel(),
                # **kwargs
            )

            return self

        # if sample_weight is None:
        self.obj.fit(scaled_Z, output_y)
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, sample_weight=None, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)
            y = np.array([y], dtype=np.integer)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn

        # if sample_weights, else: (must use self.row_index)
        if sample_weight is not None:
            try:
                self.obj.partial_fit(
                    scaled_Z,
                    output_y,
                    sample_weight=sample_weight[self.index_row_].ravel(),
                    # **kwargs
                )
            except Exception:
                raise NotImplementedError
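For completeness, a hedged sketch of the `level`/`pi_method` options documented above: when `level` is not None, `__init__` and `fit` wrap the base estimator in a `PredictionSet` object. The output format of `predict` under that wrapper is defined by `PredictionSet`, which is not part of this excerpt, so only construction and fitting are illustrated.

```python
# Hedged sketch: conformal prediction sets via level / pi_method
# (the prediction-set output format comes from PredictionSet and is not
#  shown in this excerpt)
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.CustomClassifier(
    obj=LogisticRegression(max_iter=1000),
    n_hidden_features=5,
    level=95,         # request 95% prediction sets
    pi_method="icp",  # inductive conformal prediction
)
clf.fit(X_train, y_train)
print(clf.predict(X_test[:5]))  # output depends on the PredictionSet wrapper
```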