nnetsauce

from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.classical import ClassicalMTS
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

__all__ = [
    "AdaBoostClassifier",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "Downloader",
    "GLMClassifier",
    "GLMRegressor",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MedianVotingRegressor",
    "MTS",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RegressorUpdater",
    "ClassifierUpdater",
    "Ridge2Regressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "SubSampler",
]
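
The estimators exported above follow the scikit-learn `fit`/`predict` interface and, for the `Custom*` and ensemble classes, wrap a user-supplied base learner (see the class docstrings below). A minimal usage sketch, assuming scikit-learn is installed; the hyperparameter values are illustrative only:

```python
# Minimal sketch: augment a scikit-learn learner with nnetsauce's randomized
# hidden layer and cluster features (hyperparameter values are illustrative).
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

clf = ns.CustomClassifier(
    obj=LogisticRegression(),
    n_hidden_features=5,  # nodes in the randomized hidden layer
    n_clusters=2,         # cluster features appended to the inputs
)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
```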
class AdaBoostClassifier(nnetsauce.boosting.bst.Boosting, sklearn.base.ClassifierMixin):
 21class AdaBoostClassifier(Boosting, ClassifierMixin):
 22    """AdaBoost Classification (SAMME) model class derived from class Boosting
 23
 24    Parameters:
 25
 26        obj: object
 27            any object containing a method fit (obj.fit()) and a method predict
 28            (obj.predict())
 29
 30        n_estimators: int
 31            number of boosting iterations
 32
 33        learning_rate: float
 34            learning rate of the boosting procedure
 35
 36        n_hidden_features: int
 37            number of nodes in the hidden layer
 38
 39        reg_lambda: float
 40            regularization parameter for weights
 41
 42        reg_alpha: float
 43            controls the compromise between the l1 and l2 norms of the weights
 44
 45        activation_name: str
 46            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 47
 48        a: float
 49            hyperparameter for 'prelu' or 'elu' activation function
 50
 51        nodes_sim: str
 52            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 53            'uniform'
 54
 55        bias: boolean
 56            indicates if the hidden layer contains a bias term (True) or not
 57            (False)
 58
 59        dropout: float
 60            regularization parameter; (random) percentage of nodes dropped out
 61            of the training
 62
 63        direct_link: boolean
 64            indicates if the original predictors are included (True) in model's
 65            fitting or not (False)
 66
 67        n_clusters: int
 68            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 69                no clustering)
 70
 71        cluster_encode: bool
 72            defines how the variable containing clusters is treated (default is one-hot)
 73            if `False`, then labels are used, without one-hot encoding
 74
 75        type_clust: str
 76            type of clustering method: currently k-means ('kmeans') or Gaussian
 77            Mixture Model ('gmm')
 78
 79        type_scaling: a tuple of 3 strings
 80            scaling methods for inputs, hidden layer, and clustering respectively
 81            (and when relevant).
 82            Currently available: standardization ('std') or MinMax scaling ('minmax')
 83
 84        col_sample: float
 85            percentage of covariates randomly chosen for training
 86
 87        row_sample: float
 88            percentage of rows chosen for training, by stratified bootstrapping
 89
 90        seed: int
 91            reproducibility seed for nodes_sim=='uniform'
 92
 93        verbose: int
 94            0 for no output, 1 for a progress bar (default is 1)
 95
 96        method: str
 97            type of AdaBoost method: 'SAMME' (discrete) or 'SAMME.R' (real)
 98
 99        backend: str
100            "cpu" or "gpu" or "tpu"
101
102    Attributes:
103
104        alpha_: list
105            AdaBoost coefficients alpha_m
106
107        base_learners_: dict
108            a dictionary containing the base learners
109
110    Examples:
111
112    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)
113
114    ```python
115    import nnetsauce as ns
116    import numpy as np
117    from sklearn.datasets import load_breast_cancer
118    from sklearn.linear_model import LogisticRegression
119    from sklearn.model_selection import train_test_split
120    from sklearn import metrics
121    from time import time
122
123    breast_cancer = load_breast_cancer()
124    Z = breast_cancer.data
125    t = breast_cancer.target
126    np.random.seed(123)
127    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
128
129    # SAMME.R
130    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
131                            random_state=123)
132    fit_obj = ns.AdaBoostClassifier(clf,
133                                    n_hidden_features=int(11.22338867),
134                                    direct_link=True,
135                                    n_estimators=250, learning_rate=0.01126343,
136                                    col_sample=0.72684326, row_sample=0.86429443,
137                                    dropout=0.63078613, n_clusters=2,
138                                    type_clust="gmm",
139                                    verbose=1, seed = 123,
140                                    method="SAMME.R")
141
142    start = time()
143    fit_obj.fit(X_train, y_train)
144    print(f"Elapsed {time() - start}")
145
146    start = time()
147    print(fit_obj.score(X_test, y_test))
148    print(f"Elapsed {time() - start}")
149
150    preds = fit_obj.predict(X_test)
151
152    print(metrics.classification_report(preds, y_test))
153
154    ```
155
156    """
157
158    # construct the object -----
159
160    def __init__(
161        self,
162        obj,
163        n_estimators=10,
164        learning_rate=0.1,
165        n_hidden_features=1,
166        reg_lambda=0,
167        reg_alpha=0.5,
168        activation_name="relu",
169        a=0.01,
170        nodes_sim="sobol",
171        bias=True,
172        dropout=0,
173        direct_link=False,
174        n_clusters=2,
175        cluster_encode=True,
176        type_clust="kmeans",
177        type_scaling=("std", "std", "std"),
178        col_sample=1,
179        row_sample=1,
180        seed=123,
181        verbose=1,
182        method="SAMME",
183        backend="cpu",
184    ):
185        self.type_fit = "classification"
186        self.verbose = verbose
187        self.method = method
188        self.reg_lambda = reg_lambda
189        self.reg_alpha = reg_alpha
190
191        super().__init__(
192            obj=obj,
193            n_estimators=n_estimators,
194            learning_rate=learning_rate,
195            n_hidden_features=n_hidden_features,
196            activation_name=activation_name,
197            a=a,
198            nodes_sim=nodes_sim,
199            bias=bias,
200            dropout=dropout,
201            direct_link=direct_link,
202            n_clusters=n_clusters,
203            cluster_encode=cluster_encode,
204            type_clust=type_clust,
205            type_scaling=type_scaling,
206            col_sample=col_sample,
207            row_sample=row_sample,
208            seed=seed,
209            backend=backend,
210        )
211
212        self.alpha_ = []
213        self.base_learners_ = dict.fromkeys(range(n_estimators))
214
215    def fit(self, X, y, sample_weight=None, **kwargs):
216        """Fit Boosting model to training data (X, y).
217
218        Parameters:
219
220            X: {array-like}, shape = [n_samples, n_features]
221                Training vectors, where n_samples is the number
222                of samples and n_features is the number of features.
223
224            y: array-like, shape = [n_samples]
225                Target values.
226
227            **kwargs: additional parameters to be passed to
228                    self.cook_training_set or self.obj.fit
229
230        Returns:
231
232             self: object
233        """
234
235        assert mx.is_factor(y), "y must contain only integers"
236
237        assert self.method in (
238            "SAMME",
239            "SAMME.R",
240        ), "`method` must be either 'SAMME' or 'SAMME.R'"
241
242        assert (self.reg_lambda <= 1) & (
243            self.reg_lambda >= 0
244        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"
245
246        assert (self.reg_alpha <= 1) & (
247            self.reg_alpha >= 0
248        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"
249
250        # training
251        n, p = X.shape
252        self.n_classes = len(np.unique(y))
253        self.classes_ = np.unique(y)  # for compatibility with sklearn
254        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
255
256        if sample_weight is None:
257            w_m = np.repeat(1.0 / n, n)
258        else:
259            w_m = np.asarray(sample_weight)
260
261        base_learner = CustomClassifier(
262            self.obj,
263            n_hidden_features=self.n_hidden_features,
264            activation_name=self.activation_name,
265            a=self.a,
266            nodes_sim=self.nodes_sim,
267            bias=self.bias,
268            dropout=self.dropout,
269            direct_link=self.direct_link,
270            n_clusters=self.n_clusters,
271            type_clust=self.type_clust,
272            type_scaling=self.type_scaling,
273            col_sample=self.col_sample,
274            row_sample=self.row_sample,
275            seed=self.seed,
276        )
277
278        if self.verbose == 1:
279            pbar = Progbar(self.n_estimators)
280
281        if self.method == "SAMME":
282            err_m = 1e6
283            err_bound = 1 - 1 / self.n_classes
284            self.alpha_.append(1.0)
285            x_range_n = range(n)
286
287            for m in range(self.n_estimators):
288                preds = base_learner.fit(
289                    X, y, sample_weight=w_m.ravel(), **kwargs
290                ).predict(X)
291
292                self.base_learners_.update({m: deepcopy(base_learner)})
293
294                cond = [y[i] != preds[i] for i in x_range_n]
295
296                err_m = max(
297                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
298                    2.220446049250313e-16,
299                )  # sum(w_m) == 1
300
301                if self.reg_lambda > 0:
302                    err_m += self.reg_lambda * (
303                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
304                        + self.reg_alpha * sum([abs(x) for x in w_m])
305                    )
306
307                err_m = min(err_m, err_bound)
308
309                alpha_m = self.learning_rate * log(
310                    (self.n_classes - 1) * (1 - err_m) / err_m
311                )
312
313                self.alpha_.append(alpha_m)
314
315                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]
316
317                sum_w_m = sum(w_m_temp)
318
319                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])
320
321                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
322
323                if self.verbose == 1:
324                    pbar.update(m)
325
326            if self.verbose == 1:
327                pbar.update(self.n_estimators)
328
329            self.n_estimators = len(self.base_learners_)
330            self.classes_ = np.unique(y)
331
332            return self
333
334        if self.method == "SAMME.R":
335            Y = mo.one_hot_encode2(y, self.n_classes)
336
337            if sample_weight is None:
338                w_m = np.repeat(1.0 / n, n)  # (N, 1)
339
340            else:
341                w_m = np.asarray(sample_weight)
342
343            for m in range(self.n_estimators):
344                probs = base_learner.fit(
345                    X, y, sample_weight=w_m.ravel(), **kwargs
346                ).predict_proba(X)
347
348                np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
349
350                self.base_learners_.update({m: deepcopy(base_learner)})
351
352                w_m *= np.exp(
353                    -1.0
354                    * self.learning_rate
355                    * (1.0 - 1.0 / self.n_classes)
356                    * xlogy(Y, probs).sum(axis=1)
357                )
358
359                w_m /= np.sum(w_m)
360
361                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
362
363                if self.verbose == 1:
364                    pbar.update(m)
365
366            if self.verbose == 1:
367                pbar.update(self.n_estimators)
368
369            self.n_estimators = len(self.base_learners_)
370            self.classes_ = np.unique(y)
371
372            return self
373
374    def predict(self, X, **kwargs):
375        """Predict test data X.
376
377        Parameters:
378
379            X: {array-like}, shape = [n_samples, n_features]
380                Test vectors, where n_samples is the number
381                of samples and n_features is the number of features.
382
383            **kwargs: additional parameters to be passed to
384                  self.cook_test_set
385
386        Returns:
387
388            model predictions: {array-like}
389        """
390        return self.predict_proba(X, **kwargs).argmax(axis=1)
391
392    def predict_proba(self, X, **kwargs):
393        """Predict probabilities for test data X.
394
395        Parameters:
396
397            X: {array-like}, shape = [n_samples, n_features]
398                Test vectors, where n_samples is the number
399                of samples and n_features is the number of features.
400
401            **kwargs: additional parameters to be passed to
402                  self.cook_test_set
403
404        Returns:
405
406            probability estimates for test data: {array-like}
407
408        """
409
410        n_iter = len(self.base_learners_)
411
412        if self.method == "SAMME":
413            ensemble_learner = np.zeros((X.shape[0], self.n_classes))
414
415            # if self.verbose == 1:
416            #    pbar = Progbar(n_iter)
417
418            for idx, base_learner in self.base_learners_.items():
419                preds = base_learner.predict(X, **kwargs)
420
421                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
422                    preds, self.n_classes
423                )
424
425                # if self.verbose == 1:
426                #    pbar.update(idx)
427
428            # if self.verbose == 1:
429            #    pbar.update(n_iter)
430
431            expit_ensemble_learner = expit(ensemble_learner)
432
433            sum_ensemble = expit_ensemble_learner.sum(axis=1)
434
435            return expit_ensemble_learner / sum_ensemble[:, None]
436
437        # if self.method == "SAMME.R":
438        ensemble_learner = 0
439
440        # if self.verbose == 1:
441        #    pbar = Progbar(n_iter)
442
443        for idx, base_learner in self.base_learners_.items():
444            probs = base_learner.predict_proba(X, **kwargs)
445
446            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
447
448            log_preds_proba = np.log(probs)
449
450            ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
451
452            # if self.verbose == 1:
453            #    pbar.update(idx)
454
455        ensemble_learner *= self.n_classes - 1
456
457        # if self.verbose == 1:
458        #    pbar.update(n_iter)
459
460        expit_ensemble_learner = expit(ensemble_learner)
461
462        sum_ensemble = expit_ensemble_learner.sum(axis=1)
463
464        return expit_ensemble_learner / sum_ensemble[:, None]
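
For reference, the `"SAMME"` branch of `fit` above performs the following update at iteration $m$, with $K$ classes, learning rate $\eta$, machine epsilon $\varepsilon$ and base learner $h_m$ (leaving aside the optional elastic-net penalty added to $\mathrm{err}_m$ when `reg_lambda > 0`):

$$
\mathrm{err}_m = \min\!\Big(\max\Big(\textstyle\sum_{i=1}^{n} w^{(m)}_i\,\mathbf{1}\{y_i \neq h_m(x_i)\},\ \varepsilon\Big),\ 1 - \tfrac{1}{K}\Big),
\qquad
\alpha_m = \eta \,\log\!\frac{(K-1)\,(1-\mathrm{err}_m)}{\mathrm{err}_m},
$$

$$
w^{(m+1)}_i = \frac{\exp\!\big(\alpha_m\,\mathbf{1}\{y_i \neq h_m(x_i)\}\big)}{\sum_{j=1}^{n}\exp\!\big(\alpha_m\,\mathbf{1}\{y_j \neq h_m(x_j)\}\big)}.
$$

`predict_proba` then returns the row-normalized `expit` of $\sum_m \alpha_m\,\mathrm{onehot}(h_m(x))$ for `"SAMME"`, and of the $(K-1)$-scaled, row-centered log-probabilities accumulated over the base learners for `"SAMME.R"`.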

class Base(sklearn.base.BaseEstimator):
 46class Base(BaseEstimator):
 47    """Base model from which all the other classes inherit.
 48
 49    This class contains the most important data preprocessing/feature engineering methods.
 50
 51    Parameters:
 52
 53        n_hidden_features: int
 54            number of nodes in the hidden layer
 55
 56        activation_name: str
 57            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 58
 59        a: float
 60            hyperparameter for 'prelu' or 'elu' activation function
 61
 62        nodes_sim: str
 63            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 64            'uniform'
 65
 66        bias: boolean
 67            indicates if the hidden layer contains a bias term (True) or
 68            not (False)
 69
 70        dropout: float
 71            regularization parameter; (random) percentage of nodes dropped out
 72            of the training
 73
 74        direct_link: boolean
 75            indicates if the original features are included (True) in model's
 76            fitting or not (False)
 77
 78        n_clusters: int
 79            number of clusters for type_clust='kmeans' or type_clust='gmm'
 80            clustering (could be 0: no clustering)
 81
 82        cluster_encode: bool
 83            defines how the variable containing clusters is treated (default is one-hot);
 84            if `False`, then labels are used, without one-hot encoding
 85
 86        type_clust: str
 87            type of clustering method: currently k-means ('kmeans') or Gaussian
 88            Mixture Model ('gmm')
 89
 90        type_scaling: a tuple of 3 strings
 91            scaling methods for inputs, hidden layer, and clustering respectively
 92            (and when relevant).
 93            Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or  max absolute scaling ('maxabs')
 94
 95        col_sample: float
 96            percentage of features randomly chosen for training
 97
 98        row_sample: float
 99            percentage of rows chosen for training, by stratified bootstrapping
100
101        seed: int
102            reproducibility seed for nodes_sim=='uniform', clustering and dropout
103
104        backend: str
105            "cpu" or "gpu" or "tpu"
106
107    """
108
109    # construct the object -----
110
111    def __init__(
112        self,
113        n_hidden_features=5,
114        activation_name="relu",
115        a=0.01,
116        nodes_sim="sobol",
117        bias=True,
118        dropout=0,
119        direct_link=True,
120        n_clusters=2,
121        cluster_encode=True,
122        type_clust="kmeans",
123        type_scaling=("std", "std", "std"),
124        col_sample=1,
125        row_sample=1,
126        seed=123,
127        backend="cpu",
128    ):
129        # input checks -----
130
131        sys_platform = platform.system()
132
133        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
134            warnings.warn("No GPU/TPU computing on Windows yet, backend set to 'cpu'")
135            backend = "cpu"
136
137        assert activation_name in (
138            "relu",
139            "tanh",
140            "sigmoid",
141            "prelu",
142            "elu",
143        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')"
144
145        assert nodes_sim in (
146            "sobol",
147            "hammersley",
148            "uniform",
149            "halton",
150        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"
151
152        assert type_clust in (
153            "kmeans",
154            "gmm",
155        ), "'type_clust' must be in ('kmeans', 'gmm')"
156
157        assert (len(type_scaling) == 3) & all(
158            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
159            for i in range(len(type_scaling))
160        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"
161
162        assert (col_sample >= 0) & (
163            col_sample <= 1
164        ), "'col_sample' must be comprised between 0 and 1 (both included)"
165
166        assert backend in (
167            "cpu",
168            "gpu",
169            "tpu",
170        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"
171
172        self.n_hidden_features = n_hidden_features
173        self.activation_name = activation_name
174        self.a = a
175        self.nodes_sim = nodes_sim
176        self.bias = bias
177        self.seed = seed
178        self.backend = backend
179        self.dropout = dropout
180        self.direct_link = direct_link
181        self.cluster_encode = cluster_encode
182        self.type_clust = type_clust
183        self.type_scaling = type_scaling
184        self.col_sample = col_sample
185        self.row_sample = row_sample
186        self.n_clusters = n_clusters
187        if isinstance(self, RegressorMixin):
188            self.type_fit = "regression"
189        elif isinstance(self, ClassifierMixin):
190            self.type_fit = "classification"
191        self.subsampler_ = None
192        self.index_col_ = None
193        self.index_row_ = True
194        self.clustering_obj_ = None
195        self.clustering_scaler_ = None
196        self.nn_scaler_ = None
197        self.scaler_ = None
198        self.encoder_ = None
199        self.W_ = None
200        self.X_ = None
201        self.y_ = None
202        self.y_mean_ = None
203        self.beta_ = None
204
205        # activation function -----
206        if sys_platform in ("Linux", "Darwin"):
207            activation_options = {
208                "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
209                "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
210                "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
211                "prelu": partial(ac.prelu, a=a),
212                "elu": (
213                    partial(ac.elu, a=a)
214                    if (self.backend == "cpu")
215                    else partial(jnn.elu, a=a)
216                ),
217            }
218        else:  # on Windows currently, no JAX
219            activation_options = {
220                "relu": (ac.relu if (self.backend == "cpu") else NotImplementedError),
221                "tanh": (np.tanh if (self.backend == "cpu") else NotImplementedError),
222                "sigmoid": (
223                    ac.sigmoid if (self.backend == "cpu") else NotImplementedError
224                ),
225                "prelu": partial(ac.prelu, a=a),
226                "elu": (
227                    partial(ac.elu, a=a)
228                    if (self.backend == "cpu")
229                    else NotImplementedError
230                ),
231            }
232        self.activation_func = activation_options[activation_name]
233
234    # "preprocessing" methods to be inherited -----
235
236    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
237        """Create new covariates with kmeans or GMM clustering
238
239        Parameters:
240
241            X: {array-like}, shape = [n_samples, n_features]
242                Training vectors, where n_samples is the number
243                of samples and n_features is the number of features.
244
245            predict: boolean
246                is False on training set and True on test set
247
248            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
249                if scaler has already been fitted on training data (online training), it can be passed here
250
251            **kwargs:
252                additional parameters to be passed to the
253                clustering method
254
255        Returns:
256
257            Clusters' matrix, one-hot encoded: {array-like}
258
259        """
260
261        np.random.seed(self.seed)
262
263        if X is None:
264            X = self.X_
265
266        if isinstance(X, pd.DataFrame):
267            X = copy.deepcopy(X.values.astype(float))
268
269        if len(X.shape) == 1:
270            X = X.reshape(1, -1)
271
272        if predict is False:  # encode training set
273
274            # scale input data before clustering
275            self.clustering_scaler_, scaled_X = mo.scale_covariates(
276                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
277            )
278
279            self.clustering_obj_, X_clustered = mo.cluster_covariates(
280                scaled_X,
281                self.n_clusters,
282                self.seed,
283                type_clust=self.type_clust,
284                **kwargs
285            )
286
287            if self.cluster_encode == True:
288                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
289                    np.float16
290                )
291
292            return X_clustered.astype(np.float16)
293
294        # if predict == True, encode test set
295        X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))
296
297        if self.cluster_encode == True:
298            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)
299
300        return X_clustered.astype(np.float16)
301
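
Conceptually, `encode_clusters` appends cluster-membership features to the covariates: the inputs are scaled (with the scaler chosen by `type_scaling[2]`), clustered with k-means or a Gaussian mixture, and the cluster labels are one-hot encoded (unless `cluster_encode=False`). A rough standalone sketch using plain scikit-learn objects instead of the internal `mo.scale_covariates`/`mo.cluster_covariates` helpers (illustrative only):

```python
# Rough sketch of the cluster-encoding step with scikit-learn
# (the class stores the fitted scaler and clusterer so the same
# transform can be replayed on test data).
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

def encode_clusters_sketch(X, n_clusters=2, seed=123):
    scaler = StandardScaler().fit(X)
    km = KMeans(n_clusters=n_clusters, random_state=seed, n_init=10).fit(
        scaler.transform(X)
    )
    # one-hot encode the cluster labels as extra covariates
    return np.eye(n_clusters, dtype=np.float16)[km.labels_]
```
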
302    def create_layer(self, scaled_X, W=None):
303        """Create hidden layer.
304
305        Parameters:
306
307            scaled_X: {array-like}, shape = [n_samples, n_features]
308                Training vectors, where n_samples is the number
309                of samples and n_features is the number of features
310
311            W: {array-like}, shape = [n_features, hidden_features]
312                if provided, constructs the hidden layer with W; otherwise computed internally
313
314        Returns:
315
316            Hidden layer matrix: {array-like}
317
318        """
319
320        n_features = scaled_X.shape[1]
321
322        # hash_sim = {
323        #         "sobol": generate_sobol,
324        #         "hammersley": generate_hammersley,
325        #         "uniform": generate_uniform,
326        #         "halton": generate_halton
327        #     }
328
329        if self.bias is False:  # no bias term in the hidden layer
330            if W is None:
331                if self.nodes_sim == "sobol":
332                    self.W_ = generate_sobol(
333                        n_dims=n_features,
334                        n_points=self.n_hidden_features,
335                        seed=self.seed,
336                    )
337                elif self.nodes_sim == "hammersley":
338                    self.W_ = generate_hammersley(
339                        n_dims=n_features,
340                        n_points=self.n_hidden_features,
341                        seed=self.seed,
342                    )
343                elif self.nodes_sim == "uniform":
344                    self.W_ = generate_uniform(
345                        n_dims=n_features,
346                        n_points=self.n_hidden_features,
347                        seed=self.seed,
348                    )
349                else:
350                    self.W_ = generate_halton(
351                        n_dims=n_features,
352                        n_points=self.n_hidden_features,
353                        seed=self.seed,
354                    )
355
356                # self.W_ = hash_sim[self.nodes_sim](
357                #             n_dims=n_features,
358                #             n_points=self.n_hidden_features,
359                #             seed=self.seed,
360                #         )
361
362                assert (
363                    scaled_X.shape[1] == self.W_.shape[0]
364                ), "check dimensions of covariates X and matrix W"
365
366                return mo.dropout(
367                    x=self.activation_func(
368                        mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
369                    ),
370                    drop_prob=self.dropout,
371                    seed=self.seed,
372                )
373
374            # W is not none
375            assert (
376                scaled_X.shape[1] == W.shape[0]
377            ), "check dimensions of covariates X and matrix W"
378
379            # self.W_ = W
380            return mo.dropout(
381                x=self.activation_func(
382                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
383                ),
384                drop_prob=self.dropout,
385                seed=self.seed,
386            )
387
388        # with bias term in the hidden layer
389        if W is None:
390            n_features_1 = n_features + 1
391
392            if self.nodes_sim == "sobol":
393                self.W_ = generate_sobol(
394                    n_dims=n_features_1,
395                    n_points=self.n_hidden_features,
396                    seed=self.seed,
397                )
398            elif self.nodes_sim == "hammersley":
399                self.W_ = generate_hammersley(
400                    n_dims=n_features_1,
401                    n_points=self.n_hidden_features,
402                    seed=self.seed,
403                )
404            elif self.nodes_sim == "uniform":
405                self.W_ = generate_uniform(
406                    n_dims=n_features_1,
407                    n_points=self.n_hidden_features,
408                    seed=self.seed,
409                )
410            else:
411                self.W_ = generate_halton(
412                    n_dims=n_features_1,
413                    n_points=self.n_hidden_features,
414                    seed=self.seed,
415                )
416
417            # self.W_ = hash_sim[self.nodes_sim](
418            #         n_dims=n_features_1,
419            #         n_points=self.n_hidden_features,
420            #         seed=self.seed,
421            #     )
422
423            return mo.dropout(
424                x=self.activation_func(
425                    mo.safe_sparse_dot(
426                        a=mo.cbind(
427                            np.ones(scaled_X.shape[0]),
428                            scaled_X,
429                            backend=self.backend,
430                        ),
431                        b=self.W_,
432                        backend=self.backend,
433                    )
434                ),
435                drop_prob=self.dropout,
436                seed=self.seed,
437            )
438
439        # W is not None
440        # self.W_ = W
441        return mo.dropout(
442            x=self.activation_func(
443                mo.safe_sparse_dot(
444                    a=mo.cbind(
445                        np.ones(scaled_X.shape[0]),
446                        scaled_X,
447                        backend=self.backend,
448                    ),
449                    b=W,
450                    backend=self.backend,
451                )
452            ),
453            drop_prob=self.dropout,
454            seed=self.seed,
455        )
456
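
In other words, `create_layer` builds a quasi-randomized hidden layer: a frozen weight matrix `W` is drawn from a Sobol, Hammersley, Halton or uniform sequence, the (optionally bias-augmented) scaled inputs are projected through it, passed through the activation function, and dropout is applied. A minimal numpy sketch of the bias-free case, using a plain uniform draw where the class would use the quasi-random generators and `mo.dropout` (illustrative only):

```python
# Minimal sketch of the hidden-layer construction (bias=False, 'relu').
import numpy as np

def create_layer_sketch(scaled_X, n_hidden_features=5, dropout=0.0, seed=123):
    rng = np.random.default_rng(seed)
    n_features = scaled_X.shape[1]
    W = rng.uniform(size=(n_features, n_hidden_features))  # frozen random weights
    H = np.maximum(scaled_X @ W, 0.0)                      # 'relu' activation
    if dropout > 0:
        # simplified stand-in for mo.dropout: zero out a random fraction of nodes
        H = H * (rng.uniform(size=H.shape) > dropout)
    return H
```
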
457    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
458        """Create new hidden features for training set, with hidden layer, center the response.
459
460        Parameters:
461
462            y: array-like, shape = [n_samples]
463                Target values
464
465            X: {array-like}, shape = [n_samples, n_features]
466                Training vectors, where n_samples is the number
467                of samples and n_features is the number of features
468
469            W: {array-like}, shape = [n_features, hidden_features]
470                if provided, constructs the hidden layer via W
471
472        Returns:
473
474            (centered response, direct link + hidden layer matrix): {tuple}
475
476        """
477
478        # either X and y are stored or not
479        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
480        if self.n_hidden_features > 0:  # has a hidden layer
481            assert (
482                len(self.type_scaling) >= 2
483            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
484
485        if X is None:
486
487            if self.col_sample == 1:
488                input_X = self.X_
489            else:
490                n_features = self.X_.shape[1]
491                new_n_features = int(np.ceil(n_features * self.col_sample))
492                assert (
493                    new_n_features >= 1
494                ), "check class attribute 'col_sample' and the number of covariates provided for X"
495                np.random.seed(self.seed)
496                index_col = np.random.choice(
497                    range(n_features), size=new_n_features, replace=False
498                )
499                self.index_col_ = index_col
500                input_X = self.X_[:, self.index_col_]
501
502        else:  # X is not None # keep X vs self.X_
503
504            if isinstance(X, pd.DataFrame):
505                X = copy.deepcopy(X.values.astype(float))
506
507            if self.col_sample == 1:
508                input_X = X
509            else:
510                n_features = X.shape[1]
511                new_n_features = int(np.ceil(n_features * self.col_sample))
512                assert (
513                    new_n_features >= 1
514                ), "check class attribute 'col_sample' and the number of covariates provided for X"
515                np.random.seed(self.seed)
516                index_col = np.random.choice(
517                    range(n_features), size=new_n_features, replace=False
518                )
519                self.index_col_ = index_col
520                input_X = X[:, self.index_col_]
521
522        if self.n_clusters <= 0:
523            # data without any clustering: self.n_clusters is None -----
524
525            if self.n_hidden_features > 0:  # with hidden layer
526
527                self.nn_scaler_, scaled_X = mo.scale_covariates(
528                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
529                )
530                Phi_X = (
531                    self.create_layer(scaled_X)
532                    if W is None
533                    else self.create_layer(scaled_X, W=W)
534                )
535                Z = (
536                    mo.cbind(input_X, Phi_X, backend=self.backend)
537                    if self.direct_link is True
538                    else Phi_X
539                )
540                self.scaler_, scaled_Z = mo.scale_covariates(
541                    Z, choice=self.type_scaling[0], scaler=self.scaler_
542                )
543            else:  # no hidden layer
544                Z = input_X
545                self.scaler_, scaled_Z = mo.scale_covariates(
546                    Z, choice=self.type_scaling[0], scaler=self.scaler_
547                )
548
549        else:
550
551            # data with clustering: self.n_clusters is not None ----- # keep
552
553            augmented_X = mo.cbind(
554                input_X,
555                self.encode_clusters(input_X, **kwargs),
556                backend=self.backend,
557            )
558
559            if self.n_hidden_features > 0:  # with hidden layer
560
561                self.nn_scaler_, scaled_X = mo.scale_covariates(
562                    augmented_X,
563                    choice=self.type_scaling[1],
564                    scaler=self.nn_scaler_,
565                )
566                Phi_X = (
567                    self.create_layer(scaled_X)
568                    if W is None
569                    else self.create_layer(scaled_X, W=W)
570                )
571                Z = (
572                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
573                    if self.direct_link is True
574                    else Phi_X
575                )
576                self.scaler_, scaled_Z = mo.scale_covariates(
577                    Z, choice=self.type_scaling[0], scaler=self.scaler_
578                )
579            else:  # no hidden layer
580                Z = augmented_X
581                self.scaler_, scaled_Z = mo.scale_covariates(
582                    Z, choice=self.type_scaling[0], scaler=self.scaler_
583                )
584
585        # Returning model inputs -----
586        if mx.is_factor(y) is False:  # regression
587            # center y
588            if y is None:
589                self.y_mean_, centered_y = mo.center_response(self.y_)
590            else:
591                self.y_mean_, centered_y = mo.center_response(y)
592
593            # y is subsampled
594            if self.row_sample < 1:
595                n, p = Z.shape
596
597                self.subsampler_ = (
598                    SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
599                    if y is None
600                    else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
601                )
602
603                self.index_row_ = self.subsampler_.subsample()
604
605                n_row_sample = len(self.index_row_)
606                # regression
607                return (
608                    centered_y[self.index_row_].reshape(n_row_sample),
609                    self.scaler_.transform(
610                        Z[self.index_row_, :].reshape(n_row_sample, p)
611                    ),
612                )
613            # y is not subsampled
614            # regression
615            return (centered_y, self.scaler_.transform(Z))
616
617        # classification
618        # y is subsampled
619        if self.row_sample < 1:
620            n, p = Z.shape
621
622            self.subsampler_ = (
623                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
624                if y is None
625                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
626            )
627
628            self.index_row_ = self.subsampler_.subsample()
629
630            n_row_sample = len(self.index_row_)
631            # classification
632            return (
633                y[self.index_row_].reshape(n_row_sample),
634                self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
635            )
636        # y is not subsampled
637        # classification
638        return (y, self.scaler_.transform(Z))
639
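
To summarize what `cook_training_set` returns: writing $X_{\mathrm{cols}}$ for the (possibly column-subsampled) inputs and $A = [\,X_{\mathrm{cols}} \mid \mathrm{onehot}(\mathrm{cluster}(X_{\mathrm{cols}}))\,]$ for the cluster-augmented matrix (the cluster block is skipped when `n_clusters <= 0`), the design matrix is

$$
Z = \big[\, A \;\big|\; g\big(\mathrm{scale}_1(A)\,W\big) \,\big] \quad \text{if \texttt{direct\_link} is True, else} \quad Z = g\big(\mathrm{scale}_1(A)\,W\big),
$$

and simply $Z = A$ when `n_hidden_features == 0`. The method returns $(y - \bar y,\ \mathrm{scale}_0(Z))$ for regression or $(y,\ \mathrm{scale}_0(Z))$ for classification, with rows subsampled when `row_sample < 1`. Here $g$ is the activation function, $W$ the frozen hidden-layer weights from `create_layer`, and $\mathrm{scale}_1$, $\mathrm{scale}_0$ the scalers chosen by `type_scaling[1]` and `type_scaling[0]`. `cook_test_set` below replays the stored scalers, clusterer and weights on new data.
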
640    def cook_test_set(self, X, **kwargs):
641        """Transform data from test set, with hidden layer.
642
643        Parameters:
644
645            X: {array-like}, shape = [n_samples, n_features]
646                Test vectors, where n_samples is the number
647                of samples and n_features is the number of features
648
649            **kwargs: additional parameters to be passed to self.encode_cluster
650
651        Returns:
652
653            Transformed test set : {array-like}
654        """
655
656        if isinstance(X, pd.DataFrame):
657            X = copy.deepcopy(X.values.astype(float))
658
659        if len(X.shape) == 1:
660            X = X.reshape(1, -1)
661
662        if (
663            self.n_clusters == 0
664        ):  # data without clustering: self.n_clusters is None -----
665            if self.n_hidden_features > 0:
666                # if hidden layer
667                scaled_X = (
668                    self.nn_scaler_.transform(X)
669                    if (self.col_sample == 1)
670                    else self.nn_scaler_.transform(X[:, self.index_col_])
671                )
672                Phi_X = self.create_layer(scaled_X, self.W_)
673                if self.direct_link == True:
674                    return self.scaler_.transform(
675                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
676                    )
677                # when self.direct_link == False
678                return self.scaler_.transform(Phi_X)
679            # if no hidden layer # self.n_hidden_features == 0
680            return self.scaler_.transform(X)
681
682        # data with clustering: self.n_clusters > 0 -----
683        if self.col_sample == 1:
684            predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs)
685            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
686        else:
687            predicted_clusters = self.encode_clusters(
688                X=X[:, self.index_col_], predict=True, **kwargs
689            )
690            augmented_X = mo.cbind(
691                X[:, self.index_col_], predicted_clusters, backend=self.backend
692            )
693
694        if self.n_hidden_features > 0:  # if hidden layer
695            scaled_X = self.nn_scaler_.transform(augmented_X)
696            Phi_X = self.create_layer(scaled_X, self.W_)
697            if self.direct_link == True:
698                return self.scaler_.transform(
699                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
700                )
701            return self.scaler_.transform(Phi_X)
702
703        # if no hidden layer
704        return self.scaler_.transform(augmented_X)
705
706    def cross_val_score(
707        self,
708        X,
709        y,
710        cv=5,
711        scoring="accuracy",
712        random_state=42,
713        n_jobs=-1,
714        epsilon=0.5,
715        penalized=True,
716        objective="abs",
717        **kwargs
718    ):
719        """
720        Penalized Cross-validation score for a model.
721
722        Parameters:
723
724            X: {array-like}, shape = [n_samples, n_features]
725                Training vectors, where n_samples is the number
726                of samples and n_features is the number of features
727
728            y: array-like, shape = [n_samples]
729                Target values
730
738            cv: int
739                Number of folds
740
741            scoring: str
742                Scoring metric
743
744            random_state: int
745                Random state
746
747            n_jobs: int
748                Number of jobs to run in parallel
749
750            epsilon: float
751                Penalty parameter
752
753            penalized: bool
754                Whether to obtain penalized cross-validation score or not
755
756            objective: str
757                'abs': Minimize the absolute difference between cross-validation score and validation score
758                'relative': Minimize the relative difference between cross-validation score and validation score
759        Returns:
760
761            A namedtuple with the following fields:
762                - cv_score: float
763                    cross-validation score
764                - val_score: float
765                    validation score
766                - penalized_score: float
767                    penalized cross-validation score: abs(cv_score - val_score) + epsilon*(1/val_score + 1/cv_score) when objective='abs', abs(cv_score/val_score - 1) + epsilon*(1/val_score + 1/cv_score) when objective='relative'
768                    If higher scoring metric is better, minimize the function result.
769                    If lower scoring metric is better, maximize the function result.
770        """
771        if scoring == "accuracy":
772            scoring_func = accuracy_score
773        elif scoring == "balanced_accuracy":
774            scoring_func = balanced_accuracy_score
775        elif scoring == "f1":
776            scoring_func = f1_score
777        elif scoring == "roc_auc":
778            scoring_func = roc_auc_score
779        elif scoring == "r2":
780            scoring_func = r2_score
781        elif scoring == "mse":
782            scoring_func = mean_squared_error
783        elif scoring == "mae":
784            scoring_func = mean_absolute_error
785        elif scoring == "mape":
786            scoring_func = mean_absolute_percentage_error
787        elif scoring == "rmse":
788
789            def scoring_func(y_true, y_pred):
790                return np.sqrt(mean_squared_error(y_true, y_pred))
791
792        X_train, X_val, y_train, y_val = train_test_split(
793            X, y, test_size=0.2, random_state=random_state
794        )
795
796        res = cross_val_score(
797            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
798        )  # cross-validation error
799
800        if penalized == False:
801            return res
802
803        DescribeResult = namedtuple(
804            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
805        )
806
807        numerator = res.mean()
808
809        # Evaluate on the held-out validation split (an extra, (cv+1)-th, fold)
810        preds_val = self.fit(X_train, y_train).predict(X_val)
811        try:
812            denominator = scoring(y_val, preds_val)  # validation error (if `scoring` is a callable)
813        except Exception:  # otherwise fall back to the string-based scoring_func selected above
814            denominator = scoring_func(y_val, preds_val)
815
816        # if higher is better
817        if objective == "abs":
818            penalized_score = np.abs(numerator - denominator) + epsilon * (
819                1 / denominator + 1 / numerator
820            )
821        elif objective == "relative":
822            ratio = numerator / denominator
823            penalized_score = np.abs(ratio - 1) + epsilon * (
824                1 / denominator + 1 / numerator
825            )
826
827        return DescribeResult(
828            cv_score=numerator,
829            val_score=denominator,
830            penalized_score=penalized_score,
831        )
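
The penalized score combines the mean cross-validation score with a score on a held-out validation split. A minimal usage sketch of `cross_val_score` (the dataset, scoring choice and hyperparameters below are illustrative assumptions, not taken from the source):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)

# BaseRegressor (documented further below) inherits cross_val_score from Base
reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2)
res = reg.cross_val_score(X, y, cv=5, scoring="r2", epsilon=0.5, objective="abs")

# objective="abs": |cv_score - val_score| + epsilon * (1/val_score + 1/cv_score)
# e.g. cv_score=0.51, val_score=0.48 -> 0.03 + 0.5*(1/0.48 + 1/0.51) ≈ 2.05
print(res.cv_score, res.val_score, res.penalized_score)
```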

Base model from which all the other classes inherit.

This class contains the most important data preprocessing/feature engineering methods.

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
236    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
237        """Create new covariates with kmeans or GMM clustering
238
239        Parameters:
240
241            X: {array-like}, shape = [n_samples, n_features]
242                Training vectors, where n_samples is the number
243                of samples and n_features is the number of features.
244
245            predict: boolean
246                is False on training set and True on test set
247
248            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
249                if scaler has already been fitted on training data (online training), it can be passed here
250
251            **kwargs:
252                additional parameters to be passed to the
253                clustering method
254
255        Returns:
256
257            Clusters' matrix, one-hot encoded: {array-like}
258
259        """
260
261        np.random.seed(self.seed)
262
263        if X is None:
264            X = self.X_
265
266        if isinstance(X, pd.DataFrame):
267            X = copy.deepcopy(X.values.astype(float))
268
269        if len(X.shape) == 1:
270            X = X.reshape(1, -1)
271
272        if predict is False:  # encode training set
273
274            # scale input data before clustering
275            self.clustering_scaler_, scaled_X = mo.scale_covariates(
276                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
277            )
278
279            self.clustering_obj_, X_clustered = mo.cluster_covariates(
280                scaled_X,
281                self.n_clusters,
282                self.seed,
283                type_clust=self.type_clust,
284                **kwargs
285            )
286
287            if self.cluster_encode == True:
288                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
289                    np.float16
290                )
291
292            return X_clustered.astype(np.float16)
293
294        # if predict == True, encode test set
295        X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X))
296
297        if self.cluster_encode == True:
298            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)
299
300        return X_clustered.astype(np.float16)

Create new covariates with kmeans or GMM clustering

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

predict: boolean
    is False on training set and True on test set

scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
    if scaler has already been fitted on training data (online training), it can be passed here

**kwargs:
    additional parameters to be passed to the
    clustering method

Returns:

Clusters' matrix, one-hot encoded: {array-like}
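
For intuition only, here is a standalone sketch of what this method produces, assuming type_clust='kmeans' and one-hot encoding; it re-implements the idea with scikit-learn and is not the nnetsauce internals:

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

X = np.random.rand(20, 4)
scaler = StandardScaler().fit(X)                        # scaling choice ~ type_scaling[2]
km = KMeans(n_clusters=3, n_init=10, random_state=123).fit(scaler.transform(X))
labels = km.predict(scaler.transform(X))                # predict=True path on a test set
clusters_matrix = np.eye(3)[labels].astype(np.float16)  # clusters' matrix, one-hot encoded
```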
def create_layer(self, scaled_X, W=None):
302    def create_layer(self, scaled_X, W=None):
303        """Create hidden layer.
304
305        Parameters:
306
307            scaled_X: {array-like}, shape = [n_samples, n_features]
308                Training vectors, where n_samples is the number
309                of samples and n_features is the number of features
310
311            W: {array-like}, shape = [n_features, hidden_features]
312                if provided, constructs the hidden layer with W; otherwise computed internally
313
314        Returns:
315
316            Hidden layer matrix: {array-like}
317
318        """
319
320        n_features = scaled_X.shape[1]
321
322        # hash_sim = {
323        #         "sobol": generate_sobol,
324        #         "hammersley": generate_hammersley,
325        #         "uniform": generate_uniform,
326        #         "halton": generate_halton
327        #     }
328
329        if self.bias is False:  # no bias term in the hidden layer
330            if W is None:
331                if self.nodes_sim == "sobol":
332                    self.W_ = generate_sobol(
333                        n_dims=n_features,
334                        n_points=self.n_hidden_features,
335                        seed=self.seed,
336                    )
337                elif self.nodes_sim == "hammersley":
338                    self.W_ = generate_hammersley(
339                        n_dims=n_features,
340                        n_points=self.n_hidden_features,
341                        seed=self.seed,
342                    )
343                elif self.nodes_sim == "uniform":
344                    self.W_ = generate_uniform(
345                        n_dims=n_features,
346                        n_points=self.n_hidden_features,
347                        seed=self.seed,
348                    )
349                else:
350                    self.W_ = generate_halton(
351                        n_dims=n_features,
352                        n_points=self.n_hidden_features,
353                        seed=self.seed,
354                    )
355
356                # self.W_ = hash_sim[self.nodes_sim](
357                #             n_dims=n_features,
358                #             n_points=self.n_hidden_features,
359                #             seed=self.seed,
360                #         )
361
362                assert (
363                    scaled_X.shape[1] == self.W_.shape[0]
364                ), "check dimensions of covariates X and matrix W"
365
366                return mo.dropout(
367                    x=self.activation_func(
368                        mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend)
369                    ),
370                    drop_prob=self.dropout,
371                    seed=self.seed,
372                )
373
374            # W is not none
375            assert (
376                scaled_X.shape[1] == W.shape[0]
377            ), "check dimensions of covariates X and matrix W"
378
379            # self.W_ = W
380            return mo.dropout(
381                x=self.activation_func(
382                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
383                ),
384                drop_prob=self.dropout,
385                seed=self.seed,
386            )
387
388        # with bias term in the hidden layer
389        if W is None:
390            n_features_1 = n_features + 1
391
392            if self.nodes_sim == "sobol":
393                self.W_ = generate_sobol(
394                    n_dims=n_features_1,
395                    n_points=self.n_hidden_features,
396                    seed=self.seed,
397                )
398            elif self.nodes_sim == "hammersley":
399                self.W_ = generate_hammersley(
400                    n_dims=n_features_1,
401                    n_points=self.n_hidden_features,
402                    seed=self.seed,
403                )
404            elif self.nodes_sim == "uniform":
405                self.W_ = generate_uniform(
406                    n_dims=n_features_1,
407                    n_points=self.n_hidden_features,
408                    seed=self.seed,
409                )
410            else:
411                self.W_ = generate_halton(
412                    n_dims=n_features_1,
413                    n_points=self.n_hidden_features,
414                    seed=self.seed,
415                )
416
417            # self.W_ = hash_sim[self.nodes_sim](
418            #         n_dims=n_features_1,
419            #         n_points=self.n_hidden_features,
420            #         seed=self.seed,
421            #     )
422
423            return mo.dropout(
424                x=self.activation_func(
425                    mo.safe_sparse_dot(
426                        a=mo.cbind(
427                            np.ones(scaled_X.shape[0]),
428                            scaled_X,
429                            backend=self.backend,
430                        ),
431                        b=self.W_,
432                        backend=self.backend,
433                    )
434                ),
435                drop_prob=self.dropout,
436                seed=self.seed,
437            )
438
439        # W is not None
440        # self.W_ = W
441        return mo.dropout(
442            x=self.activation_func(
443                mo.safe_sparse_dot(
444                    a=mo.cbind(
445                        np.ones(scaled_X.shape[0]),
446                        scaled_X,
447                        backend=self.backend,
448                    ),
449                    b=W,
450                    backend=self.backend,
451                )
452            ),
453            drop_prob=self.dropout,
454            seed=self.seed,
455        )

Create hidden layer.

Parameters:

scaled_X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer with W; otherwise computed internally

Returns:

Hidden layer matrix: {array-like}
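
A small numpy sketch of the construction above, assuming bias=True, ReLU activation, and a uniform stand-in for the weight matrix W (the actual code draws W from Sobol/Hammersley/Halton/uniform sequences and then applies dropout):

```python
import numpy as np

rng = np.random.default_rng(123)
scaled_X = rng.standard_normal((20, 4))                        # n_samples x n_features
W = rng.uniform(size=(4 + 1, 5))                               # (n_features + bias) x n_hidden_features
X1 = np.column_stack([np.ones(scaled_X.shape[0]), scaled_X])   # prepend the bias column
Phi_X = np.maximum(X1 @ W, 0.0)                                # activation_func = ReLU
# mo.dropout would then randomly zero a fraction `dropout` of Phi_X's entries
```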
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
457    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
458        """Create new hidden features for training set, with hidden layer, center the response.
459
460        Parameters:
461
462            y: array-like, shape = [n_samples]
463                Target values
464
465            X: {array-like}, shape = [n_samples, n_features]
466                Training vectors, where n_samples is the number
467                of samples and n_features is the number of features
468
469            W: {array-like}, shape = [n_features, hidden_features]
470                if provided, constructs the hidden layer via W
471
472        Returns:
473
474            (centered response, direct link + hidden layer matrix): {tuple}
475
476        """
477
478        # either X and y are stored or not
479        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
480        if self.n_hidden_features > 0:  # has a hidden layer
481            assert (
482                len(self.type_scaling) >= 2
483            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
484
485        if X is None:
486
487            if self.col_sample == 1:
488                input_X = self.X_
489            else:
490                n_features = self.X_.shape[1]
491                new_n_features = int(np.ceil(n_features * self.col_sample))
492                assert (
493                    new_n_features >= 1
494                ), "check class attribute 'col_sample' and the number of covariates provided for X"
495                np.random.seed(self.seed)
496                index_col = np.random.choice(
497                    range(n_features), size=new_n_features, replace=False
498                )
499                self.index_col_ = index_col
500                input_X = self.X_[:, self.index_col_]
501
502        else:  # X is not None # keep X vs self.X_
503
504            if isinstance(X, pd.DataFrame):
505                X = copy.deepcopy(X.values.astype(float))
506
507            if self.col_sample == 1:
508                input_X = X
509            else:
510                n_features = X.shape[1]
511                new_n_features = int(np.ceil(n_features * self.col_sample))
512                assert (
513                    new_n_features >= 1
514                ), "check class attribute 'col_sample' and the number of covariates provided for X"
515                np.random.seed(self.seed)
516                index_col = np.random.choice(
517                    range(n_features), size=new_n_features, replace=False
518                )
519                self.index_col_ = index_col
520                input_X = X[:, self.index_col_]
521
522        if self.n_clusters <= 0:
523            # data without any clustering: self.n_clusters <= 0 -----
524
525            if self.n_hidden_features > 0:  # with hidden layer
526
527                self.nn_scaler_, scaled_X = mo.scale_covariates(
528                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
529                )
530                Phi_X = (
531                    self.create_layer(scaled_X)
532                    if W is None
533                    else self.create_layer(scaled_X, W=W)
534                )
535                Z = (
536                    mo.cbind(input_X, Phi_X, backend=self.backend)
537                    if self.direct_link is True
538                    else Phi_X
539                )
540                self.scaler_, scaled_Z = mo.scale_covariates(
541                    Z, choice=self.type_scaling[0], scaler=self.scaler_
542                )
543            else:  # no hidden layer
544                Z = input_X
545                self.scaler_, scaled_Z = mo.scale_covariates(
546                    Z, choice=self.type_scaling[0], scaler=self.scaler_
547                )
548
549        else:
550
551            # data with clustering: self.n_clusters > 0 -----
552
553            augmented_X = mo.cbind(
554                input_X,
555                self.encode_clusters(input_X, **kwargs),
556                backend=self.backend,
557            )
558
559            if self.n_hidden_features > 0:  # with hidden layer
560
561                self.nn_scaler_, scaled_X = mo.scale_covariates(
562                    augmented_X,
563                    choice=self.type_scaling[1],
564                    scaler=self.nn_scaler_,
565                )
566                Phi_X = (
567                    self.create_layer(scaled_X)
568                    if W is None
569                    else self.create_layer(scaled_X, W=W)
570                )
571                Z = (
572                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
573                    if self.direct_link is True
574                    else Phi_X
575                )
576                self.scaler_, scaled_Z = mo.scale_covariates(
577                    Z, choice=self.type_scaling[0], scaler=self.scaler_
578                )
579            else:  # no hidden layer
580                Z = augmented_X
581                self.scaler_, scaled_Z = mo.scale_covariates(
582                    Z, choice=self.type_scaling[0], scaler=self.scaler_
583                )
584
585        # Returning model inputs -----
586        if mx.is_factor(y) is False:  # regression
587            # center y
588            if y is None:
589                self.y_mean_, centered_y = mo.center_response(self.y_)
590            else:
591                self.y_mean_, centered_y = mo.center_response(y)
592
593            # y is subsampled
594            if self.row_sample < 1:
595                n, p = Z.shape
596
597                self.subsampler_ = (
598                    SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
599                    if y is None
600                    else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
601                )
602
603                self.index_row_ = self.subsampler_.subsample()
604
605                n_row_sample = len(self.index_row_)
606                # regression
607                return (
608                    centered_y[self.index_row_].reshape(n_row_sample),
609                    self.scaler_.transform(
610                        Z[self.index_row_, :].reshape(n_row_sample, p)
611                    ),
612                )
613            # y is not subsampled
614            # regression
615            return (centered_y, self.scaler_.transform(Z))
616
617        # classification
618        # y is subsampled
619        if self.row_sample < 1:
620            n, p = Z.shape
621
622            self.subsampler_ = (
623                SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed)
624                if y is None
625                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
626            )
627
628            self.index_row_ = self.subsampler_.subsample()
629
630            n_row_sample = len(self.index_row_)
631            # classification
632            return (
633                y[self.index_row_].reshape(n_row_sample),
634                self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
635            )
636        # y is not subsampled
637        # classification
638        return (y, self.scaler_.transform(Z))

Create new hidden features for training set, with hidden layer, center the response.

Parameters:

y: array-like, shape = [n_samples]
    Target values

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer via W

Returns:

(centered response, direct link + hidden layer matrix): {tuple}
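
A short sketch of the resulting design matrix, assuming a concrete subclass (BaseRegressor, documented below) with default scalers; when direct_link=True the column count is original features + one-hot clusters + hidden nodes:

```python
import numpy as np
import nnetsauce as ns

X = np.random.rand(60, 3)
y = np.random.rand(60)

reg = ns.BaseRegressor(n_hidden_features=5, n_clusters=2, direct_link=True)
centered_y, scaled_Z = reg.cook_training_set(y=y, X=X)
print(scaled_Z.shape)               # expected (60, 3 + 2 + 5) under these assumptions
print(round(centered_y.mean(), 6))  # response is centered for regression
```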
def cook_test_set(self, X, **kwargs):
640    def cook_test_set(self, X, **kwargs):
641        """Transform data from test set, with hidden layer.
642
643        Parameters:
644
645            X: {array-like}, shape = [n_samples, n_features]
646                Test vectors, where n_samples is the number
647                of samples and n_features is the number of features
648
649            **kwargs: additional parameters to be passed to self.encode_clusters
650
651        Returns:
652
653            Transformed test set : {array-like}
654        """
655
656        if isinstance(X, pd.DataFrame):
657            X = copy.deepcopy(X.values.astype(float))
658
659        if len(X.shape) == 1:
660            X = X.reshape(1, -1)
661
662        if (
663            self.n_clusters == 0
664        ):  # data without clustering: self.n_clusters == 0 -----
665            if self.n_hidden_features > 0:
666                # if hidden layer
667                scaled_X = (
668                    self.nn_scaler_.transform(X)
669                    if (self.col_sample == 1)
670                    else self.nn_scaler_.transform(X[:, self.index_col_])
671                )
672                Phi_X = self.create_layer(scaled_X, self.W_)
673                if self.direct_link == True:
674                    return self.scaler_.transform(
675                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
676                    )
677                # when self.direct_link == False
678                return self.scaler_.transform(Phi_X)
679            # if no hidden layer # self.n_hidden_features == 0
680            return self.scaler_.transform(X)
681
682        # data with clustering: self.n_clusters > 0 -----
683        if self.col_sample == 1:
684            predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs)
685            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
686        else:
687            predicted_clusters = self.encode_clusters(
688                X=X[:, self.index_col_], predict=True, **kwargs
689            )
690            augmented_X = mo.cbind(
691                X[:, self.index_col_], predicted_clusters, backend=self.backend
692            )
693
694        if self.n_hidden_features > 0:  # if hidden layer
695            scaled_X = self.nn_scaler_.transform(augmented_X)
696            Phi_X = self.create_layer(scaled_X, self.W_)
697            if self.direct_link == True:
698                return self.scaler_.transform(
699                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
700                )
701            return self.scaler_.transform(Phi_X)
702
703        # if no hidden layer
704        return self.scaler_.transform(augmented_X)

Transform data from test set, with hidden layer.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.encode_clusters

Returns:

Transformed test set : {array-like}
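
In practice cook_training_set runs inside fit and cook_test_set inside predict, so the same scalers, clusters and hidden-layer weights are reused on new data. A hedged sketch, assuming a BaseRegressor:

```python
import numpy as np
import nnetsauce as ns

X, y = np.random.rand(50, 3), np.random.rand(50)

reg = ns.BaseRegressor(n_hidden_features=5, n_clusters=2).fit(X, y)  # fit -> cook_training_set
Z_new = reg.cook_test_set(np.random.rand(5, 3))                      # predict -> cook_test_set
print(Z_new.shape)   # same width as the training design matrix
```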
class BaseRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BaseRegressor(Base, RegressorMixin):
 16    """Random Vector Functional Link Network regression without shrinkage
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 31            'uniform'
 32
 33        bias: boolean
 34            indicates if the hidden layer contains a bias term (True) or
 35            not (False)
 36
 37        dropout: float
 38            regularization parameter; (random) percentage of nodes dropped out
 39            of the training
 40
 41        direct_link: boolean
 42            indicates if the original features are included (True) in model's
 43            fitting or not (False)
 44
 45        n_clusters: int
 46            number of clusters for type_clust='kmeans' or type_clust='gmm'
 47            clustering (could be 0: no clustering)
 48
 49        cluster_encode: bool
 50            defines how the variable containing clusters is treated (default is one-hot);
 51            if `False`, then labels are used, without one-hot encoding
 52
 53        type_clust: str
 54            type of clustering method: currently k-means ('kmeans') or Gaussian
 55            Mixture Model ('gmm')
 56
 57        type_scaling: a tuple of 3 strings
 58            scaling methods for inputs, hidden layer, and clustering respectively
 59            (and when relevant).
 60            Currently available: standardization ('std') or MinMax scaling ('minmax')
 61
 62        col_sample: float
 63            percentage of features randomly chosen for training
 64
 65        row_sample: float
 66            percentage of rows chosen for training, by stratified bootstrapping
 67
 68        seed: int
 69            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 70
 71        backend: str
 72            "cpu" or "gpu" or "tpu"
 73
 74    Attributes:
 75
 76        beta_: vector
 77            regression coefficients
 78
 79        GCV_: float
 80            Generalized Cross-Validation error
 81
 82    """
 83
 84    # construct the object -----
 85
 86    def __init__(
 87        self,
 88        n_hidden_features=5,
 89        activation_name="relu",
 90        a=0.01,
 91        nodes_sim="sobol",
 92        bias=True,
 93        dropout=0,
 94        direct_link=True,
 95        n_clusters=2,
 96        cluster_encode=True,
 97        type_clust="kmeans",
 98        type_scaling=("std", "std", "std"),
 99        col_sample=1,
100        row_sample=1,
101        seed=123,
102        backend="cpu",
103    ):
104        super().__init__(
105            n_hidden_features=n_hidden_features,
106            activation_name=activation_name,
107            a=a,
108            nodes_sim=nodes_sim,
109            bias=bias,
110            dropout=dropout,
111            direct_link=direct_link,
112            n_clusters=n_clusters,
113            cluster_encode=cluster_encode,
114            type_clust=type_clust,
115            type_scaling=type_scaling,
116            col_sample=col_sample,
117            row_sample=row_sample,
118            seed=seed,
119            backend=backend,
120        )
121
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend)
144
145        self.beta_ = fit_obj["beta_hat"]
146
147        self.GCV_ = fit_obj["GCV"]
148
149        return self
150
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Parameters:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Test vectors, where n_samples is the number
158                of samples and n_features is the number of features
159
160            **kwargs: additional parameters to be passed to self.cook_test_set
161
162        Returns:
163
164            model predictions: {array-like}
165        """
166
167        if len(X.shape) == 1:
168            n_features = X.shape[0]
169            new_X = mo.rbind(
170                X.reshape(1, n_features),
171                np.ones(n_features).reshape(1, n_features),
172            )
173
174            return (
175                self.y_mean_
176                + mo.safe_sparse_dot(
177                    a=self.cook_test_set(new_X, **kwargs),
178                    b=self.beta_,
179                    backend=self.backend,
180                )
181            )[0]
182
183        return self.y_mean_ + mo.safe_sparse_dot(
184            a=self.cook_test_set(X, **kwargs),
185            b=self.beta_,
186            backend=self.backend,
187        )

Random Vector Functional Link Network regression without shrinkage

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: vector
    regression coefficients

GCV_: float
    Generalized Cross-Validation error
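
A minimal end-to-end sketch (toy scikit-learn dataset; hyperparameters chosen for illustration only):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)

reg = ns.BaseRegressor(n_hidden_features=10, n_clusters=2, dropout=0.1)
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
print(reg.GCV_)   # Generalized Cross-Validation error computed at fit time
```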
def fit(self, X, y, **kwargs):
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(X=scaled_Z, y=centered_y, backend=self.backend)
144
145        self.beta_ = fit_obj["beta_hat"]
146
147        self.GCV_ = fit_obj["GCV"]
148
149        return self

Fit BaseRegressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to self.cook_training_set

Returns:

self: object
def predict(self, X, **kwargs):
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Parameters:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Test vectors, where n_samples is the number
158                of samples and n_features is the number of features
159
160            **kwargs: additional parameters to be passed to self.cook_test_set
161
162        Returns:
163
164            model predictions: {array-like}
165        """
166
167        if len(X.shape) == 1:
168            n_features = X.shape[0]
169            new_X = mo.rbind(
170                X.reshape(1, n_features),
171                np.ones(n_features).reshape(1, n_features),
172            )
173
174            return (
175                self.y_mean_
176                + mo.safe_sparse_dot(
177                    a=self.cook_test_set(new_X, **kwargs),
178                    b=self.beta_,
179                    backend=self.backend,
180                )
181            )[0]
182
183        return self.y_mean_ + mo.safe_sparse_dot(
184            a=self.cook_test_set(X, **kwargs),
185            b=self.beta_,
186            backend=self.backend,
187        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions: {array-like}
class BayesianRVFLRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFLRegressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with one prior
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s: float
 61            std. dev. of regression parameters in Bayesian Ridge Regression
 62
 63        sigma: float
 64            std. dev. of residuals in Bayesian Ridge Regression
 65
 66        return_std: boolean
 67            if True, uncertainty around predictions is evaluated
 68
 69        backend: str
 70            "cpu" or "gpu" or "tpu"
 71
 72    Attributes:
 73
 74        beta_: array-like
 75            regression's coefficients
 76
 77        Sigma_: array-like
 78            covariance of the distribution of fitted parameters
 79
 80        GCV_: float
 81            Generalized cross-validation error
 82
 83        y_mean_: float
 84            average response
 85
 86    Examples:
 87
 88    ```python
 89    TBD
 90    ```
 91
 92    """
 93
 94    # construct the object -----
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        direct_link=True,
105        n_clusters=2,
106        cluster_encode=True,
107        type_clust="kmeans",
108        type_scaling=("std", "std", "std"),
109        seed=123,
110        s=0.1,
111        sigma=0.05,
112        return_std=True,
113        backend="cpu",
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            activation_name=activation_name,
118            a=a,
119            nodes_sim=nodes_sim,
120            bias=bias,
121            dropout=dropout,
122            direct_link=direct_link,
123            n_clusters=n_clusters,
124            cluster_encode=cluster_encode,
125            type_clust=type_clust,
126            type_scaling=type_scaling,
127            seed=seed,
128            backend=backend,
129        )
130        self.s = s
131        self.sigma = sigma
132        self.beta_ = None
133        self.Sigma_ = None
134        self.GCV_ = None
135        self.return_std = return_std
136
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self
178
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Test vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with one prior

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s: float
    std. dev. of regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
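
Pending the official example, a minimal hedged sketch of fitting and predicting with uncertainty (toy data from scikit-learn; hyperparameters are illustrative):

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)

reg = ns.BayesianRVFLRegressor(n_hidden_features=10, s=0.1, sigma=0.05)
reg.fit(X[:300], y[:300])
mean_preds, std_preds = reg.predict(X[300:], return_std=True)  # posterior mean and std. dev.
```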
def fit(self, X, y, **kwargs):
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self

Fit BayesianRVFLRegressor to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Test vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
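
When return_std=True, a rough Gaussian prediction band can be derived from the returned standard deviations; a self-contained sketch under that assumption (synthetic data, illustrative settings):

```python
import numpy as np
import nnetsauce as ns

rng = np.random.default_rng(123)
X = rng.uniform(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.standard_normal(100)

reg = ns.BayesianRVFLRegressor(n_hidden_features=5).fit(X[:80], y[:80])
mean_preds, std_preds = reg.predict(X[80:], return_std=True)
lower, upper = mean_preds - 1.96 * std_preds, mean_preds + 1.96 * std_preds  # ~95% band
```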
class BayesianRVFL2Regressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFL2Regressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with two priors
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model's fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s1: float
 61            std. dev. of init. regression parameters in Bayesian Ridge Regression
 62
 63        s2: float
 64            std. dev. of augmented regression parameters in Bayesian Ridge Regression
 65
 66        sigma: float
 67            std. dev. of residuals in Bayesian Ridge Regression
 68
 69        return_std: boolean
 70            if True, uncertainty around predictions is evaluated
 71
 72        backend: str
 73            "cpu" or "gpu" or "tpu"
 74
 75    Attributes:
 76
 77        beta_: array-like
 78            regression's coefficients
 79
 80        Sigma_: array-like
 81            covariance of the distribution of fitted parameters
 82
 83        GCV_: float
 84            Generalized cross-validation error
 85
 86        y_mean_: float
 87            average response
 88
 89    Examples:
 90
 91    ```python
 92    TBD
 93    ```
 94
 95    """
 96
 97    # construct the object -----
 98
 99    def __init__(
100        self,
101        n_hidden_features=5,
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=0,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        seed=123,
113        s1=0.1,
114        s2=0.1,
115        sigma=0.05,
116        return_std=True,
117        backend="cpu",
118    ):
119        super().__init__(
120            n_hidden_features=n_hidden_features,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.s1 = s1
136        self.s2 = s2
137        self.sigma = sigma
138        self.beta_ = None
139        self.Sigma_ = None
140        self.GCV_ = None
141        self.return_std = return_std
142
143    def fit(self, X, y, **kwargs):
144        """Fit BayesianRVFL2Regressor to training data (X, y)
145
146        Parameters:
147
148            X: {array-like}, shape = [n_samples, n_features]
149                Training vectors, where n_samples is the number
150                of samples and n_features is the number of features
151
152            y: array-like, shape = [n_samples]
153                Target values
154
155            **kwargs: additional parameters to be passed to
156                    self.cook_training_set
157
158        Returns:
159
160            self: object
161
162        """
163
164        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
165
166        n, p = X.shape
167        q = self.n_hidden_features
168
169        if self.direct_link == True:
170            r = p + self.n_clusters
171
172            block11 = (self.s1**2) * np.eye(r)
173            block12 = np.zeros((r, q))
174            block21 = np.zeros((q, r))
175            block22 = (self.s2**2) * np.eye(q)
176
177            Sigma_prior = mo.rbind(
178                x=mo.cbind(x=block11, y=block12, backend=self.backend),
179                y=mo.cbind(x=block21, y=block22, backend=self.backend),
180                backend=self.backend,
181            )
182
183        else:
184            Sigma_prior = (self.s2**2) * np.eye(q)
185
186        fit_obj = lmf.beta_Sigma_hat_rvfl2(
187            X=scaled_Z,
188            y=centered_y,
189            Sigma=Sigma_prior,
190            sigma=self.sigma,
191            fit_intercept=False,
192            return_cov=self.return_std,
193            backend=self.backend,
194        )
195
196        self.beta_ = fit_obj["beta_hat"]
197
198        if self.return_std == True:
199            self.Sigma_ = fit_obj["Sigma_hat"]
200
201        self.GCV_ = fit_obj["GCV"]
202
203        return self
204
205    def predict(self, X, return_std=False, **kwargs):
206        """Predict test data X.
207
208        Parameters:
209
210            X: {array-like}, shape = [n_samples, n_features]
 211                Test vectors, where n_samples is the number
212                of samples and n_features is the number of features.
213
214            return_std: {boolean}, standard dev. is returned or not
215
216            **kwargs: additional parameters to be passed to
217                    self.cook_test_set
218
219        Returns:
220
 221            model predictions: {array-like}; a tuple (mean, std. dev.) if return_std is True
222
223        """
224
225        if len(X.shape) == 1:  # one observation in the test set only
226            n_features = X.shape[0]
227            new_X = mo.rbind(
228                x=X.reshape(1, n_features),
229                y=np.ones(n_features).reshape(1, n_features),
230                backend=self.backend,
231            )
232
233        self.return_std = return_std
234
235        if self.return_std == False:
236            if len(X.shape) == 1:
237                return (
238                    self.y_mean_
239                    + mo.safe_sparse_dot(
240                        self.cook_test_set(new_X, **kwargs),
241                        self.beta_,
242                        backend=self.backend,
243                    )
244                )[0]
245
246            return self.y_mean_ + mo.safe_sparse_dot(
247                self.cook_test_set(X, **kwargs),
248                self.beta_,
249                backend=self.backend,
250            )
251
252        else:  # confidence interval required for preds?
253            if len(X.shape) == 1:
254                Z = self.cook_test_set(new_X, **kwargs)
255
256                pred_obj = lmf.beta_Sigma_hat_rvfl2(
257                    X_star=Z,
258                    return_cov=self.return_std,
259                    beta_hat_=self.beta_,
260                    Sigma_hat_=self.Sigma_,
261                    backend=self.backend,
262                )
263
264                return (
265                    self.y_mean_ + pred_obj["preds"][0],
266                    pred_obj["preds_std"][0],
267                )
268
269            Z = self.cook_test_set(X, **kwargs)
270
271            pred_obj = lmf.beta_Sigma_hat_rvfl2(
272                X_star=Z,
273                return_cov=self.return_std,
274                beta_hat_=self.beta_,
275                Sigma_hat_=self.Sigma_,
276                backend=self.backend,
277            )
278
279            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with two priors

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s1: float
    std. dev. of init. regression parameters in Bayesian Ridge Regression

s2: float
    std. dev. of augmented regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression's coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
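A minimal usage sketch while the example above is still to be written; the dataset and hyperparameters below are illustrative and not taken from the package's own examples:

```python
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# two-prior Bayesian RVFL regressor; return_std=True (the default) keeps the
# posterior covariance so that predictive std. deviations are available
reg = ns.BayesianRVFL2Regressor(n_hidden_features=10, s1=0.1, s2=0.1, sigma=0.05)
reg.fit(X_train, y_train)

# with return_std=True, predict returns a tuple (mean, std. dev.)
mean_preds, std_preds = reg.predict(X_test, return_std=True)
print(mean_preds[:5])
print(std_preds[:5])
```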
def fit(self, X, y, **kwargs):

Fit BayesianRVFL2Regressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
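For intuition about `fit`: with `direct_link=True`, the prior covariance is block-diagonal, with `s1**2` on the block for the original features and clusters and `s2**2` on the hidden-features block. A standalone sketch of that construction (sizes are illustrative):

```python
import numpy as np

s1, s2 = 0.1, 0.1   # std. dev. of the two priors
r, q = 4, 5         # r = n_features + n_clusters, q = n_hidden_features

# same block structure as assembled in fit(), built here with np.block for brevity
Sigma_prior = np.block([
    [(s1**2) * np.eye(r), np.zeros((r, q))],
    [np.zeros((q, r)), (s2**2) * np.eye(q)],
])
print(Sigma_prior.shape)  # (9, 9)
```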
def predict(self, X, return_std=False, **kwargs):

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}; a tuple (mean, std. dev.) if return_std is True
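The (mean, std. dev.) pair returned when `return_std=True` can be turned into approximate Gaussian prediction intervals, mirroring the `norm.ppf` multiplier used elsewhere in the package; a small self-contained sketch (arrays and the 95% level are illustrative):

```python
import numpy as np
from scipy.stats import norm

# mean_preds, std_preds: as returned by predict(X_test, return_std=True);
# small made-up arrays here so the snippet runs on its own
mean_preds = np.array([151.2, 148.7, 160.1])
std_preds = np.array([9.8, 10.4, 11.1])

level = 95
z = norm.ppf(1 - (100 - level) / 200)  # ~1.96 for a 95% interval
lower, upper = mean_preds - z * std_preds, mean_preds + z * std_preds
print(np.column_stack([lower, mean_preds, upper]))
```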
class ClassicalMTS(nnetsauce.Base):
 42class ClassicalMTS(Base):
 43    """Multivariate time series (ClassicalMTS) forecasting with classical statistical models
 44
 45    Parameters:
 46
 47        model: type of model: str.
 48            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
 49
 50    Attributes:
 51
 52        df_: data frame
 53            the input data frame, in case a DataFrame is provided to `fit`
 54
 55        level_: int
 56            level of confidence for prediction intervals (default is 95)
 57
 58    Examples:
 59    See examples/classical_mts_timeseries.py
 60    """
 61
 62    # construct the object -----
 63
 64    def __init__(self, model="VAR"):
 65
 66        self.model = model
 67        if self.model == "VAR":
 68            self.obj = VAR
 69        elif self.model == "VECM":
 70            self.obj = VECM
 71        elif self.model == "ARIMA":
 72            self.obj = ARIMA
 73        elif self.model == "ETS":
 74            self.obj = ExponentialSmoothing
 75        elif self.model == "Theta":
 76            self.obj = ThetaModel
 77        else:
 78            raise ValueError("model not recognized")
 79        self.n_series = None
 80        self.replications = None
 81        self.mean_ = None
 82        self.upper_ = None
 83        self.lower_ = None
 84        self.output_dates_ = None
 85        self.alpha_ = None
 86        self.df_ = None
 87        self.residuals_ = []
 88        self.sims_ = None
 89        self.level_ = None
 90
 91    def fit(self, X, **kwargs):
 92        """Fit ClassicalMTS model to training data X, with optional regressors xreg
 93
 94        Parameters:
 95
 96        X: {array-like}, shape = [n_samples, n_features]
 97            Training time series, where n_samples is the number
 98            of samples and n_features is the number of features;
 99            X must be in increasing order (most recent observations last)
100
 101        **kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
102
103        Returns:
104
105        self: object
106        """
107
108        try:
109            self.n_series = X.shape[1]
110        except Exception:
111            self.n_series = 1
112
113        if (isinstance(X, pd.DataFrame) is False) and isinstance(
114            X, pd.Series
115        ) is False:  # input data set is a numpy array
116
117            X = pd.DataFrame(X)
118            if self.n_series > 1:
119                self.series_names = ["series" + str(i) for i in range(X.shape[1])]
120            else:
121                self.series_names = "series0"
122
123        else:  # input data set is a DataFrame or Series with column names
124
125            X_index = None
126            if X.index is not None and len(X.shape) > 1:
127                X_index = X.index
128                X = copy.deepcopy(mo.convert_df_to_numeric(X))
129            if X_index is not None:
130                try:
131                    X.index = X_index
132                except Exception:
133                    pass
134            if isinstance(X, pd.DataFrame):
135                self.series_names = X.columns.tolist()
136            else:
137                self.series_names = X.name
138
139        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
140            self.df_ = X
141            X = X.values
142            self.df_.columns = self.series_names
143            self.input_dates = ts.compute_input_dates(self.df_)
144        else:
145            self.df_ = pd.DataFrame(X, columns=self.series_names)
146
147        if self.model == "Theta":
148            self.obj = self.obj(self.df_, **kwargs).fit()
149        else:
150            self.obj = self.obj(X, **kwargs).fit(**kwargs)
151
152        return self
153
154    def predict(self, h=5, level=95, **kwargs):
155        """Forecast all the time series, h steps ahead
156
157        Parameters:
158
159        h: {integer}
160            Forecasting horizon
161
 162        **kwargs: additional parameters to be passed to the
 163                underlying model's forecasting method
164
165        Returns:
166
 167        model predictions for horizon = h: a namedtuple (mean, lower, upper) of point forecasts and prediction interval bounds
168
169        """
170
171        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
172
173        self.level_ = level
174
175        self.lower_ = None  # do not remove (/!\)
176
177        self.upper_ = None  # do not remove (/!\)
178
179        self.sims_ = None  # do not remove (/!\)
180
181        self.level_ = level
182
183        self.alpha_ = 100 - level
184
185        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
186
187        # Named tuple for forecast results
188        DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))
189
190        if self.model == "VAR":
191            mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval(
192                self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
193            )
194
195        elif self.model == "VECM":
196            forecast_result = self.obj.predict(steps=h)
197            mean_forecast = forecast_result
198            lower_bound, upper_bound = self._compute_confidence_intervals(
199                forecast_result, alpha=self.alpha_ / 100, **kwargs
200            )
201
202        elif self.model == "ARIMA":
203            forecast_result = self.obj.get_forecast(steps=h)
204            mean_forecast = forecast_result.predicted_mean
205            lower_bound = forecast_result.conf_int()[:, 0]
206            upper_bound = forecast_result.conf_int()[:, 1]
207
208        elif self.model == "ETS":
209            forecast_result = self.obj.forecast(steps=h)
210            residuals = self.obj.resid
211            std_errors = np.std(residuals)
212            mean_forecast = forecast_result
213            lower_bound = forecast_result - pi_multiplier * std_errors
214            upper_bound = forecast_result + pi_multiplier * std_errors
215
216        elif self.model == "Theta":
217            try:
218                mean_forecast = self.obj.forecast(steps=h).values
219                forecast_result = self.obj.prediction_intervals(
220                    steps=h, alpha=self.alpha_ / 100, **kwargs
221                )
222                lower_bound = forecast_result["lower"].values
223                upper_bound = forecast_result["upper"].values
224            except Exception:
225                mean_forecast = self.obj.forecast(steps=h)
226                forecast_result = self.obj.prediction_intervals(
227                    steps=h, alpha=self.alpha_ / 100, **kwargs
228                )
229                lower_bound = forecast_result["lower"]
230                upper_bound = forecast_result["upper"]
231
232        else:
233
234            raise ValueError("model not recognized")
235
236        try:
237            self.mean_ = pd.DataFrame(
238                mean_forecast,
239                columns=self.series_names,
240                index=self.output_dates_,
241            )
242            self.lower_ = pd.DataFrame(
243                lower_bound, columns=self.series_names, index=self.output_dates_
244            )
245            self.upper_ = pd.DataFrame(
246                upper_bound, columns=self.series_names, index=self.output_dates_
247            )
248        except Exception:
249            self.mean_ = pd.Series(
250                mean_forecast, name=self.series_names, index=self.output_dates_
251            )
252            self.lower_ = pd.Series(
253                lower_bound, name=self.series_names, index=self.output_dates_
254            )
255            self.upper_ = pd.Series(
256                upper_bound, name=self.series_names, index=self.output_dates_
257            )
258
259        return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)
260
 261    def _compute_confidence_intervals(self, forecast_result, alpha, **kwargs):  # accepts **kwargs forwarded from predict
262        """
263        Compute confidence intervals for VECM forecasts.
264        Uses the covariance of residuals to approximate the confidence intervals.
265        """
266        residuals = self.obj.resid
267        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
268        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors
269
270        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
271        lower_bound = forecast_result - z_value * std_errors
272        upper_bound = forecast_result + z_value * std_errors
273
274        return lower_bound, upper_bound
275
276    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
277        """Train on training_index, score on testing_index."""
278
279        assert (
280            bool(set(training_index).intersection(set(testing_index))) == False
281        ), "Non-overlapping 'training_index' and 'testing_index' required"
282
283        # Dimensions
284        try:
285            # multivariate time series
286            n, p = X.shape
287        except:
288            # univariate time series
289            n = X.shape[0]
290            p = 1
291
292        # Training and testing sets
293        if p > 1:
294            X_train = X[training_index, :]
295            X_test = X[testing_index, :]
296        else:
297            X_train = X[training_index]
298            X_test = X[testing_index]
299
300        # Horizon
301        h = len(testing_index)
302        assert (
303            len(training_index) + h
304        ) <= n, "Please check lengths of training and testing windows"
305
306        # Fit and predict
307        self.fit(X_train, **kwargs)
 308        preds = self.predict(h=h, **kwargs).mean  # point forecasts only, for scoring
309
310        if scoring is None:
311            scoring = "neg_root_mean_squared_error"
312
313        # check inputs
314        assert scoring in (
315            "explained_variance",
316            "neg_mean_absolute_error",
317            "neg_mean_squared_error",
318            "neg_root_mean_squared_error",
319            "neg_mean_squared_log_error",
320            "neg_median_absolute_error",
321            "r2",
322        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
323                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
324                               'neg_median_absolute_error', 'r2')"
325
326        scoring_options = {
327            "explained_variance": skm2.explained_variance_score,
328            "neg_mean_absolute_error": skm2.mean_absolute_error,
329            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
330            "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)),
331            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
332            "neg_median_absolute_error": skm2.median_absolute_error,
333            "r2": skm2.r2_score,
334        }
335
336        # if p > 1:
337        #     return tuple(
338        #         [
339        #             scoring_options[scoring](
340        #                 X_test[:, i], preds[:, i]#, **kwargs
341        #             )
342        #             for i in range(p)
343        #         ]
344        #     )
345        # else:
346        return scoring_options[scoring](X_test, preds)
347
348    def plot(self, series=None, type_axis="dates", type_plot="pi"):
349        """Plot time series forecast
350
351        Parameters:
352
353        series: {integer} or {string}
354            series index or name
355
356        """
357
358        assert all(
359            [
360                self.mean_ is not None,
361                self.lower_ is not None,
362                self.upper_ is not None,
363                self.output_dates_ is not None,
364            ]
365        ), "model forecasting must be obtained first (with predict)"
366
367        if series is None:
368            assert (
369                self.n_series == 1
370            ), "please specify series index or name (n_series > 1)"
371            series = 0
372
373        if isinstance(series, str):
374            assert (
375                series in self.series_names
376            ), f"series {series} doesn't exist in the input dataset"
377            series_idx = self.df_.columns.get_loc(series)
378        else:
379            assert isinstance(series, int) and (
380                0 <= series < self.n_series
381            ), f"check series index (< {self.n_series})"
382            series_idx = series
383
384        if isinstance(self.df_, pd.DataFrame):
385            y_all = list(self.df_.iloc[:, series_idx]) + list(
386                self.mean_.iloc[:, series_idx]
387            )
388            y_test = list(self.mean_.iloc[:, series_idx])
389        else:
390            y_all = list(self.df_.values) + list(self.mean_.values)
391            y_test = list(self.mean_.values)
392        n_points_all = len(y_all)
393        n_points_train = self.df_.shape[0]
394
395        if type_axis == "numeric":
396            x_all = [i for i in range(n_points_all)]
397            x_test = [i for i in range(n_points_train, n_points_all)]
398
399        if type_axis == "dates":  # use dates
400            x_all = np.concatenate(
401                (self.input_dates.values, self.output_dates_.values), axis=None
402            )
403            x_test = self.output_dates_.values
404
405        if type_plot == "pi":
406            fig, ax = plt.subplots()
407            ax.plot(x_all, y_all, "-")
408            ax.plot(x_test, y_test, "-", color="orange")
409            try:
410                ax.fill_between(
411                    x_test,
412                    self.lower_.iloc[:, series_idx],
413                    self.upper_.iloc[:, series_idx],
414                    alpha=0.2,
415                    color="orange",
416                )
417            except Exception:
418                ax.fill_between(
419                    x_test,
420                    self.lower_.values,
421                    self.upper_.values,
422                    alpha=0.2,
423                    color="orange",
424                )
425            if self.replications is None:
426                if self.n_series > 1:
427                    plt.title(
428                        f"prediction intervals for {series}",
429                        loc="left",
430                        fontsize=12,
431                        fontweight=0,
432                        color="black",
433                    )
434                else:
435                    plt.title(
436                        f"prediction intervals for input time series",
437                        loc="left",
438                        fontsize=12,
439                        fontweight=0,
440                        color="black",
441                    )
442                plt.show()
443            else:  # self.replications is not None
444                if self.n_series > 1:
445                    plt.title(
446                        f"prediction intervals for {self.replications} simulations of {series}",
447                        loc="left",
448                        fontsize=12,
449                        fontweight=0,
450                        color="black",
451                    )
452                else:
453                    plt.title(
454                        f"prediction intervals for {self.replications} simulations of input time series",
455                        loc="left",
456                        fontsize=12,
457                        fontweight=0,
458                        color="black",
459                    )
460                plt.show()
461
462        if type_plot == "spaghetti":
463            palette = plt.get_cmap("Set1")
464            sims_ix = getsims(self.sims_, series_idx)
465            plt.plot(x_all, y_all, "-")
466            for col_ix in range(
467                sims_ix.shape[1]
468            ):  # avoid this when there are thousands of simulations
469                plt.plot(
470                    x_test,
471                    sims_ix[:, col_ix],
472                    "-",
473                    color=palette(col_ix),
474                    linewidth=1,
475                    alpha=0.9,
476                )
477            plt.plot(x_all, y_all, "-", color="black")
478            plt.plot(x_test, y_test, "-", color="blue")
479            # Add titles
480            if self.n_series > 1:
481                plt.title(
482                    f"{self.replications} simulations of {series}",
483                    loc="left",
484                    fontsize=12,
485                    fontweight=0,
486                    color="black",
487                )
488            else:
489                plt.title(
490                    f"{self.replications} simulations of input time series",
491                    loc="left",
492                    fontsize=12,
493                    fontweight=0,
494                    color="black",
495                )
496            plt.xlabel("Time")
497            plt.ylabel("Values")
498            # Show the graph
499            plt.show()
500
501    def cross_val_score(
502        self,
503        X,
504        scoring="root_mean_squared_error",
505        n_jobs=None,
506        verbose=0,
507        xreg=None,
508        initial_window=5,
509        horizon=3,
510        fixed_window=False,
511        show_progress=True,
512        level=95,
513        **kwargs,
514    ):
515        """Evaluate a score by time series cross-validation.
516
517        Parameters:
518
519            X: {array-like, sparse matrix} of shape (n_samples, n_features)
520                The data to fit.
521
522            scoring: str or a function
523                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
 524                'mean_absolute_error', 'mean_percentage_error',
525                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
526                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
527
528            n_jobs: int, default=None
529                Number of jobs to run in parallel.
530
531            verbose: int, default=0
532                The verbosity level.
533
534            xreg: array-like, optional (default=None)
535                Additional (external) regressors to be passed to `fit`
536                xreg must be in 'increasing' order (most recent observations last)
537
538            initial_window: int
539                initial number of consecutive values in each training set sample
540
541            horizon: int
542                number of consecutive values in test set sample
543
544            fixed_window: boolean
545                if False, all training samples start at index 0, and the training
546                window's size is increasing.
547                if True, the training window's size is fixed, and the window is
548                rolling forward
549
550            show_progress: boolean
551                if True, a progress bar is printed
552
553            **kwargs: dict
554                additional parameters to be passed to `fit` and `predict`
555
556        Returns:
557
 558            A tuple: raw errors and descriptive statistics of the errors
559
560        """
561        tscv = TimeSeriesSplit()
562
563        tscv_obj = tscv.split(
564            X,
565            initial_window=initial_window,
566            horizon=horizon,
567            fixed_window=fixed_window,
568        )
569
570        if isinstance(scoring, str):
571
572            assert scoring in (
573                "root_mean_squared_error",
574                "mean_squared_error",
575                "mean_error",
576                "mean_absolute_error",
577                "mean_percentage_error",
578                "mean_absolute_percentage_error",
579                "winkler_score",
580                "coverage",
 581            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"
582
583            def err_func(X_test, X_pred, scoring):
584                if (self.replications is not None) or (
585                    self.type_pi == "gaussian"
586                ):  # probabilistic
587                    if scoring == "winkler_score":
588                        return winkler_score(X_pred, X_test, level=level)
589                    elif scoring == "coverage":
590                        return coverage(X_pred, X_test, level=level)
591                    else:
592                        return mean_errors(
593                            pred=X_pred.mean, actual=X_test, scoring=scoring
594                        )
595                else:  # not probabilistic
596                    return mean_errors(pred=X_pred, actual=X_test, scoring=scoring)
597
598        else:  # isinstance(scoring, str) = False
599
600            err_func = scoring
601
602        errors = []
603
604        train_indices = []
605
606        test_indices = []
607
608        for train_index, test_index in tscv_obj:
609            train_indices.append(train_index)
610            test_indices.append(test_index)
611
612        if show_progress is True:
613            iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices))
614        else:
615            iterator = zip(train_indices, test_indices)
616
617        for train_index, test_index in iterator:
618
619            if verbose == 1:
620                print(f"TRAIN: {train_index}")
621                print(f"TEST: {test_index}")
622
623            if isinstance(X, pd.DataFrame):
624                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
625                X_test = X.iloc[test_index, :]
626            else:
627                self.fit(X[train_index, :], xreg=xreg, **kwargs)
628                X_test = X[test_index, :]
629            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
630
631            errors.append(err_func(X_test, X_pred, scoring))
632
633        res = np.asarray(errors)
634
635        return res, describe(res)

Multivariate time series (ClassicalMTS) forecasting with classical statistical models

Parameters:

model: type of model: str.
    currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'

Attributes:

df_: data frame
    the input data frame, in case a DataFrame is provided to `fit`

level_: int
    level of confidence for prediction intervals (default is 95)

Examples: See examples/classical_mts_timeseries.py
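In addition to the script referenced above, a minimal sketch on synthetic data (model choice, sizes and frequency are illustrative):

```python
import numpy as np
import pandas as pd
import nnetsauce as ns

# two synthetic monthly series in a DataFrame with a date index
rng = np.random.default_rng(123)
dates = pd.date_range("2020-01-01", periods=60, freq="MS")
df = pd.DataFrame(
    rng.normal(size=(60, 2)).cumsum(axis=0),
    columns=["series0", "series1"],
    index=dates,
)

obj = ns.ClassicalMTS(model="VAR")
obj.fit(df)

# predict returns a namedtuple (mean, lower, upper) of DataFrames,
# also stored as obj.mean_, obj.lower_, obj.upper_
res = obj.predict(h=5, level=95)
print(res.mean)
print(res.lower)
print(res.upper)
```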

def fit(self, X, **kwargs):

Fit ClassicalMTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

**kwargs: for now, additional parameters to be passed for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object
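As stated above, observations must be ordered from oldest to most recent; with a date-indexed DataFrame such as the one in the sketch above, sorting by the index before fitting is enough (illustrative):

```python
# ensure increasing time order (most recent observations last) before fitting
df = df.sort_index()
obj = ns.ClassicalMTS(model="VAR").fit(df)  # fit returns self
```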

def predict(self, h=5, level=95, **kwargs):

Forecast all the time series, h steps ahead

Parameters:

h: {integer} Forecasting horizon

level: {integer} confidence level for prediction intervals, in percent (default is 95)

**kwargs: additional parameters to be passed to the underlying model's forecasting method

Returns:

model predictions for horizon = h: a namedtuple (mean, lower, upper) of point forecasts and prediction interval bounds
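`predict` also stores the forecasts and bounds on the fitted object (`mean_`, `lower_`, `upper_`, plus `output_dates_`), and `plot` can then draw them; continuing the sketch above:

```python
res = obj.predict(h=5, level=95)   # namedtuple (mean, lower, upper)

# the same results remain available as attributes
print(obj.mean_.tail())
print(obj.lower_.tail())

# plot history, forecast and prediction interval for one series
obj.plot(series="series0", type_axis="dates", type_plot="pi")
```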

def score(self, X, training_index, testing_index, scoring=None, **kwargs):

Train on training_index, score on testing_index.

class CustomClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 15class CustomClassifier(Custom, ClassifierMixin):
 16    """Custom Classification model
 17
 18    Parameters:
 19
 20        obj: object
 21            any object containing a method fit (obj.fit()) and a method predict
 22            (obj.predict())
 23
 24        n_hidden_features: int
 25            number of nodes in the hidden layer
 26
 27        activation_name: str
 28            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 29
 30        a: float
 31            hyperparameter for 'prelu' or 'elu' activation function
 32
 33        nodes_sim: str
 34            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 35            'uniform'
 36
 37        bias: boolean
 38            indicates if the hidden layer contains a bias term (True) or not
 39            (False)
 40
 41        dropout: float
 42            regularization parameter; (random) percentage of nodes dropped out
 43            of the training
 44
 45        direct_link: boolean
 46            indicates if the original predictors are included (True) in model's
 47            fitting or not (False)
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        col_sample: float
 67            percentage of covariates randomly chosen for training
 68
 69        row_sample: float
 70            percentage of rows chosen for training, by stratified bootstrapping
 71
 72        level: float
 73            confidence level for prediction sets. Default is None.
 74
 75        pi_method: str
 76            method for constructing the prediction sets: 'icp', 'tcp' if level is not None. Default is 'icp'.
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Examples:
 85
 86    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
 87
 88    ```python
 89    import nnetsauce as ns
 90    from sklearn.ensemble import RandomForestClassifier
 91    from sklearn.model_selection import train_test_split
 92    from sklearn.datasets import load_digits
 93    from time import time
 94
 95    digits = load_digits()
 96    X = digits.data
 97    y = digits.target
 98    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
 99                                                        random_state=123)
100
101    # layer 1 (base layer) ----
102    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
103
104    start = time()
105
106    layer1_regr.fit(X_train, y_train)
107
108    # Accuracy in layer 1
109    print(layer1_regr.score(X_test, y_test))
110
111    # layer 2 using layer 1 ----
112    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
113                            direct_link=True, bias=True,
114                            nodes_sim='uniform', activation_name='relu',
115                            n_clusters=2, seed=123)
116    layer2_regr.fit(X_train, y_train)
117
118    # Accuracy in layer 2
119    print(layer2_regr.score(X_test, y_test))
120
121    # layer 3 using layer 2 ----
122    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
123                            direct_link=True, bias=True, dropout=0.7,
124                            nodes_sim='uniform', activation_name='relu',
125                            n_clusters=2, seed=123)
126    layer3_regr.fit(X_train, y_train)
127
128    # Accuracy in layer 3
129    print(layer3_regr.score(X_test, y_test))
130
131    print(f"Elapsed {time() - start}")
132    ```
133
134    """
135
136    # construct the object -----
137
138    def __init__(
139        self,
140        obj,
141        n_hidden_features=5,
142        activation_name="relu",
143        a=0.01,
144        nodes_sim="sobol",
145        bias=True,
146        dropout=0,
147        direct_link=True,
148        n_clusters=2,
149        cluster_encode=True,
150        type_clust="kmeans",
151        type_scaling=("std", "std", "std"),
152        col_sample=1,
153        row_sample=1,
154        level=None,
155        pi_method="icp",
156        seed=123,
157        backend="cpu",
158    ):
159        super().__init__(
160            obj=obj,
161            n_hidden_features=n_hidden_features,
162            activation_name=activation_name,
163            a=a,
164            nodes_sim=nodes_sim,
165            bias=bias,
166            dropout=dropout,
167            direct_link=direct_link,
168            n_clusters=n_clusters,
169            cluster_encode=cluster_encode,
170            type_clust=type_clust,
171            type_scaling=type_scaling,
172            col_sample=col_sample,
173            row_sample=row_sample,
174            seed=seed,
175            backend=backend,
176        )
177        self.level = level
178        self.pi_method = pi_method
179        self.coef_ = None
180        self.intercept_ = None
181        self.type_fit = "classification"
182        if self.level is not None:
183            self.obj = PredictionSet(self.obj, level=self.level, method=self.pi_method)
184
185    def fit(self, X, y, sample_weight=None, **kwargs):
186        """Fit custom model to training data (X, y).
187
188        Parameters:
189
190            X: {array-like}, shape = [n_samples, n_features]
191                Training vectors, where n_samples is the number
192                of samples and n_features is the number of features.
193
194            y: array-like, shape = [n_samples]
195                Target values.
196
197            sample_weight: array-like, shape = [n_samples]
198                Sample weights.
199
200            **kwargs: additional parameters to be passed to
201                        self.cook_training_set or self.obj.fit
202
203        Returns:
204
205            self: object
206        """
207
208        if len(X.shape) == 1:
209            if isinstance(X, pd.DataFrame):
210                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
211            else:
212                X = X.reshape(1, -1)
213
214        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
215        self.classes_ = np.unique(y)
 216        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
217
218        if self.level is not None:
219            self.obj = PredictionSet(
220                obj=self.obj, method=self.pi_method, level=self.level
221            )
222
223        # if sample_weights, else: (must use self.row_index)
224        if sample_weight is not None:
225            self.obj.fit(
226                scaled_Z,
227                output_y,
228                sample_weight=sample_weight[self.index_row_].ravel(),
229                # **kwargs
230            )
231
232            return self
233
234        # if sample_weight is None:
235        self.obj.fit(scaled_Z, output_y)
236        self.classes_ = np.unique(y)  # for compatibility with sklearn
237        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
238
239        if hasattr(self.obj, "coef_"):
240            self.coef_ = self.obj.coef_
241
242        if hasattr(self.obj, "intercept_"):
243            self.intercept_ = self.obj.intercept_
244
245        return self
246
247    def partial_fit(self, X, y, sample_weight=None, **kwargs):
248        """Partial fit custom model to training data (X, y).
249
250        Parameters:
251
252            X: {array-like}, shape = [n_samples, n_features]
253                Subset of training vectors, where n_samples is the number
254                of samples and n_features is the number of features.
255
256            y: array-like, shape = [n_samples]
257                Subset of target values.
258
259            sample_weight: array-like, shape = [n_samples]
260                Sample weights.
261
262            **kwargs: additional parameters to be passed to
263                        self.cook_training_set or self.obj.fit
264
265        Returns:
266
267            self: object
268        """
269
270        if len(X.shape) == 1:
271            if isinstance(X, pd.DataFrame):
272                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
273            else:
274                X = X.reshape(1, -1)
275            y = np.array([y], dtype=np.integer)
276
277        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
278        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
279
280        # if sample_weights, else: (must use self.row_index)
281        if sample_weight is not None:
282            try:
283                self.obj.partial_fit(
284                    scaled_Z,
285                    output_y,
286                    sample_weight=sample_weight[self.index_row_].ravel(),
287                    # **kwargs
288                )
289            except:
290                NotImplementedError