Generic Boosting Regressor

GenericBoostingRegressor(
  base_model = NULL,
  n_estimators = 100L,
  learning_rate = 0.1,
  n_hidden_features = 5L,
  reg_lambda = 0.1,
  row_sample = 1,
  col_sample = 1,
  dropout = 0,
  tolerance = 1e-04,
  direct_link = 1L,
  verbose = 1L,
  seed = 123L,
  activation = "relu",
  n_clusters = 0,
  clustering_method = "kmeans",
  cluster_scaling = "standard",
  degree = NULL,
  weights_distr = "uniform"
)

Arguments

base_model:

object, base model (weak learner) to be boosted; defaults to a scikit-learn ExtraTreeRegressor. See the sketch after this list for supplying a custom one.

n_estimators:

int, number of boosting iterations.

learning_rate:

float, shrinkage factor applied at each boosting iteration; controls the learning speed at training time.

n_hidden_features:

int, number of nodes in successive hidden layers.

reg_lambda:

float, L2 regularization parameter for successive errors in the optimizer (at training time).

row_sample:

float, proportion of rows sampled from the training set (between 0 and 1).

col_sample:

float, proportion of columns sampled from the training set (between 0 and 1).

dropout:

float, proportion of hidden-layer nodes dropped at training time.

tolerance:

float, controls early stopping in gradient descent (at training time).

direct_link:

bool, indicates whether the original features are included (True) in the model's fitting or not (False).

verbose:

int, whether to display a progress bar (1) or not (0).

seed:

int, reproducibility seed for the hidden layer's weights simulation, clustering, and dropout.

activation:

str, activation function: currently 'relu', 'relu6', 'sigmoid', 'tanh'.

n_clusters:

int, number of clusters for clustering.

clustering_method:

str, clustering method: currently 'kmeans', 'gmm' (Gaussian Mixture Model).

cluster_scaling:

str, scaling method for clustering: currently 'standard', 'minmax', 'robust'.

degree:

int, degree of polynomial interaction features.

weights_distr:

str, distribution of the hidden layer weights: currently 'uniform', 'gaussian'.
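
As an illustration of how these arguments combine, here is a minimal sketch that swaps in a custom base_model in place of the default ExtraTreeRegressor. It assumes scikit-learn is installed in the Python environment used by reticulate (which mlsauce builds on); the weak learner and hyperparameter values are arbitrary illustrations, not recommendations.

library(reticulate)

# Assumption: scikit-learn is available in the active Python environment.
sklearn_tree <- reticulate::import("sklearn.tree")

# A shallow decision tree as the weak learner, plus a few of the
# arguments documented above.
obj_custom <- mlsauce::GenericBoostingRegressor(
  base_model = sklearn_tree$DecisionTreeRegressor(max_depth = 2L),
  n_estimators = 50L,
  learning_rate = 0.2,
  n_hidden_features = 10L,
  row_sample = 0.9,
  col_sample = 0.9,
  verbose = 0L
)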

Value

An object of class GenericBoostingRegressor.

Examples


library(datasets)

X <- as.matrix(datasets::mtcars[, -1])  # predictors: all columns except mpg
y <- as.integer(datasets::mtcars[, 1])  # response: mpg, truncated to integers

n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
# bootstrap sample of row indices for training; negative indexing below
# drops those rows to form the test set
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.double(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.double(y[test_index])

obj <- mlsauce::GenericBoostingRegressor()  # default base model and hyperparameters

print(obj$get_params())
#> $activation
#> [1] "relu"
#> 
#> $alpha
#> [1] 0.5
#> 
#> $backend
#> [1] "cpu"
#> 
#> $base_model__ccp_alpha
#> [1] 0
#> 
#> $base_model__criterion
#> [1] "squared_error"
#> 
#> $base_model__max_depth
#> NULL
#> 
#> $base_model__max_features
#> [1] 1
#> 
#> $base_model__max_leaf_nodes
#> NULL
#> 
#> $base_model__min_impurity_decrease
#> [1] 0
#> 
#> $base_model__min_samples_leaf
#> [1] 1
#> 
#> $base_model__min_samples_split
#> [1] 2
#> 
#> $base_model__min_weight_fraction_leaf
#> [1] 0
#> 
#> $base_model__monotonic_cst
#> NULL
#> 
#> $base_model__random_state
#> NULL
#> 
#> $base_model__splitter
#> [1] "random"
#> 
#> $base_model
#> ExtraTreeRegressor()
#> 
#> $cluster_scaling
#> [1] "standard"
#> 
#> $clustering_method
#> [1] "kmeans"
#> 
#> $col_sample
#> [1] 1
#> 
#> $degree
#> NULL
#> 
#> $direct_link
#> [1] 1
#> 
#> $dropout
#> [1] 0
#> 
#> $kernel
#> NULL
#> 
#> $learning_rate
#> [1] 0.1
#> 
#> $n_clusters
#> [1] 0
#> 
#> $n_estimators
#> [1] 100
#> 
#> $n_hidden_features
#> [1] 5
#> 
#> $reg_lambda
#> [1] 0.1
#> 
#> $replications
#> NULL
#> 
#> $row_sample
#> [1] 1
#> 
#> $seed
#> [1] 123
#> 
#> $solver
#> [1] "ridge"
#> 
#> $tolerance
#> [1] 1e-04
#> 
#> $type_pi
#> NULL
#> 
#> $verbose
#> [1] 1
#> 
#> $weights_distr
#> [1] "uniform"
#> 

obj$fit(X_train, y_train)
#> GenericBoostingRegressor(col_sample=1.0, dropout=0.0, n_clusters=0.0,
#>                          row_sample=1.0)

print(obj$score(X_test, y_test))
#> [1] 0.8162114
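
The value returned by obj$score() is presumably the coefficient of determination (R-squared), following the scikit-learn convention. As a follow-up, here is a minimal sketch that computes test-set predictions and an RMSE by hand; it assumes the fitted object exposes a predict() method mirroring the Python mlsauce API.

# Assumption: the fitted model exposes predict(), as in the Python API.
preds <- obj$predict(X_test)
rmse <- sqrt(mean((y_test - preds)^2))
print(rmse)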