Generic Boosting Regressor

GenericBoostingRegressor(
  base_model = NULL,
  n_estimators = 100L,
  learning_rate = 0.1,
  n_hidden_features = 5L,
  reg_lambda = 0.1,
  row_sample = 1,
  col_sample = 1,
  dropout = 0,
  tolerance = 1e-04,
  direct_link = 1L,
  verbose = 1L,
  seed = 123L,
  activation = "relu",
  n_clusters = 0,
  clustering_method = "kmeans",
  cluster_scaling = "standard",
  degree = NULL,
  weights_distr = "uniform"
)

Arguments

base_model:

object, base model (weak learner) to be boosted; defaults to a scikit-learn ExtraTreeRegressor. See the sketch after this list for supplying a custom one.

n_estimators:

int, number of boosting iterations.

learning_rate:

float, shrinkage factor applied at each boosting iteration; controls the learning speed at training time.

n_hidden_features:

int, number of nodes in successive hidden layers.

reg_lambda:

float, L2 regularization parameter for successive errors in the optimizer (at training time).

row_sample:

float, proportion of rows sampled from the training set (between 0 and 1).

col_sample:

float, proportion of columns sampled from the training set (between 0 and 1).

dropout:

float, proportion of hidden-layer nodes dropped at training time.

tolerance:

float, controls early stopping in gradient descent (at training time).

direct_link:

bool, indicates whether the original features are included (True) in the model's fitting or not (False).

verbose:

int, whether to display a progress bar (1) or not (0).

seed:

int, reproducibility seed for the hidden layer's weights simulation, clustering, and dropout.

activation:

str, activation function: currently 'relu', 'relu6', 'sigmoid', 'tanh'.

n_clusters:

int, number of clusters for clustering.

clustering_method:

str, clustering method: currently 'kmeans', 'gmm' (Gaussian Mixture Model).

cluster_scaling:

str, scaling method for clustering: currently 'standard', 'minmax', 'robust'.

degree:

int, degree of polynomial interaction features.

weights_distr:

str, distribution of the hidden layer weights: currently 'uniform', 'gaussian'.
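
As an illustration of how these arguments combine, here is a minimal sketch that swaps in a custom base_model in place of the default ExtraTreeRegressor. It assumes scikit-learn is installed in the Python environment used by reticulate (which mlsauce builds on); the weak learner and hyperparameter values are arbitrary illustrations, not recommendations.

library(reticulate)

# Assumption: scikit-learn is available in the active Python environment.
sklearn_tree <- reticulate::import("sklearn.tree")

# A shallow decision tree as the weak learner, plus a few of the
# arguments documented above.
obj_custom <- mlsauce::GenericBoostingRegressor(
  base_model = sklearn_tree$DecisionTreeRegressor(max_depth = 2L),
  n_estimators = 50L,
  learning_rate = 0.2,
  n_hidden_features = 10L,
  row_sample = 0.9,
  col_sample = 0.9,
  verbose = 0L
)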

Value

An object of class GenericBoostingRegressor.

Examples


library(datasets)

X <- as.matrix(datasets::mtcars[, -1])  # predictors: all columns except mpg
y <- as.integer(datasets::mtcars[, 1])  # response: mpg, truncated to integers

n <- dim(X)[1]
p <- dim(X)[2]
set.seed(21341)
# bootstrap sample of row indices for training; negative indexing below
# drops those rows to form the test set
train_index <- sample(x = 1:n, size = floor(0.8*n), replace = TRUE)
test_index <- -train_index
X_train <- as.matrix(X[train_index, ])
y_train <- as.double(y[train_index])
X_test <- as.matrix(X[test_index, ])
y_test <- as.double(y[test_index])

obj <- mlsauce::GenericBoostingRegressor()  # default base model and hyperparameters

print(obj$get_params())
#> $activation
#> [1] "relu"
#> 
#> $alpha
#> [1] 0.5
#> 
#> $backend
#> [1] "cpu"
#> 
#> $base_model__ccp_alpha
#> [1] 0
#> 
#> $base_model__criterion
#> [1] "squared_error"
#> 
#> $base_model__max_depth
#> NULL
#> 
#> $base_model__max_features
#> [1] 1
#> 
#> $base_model__max_leaf_nodes
#> NULL
#> 
#> $base_model__min_impurity_decrease
#> [1] 0
#> 
#> $base_model__min_samples_leaf
#> [1] 1
#> 
#> $base_model__min_samples_split
#> [1] 2
#> 
#> $base_model__min_weight_fraction_leaf
#> [1] 0
#> 
#> $base_model__monotonic_cst
#> NULL
#> 
#> $base_model__random_state
#> NULL
#> 
#> $base_model__splitter
#> [1] "random"
#> 
#> $base_model
#> ExtraTreeRegressor()
#> 
#> $cluster_scaling
#> [1] "standard"
#> 
#> $clustering_method
#> [1] "kmeans"
#> 
#> $col_sample
#> [1] 1
#> 
#> $degree
#> NULL
#> 
#> $direct_link
#> [1] 1
#> 
#> $dropout
#> [1] 0
#> 
#> $kernel
#> NULL
#> 
#> $learning_rate
#> [1] 0.1
#> 
#> $n_clusters
#> [1] 0
#> 
#> $n_estimators
#> [1] 100
#> 
#> $n_hidden_features
#> [1] 5
#> 
#> $reg_lambda
#> [1] 0.1
#> 
#> $replications
#> NULL
#> 
#> $row_sample
#> [1] 1
#> 
#> $seed
#> [1] 123
#> 
#> $solver
#> [1] "ridge"
#> 
#> $tolerance
#> [1] 1e-04
#> 
#> $type_pi
#> NULL
#> 
#> $verbose
#> [1] 1
#> 
#> $weights_distr
#> [1] "uniform"
#> 

obj$fit(X_train, y_train)
#> GenericBoostingRegressor(col_sample=1.0, dropout=0.0, n_clusters=0.0,
#>                          row_sample=1.0)

print(obj$score(X_test, y_test))
#> [1] 0.8162114
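
The value returned by obj$score() is presumably the coefficient of determination (R-squared), following the scikit-learn convention. As a follow-up, here is a minimal sketch that computes test-set predictions and an RMSE by hand; it assumes the fitted object exposes a predict() method mirroring the Python mlsauce API.

# Assumption: the fitted model exposes predict(), as in the Python API.
preds <- obj$predict(X_test)
rmse <- sqrt(mean((y_test - preds)^2))
print(rmse)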