# getting-started.Rmd
library(learningmachine)
## Loading required package: randtoolbox
## Loading required package: rngWELL
## This is randtoolbox. For an overview, type 'help("randtoolbox")'.
## Loading required package: tseries
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: memoise
## Loading required package: foreach
## Loading required package: skimr
## Loading required package: snow
## Loading required package: doSNOW
## Loading required package: iterators
## Loading required package: ggplot2
## Loading required package: lattice
library(mlbench)
library(palmerpenguins)
# Response is mpg; predictors are all mtcars columns except the first one.
y <- mtcars$mpg
X <- as.matrix(mtcars[, -1])
# Seed the RNG so the random 80/20 train/test partition is reproducible.
set.seed(123)
(index_train <- base::sample.int(
  n = nrow(X),
  size = floor(0.8 * nrow(X)),
  replace = FALSE
))
## [1] 31 15 19 14 3 10 18 22 11 5 20 29 23 30 9 28 8 27 7 32 26 17 4 1 24
# Sampled rows form the training set; every remaining row goes to the test set.
y_train <- y[index_train]
y_test <- y[-index_train]
X_train <- X[index_train, ]
X_test <- X[-index_train, ]
dim(X_train)
## [1] 25 10
dim(X_test)
## [1] 7 10
# Build a Regressor using method "lm" and pi_method "splitconformal".
obj <- learningmachine::Regressor$new(
  method = "lm",
  pi_method = "splitconformal"
)
# Query the object's type, name and method.
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
obj$get_method()
## [1] "lm"
# NOTE(review): no obj$fit() call is visible before obj$predict() is used, yet
# the rendered output that follows reports an elapsed time; presumably a timed
# fit chunk was lost from this rendering -- confirm against the vignette source.
## Elapsed: 0.01 s
## [1] 3.548852
# Predict on the test set, keep the result in `res`, and display it.
res <- obj$predict(X = X_test)
print(res)
## $preds
## Mazda RX4 Wag Valiant Merc 450SE Merc 450SL
## 21.67584 19.80291 14.75149 15.70693
## Lincoln Continental Toyota Corona Pontiac Firebird
## 12.03666 28.20630 13.55241
##
## $lower
## Mazda RX4 Wag Valiant Merc 450SE Merc 450SL
## 10.675844 8.802908 3.751488 4.706932
## Lincoln Continental Toyota Corona Pontiac Firebird
## 1.036659 17.206298 2.552412
##
## $upper
## Mazda RX4 Wag Valiant Merc 450SE Merc 450SL
## 32.67584 30.80291 25.75149 26.70693
## Lincoln Continental Toyota Corona Pontiac Firebird
## 23.03666 39.20630 24.55241
# Draw the training responses followed by the test-set predictions; the y-axis
# range spans the interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Refit on the training data, passing pi_method = "jackknifeplus", and report
# the elapsed wall-clock time of the fit.
t0 <- proc.time()[3]
obj$fit(X_train, y_train, pi_method = "jackknifeplus")
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.06 s
# Set the level to 95 (presumably the interval coverage in percent -- confirm)
# and recompute the test-set predictions.
obj$set_level(95L)
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# ranger regression ----
# Build a Regressor using method "ranger" and pi_method "splitconformal".
obj <- learningmachine::Regressor$new(
  method = "ranger",
  pi_method = "splitconformal"
)
# Query the object's type and name.
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# NOTE(review): no obj$fit() call is visible before obj$predict() is used, yet
# the rendered output that follows reports elapsed times; presumably the fit
# chunk was lost from this rendering -- confirm against the vignette source.
## Elapsed: 1.527 s
## [1] 2.344976
## Elapsed: 0.013 s
# Set the level to 95 (presumably the interval coverage in percent -- confirm)
# and compute the test-set predictions.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Recompute the test-set predictions and redraw the same diagnostic plot.
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# KRR & ranger regression on Boston ----
# Boston dataset from MASS (note: this dataset has an ethical problem).
library(MASS)
data("Boston")
# Fixed seed so the random 80/20 train/test split is reproducible.
set.seed(13)
# floor() makes the training-set size an explicit integer instead of relying on
# sample() silently truncating the fractional value 0.8 * nrow(Boston).
train_idx <- sample(nrow(Boston), floor(0.8 * nrow(Boston)))
# medv is the response. Predictors are selected by name rather than by position
# so the split does not depend on medv being the last column.
X_train <- as.matrix(Boston[train_idx, setdiff(colnames(Boston), "medv")])
X_test <- as.matrix(Boston[-train_idx, setdiff(colnames(Boston), "medv")])
y_train <- Boston$medv[train_idx]
y_test <- Boston$medv[-train_idx]
# KRR ----
# Build a Regressor using method "krr" with pi_method "none".
obj <- learningmachine::Regressor$new(
  method = "krr",
  pi_method = "none"
)
# Query the object's type, name and method.
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
obj$get_method()
## [1] "krr"
# Fit on the Boston training split with reg_lambda = 0.1 and report the
# elapsed wall-clock time (third element of proc.time()).
t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.108 s
## [1] 2.888748
# Time the test-set summary of the fitted model (default type_ci, progress
# display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE)
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] 0.906853
##
## $R_squared_adj
## [1] 0.8930926
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.33671 -1.10461 -0.02411 0.12105 1.46980 9.29398
##
## $citests
## estimate lower upper p-value signif
## crim -0.0544164530 -0.075529516 -0.0333033896 1.509961e-06 ***
## zn -0.0046418100 -0.009605911 0.0003222915 6.652190e-02 .
## indus -0.0245357110 -0.051187606 0.0021161842 7.077114e-02 .
## chas 7.1730075477 6.375361838 7.9706532575 5.512518e-33 ***
## nox -9.4958030753 -12.095806303 -6.8957998474 8.811259e-11 ***
## rm 4.7080249286 3.939353604 5.4766962526 1.787394e-21 ***
## age -0.0439718628 -0.053082559 -0.0348611667 7.779486e-16 ***
## dis -1.4214523042 -1.573146091 -1.2697585171 2.257304e-34 ***
## rad 0.1810040336 0.155692359 0.2063157080 8.827967e-26 ***
## tax -0.0115644823 -0.013066430 -0.0100625342 5.303780e-28 ***
## ptratio -0.4819300831 -0.582754790 -0.3811053766 1.242164e-15 ***
## black -0.0002461991 -0.001967809 0.0014754112 7.772335e-01
## lstat -0.4091458985 -0.475209948 -0.3430818492 9.126116e-22 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75
## 1 crim -0.0544 0.107 -0.416 -0.0696 -0.00478 0.00661
## 2 zn -0.00464 0.0253 -0.0524 -0.0200 -0.00301 0.00416
## 3 indus -0.0245 0.136 -0.308 -0.109 -0.0396 0.0220
## 4 chas 7.17 4.06 -17.7 5.97 7.12 9.16
## 5 nox -9.50 13.2 -78.3 -15.0 -7.32 -2.30
## 6 rm 4.71 3.91 -3.43 1.83 4.48 7.72
## 7 age -0.0440 0.0464 -0.138 -0.0778 -0.0506 -0.00674
## 8 dis -1.42 0.772 -3.43 -1.80 -1.32 -0.933
## 9 rad 0.181 0.129 -0.0944 0.0827 0.173 0.261
## 10 tax -0.0116 0.00765 -0.0388 -0.0148 -0.00963 -0.00718
## 11 ptratio -0.482 0.513 -2.13 -0.671 -0.441 -0.209
## 12 black -0.000246 0.00877 -0.0263 -0.00450 0.0000316 0.00344
## 13 lstat -0.409 0.336 -1.62 -0.474 -0.310 -0.189
## p100 hist
## 1 0.107 ▁▁▂▇▅
## 2 0.0728 ▃▇▇▂▁
## 3 0.499 ▂▇▂▁▁
## 4 14.8 ▁▁▁▇▅
## 5 15.5 ▁▁▁▇▃
## 6 12.4 ▃▇▇▇▅
## 7 0.0628 ▂▇▆▃▂
## 8 0.0716 ▂▂▇▇▃
## 9 0.492 ▂▇▇▃▂
## 10 0.00304 ▁▁▃▇▂
## 11 1.01 ▁▁▇▃▁
## 12 0.0391 ▁▇▇▁▁
## 13 0.0311 ▁▁▂▆▇
## Elapsed: 1.01 s
# Time the test-set summary of the fitted model with type_ci = "bootstrap"
# (progress display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] 0.906853
##
## $R_squared_adj
## [1] 0.8930926
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.33671 -1.10461 -0.02411 0.12105 1.46980 9.29398
##
## $citests
## estimate lower upper p-value signif
## crim -0.0544164530 -0.077388205 -0.033868739 3.636383e-06 ***
## zn -0.0046418100 -0.008950983 0.000193778 4.932471e-02 *
## indus -0.0245357110 -0.049120109 0.000544111 5.559257e-02 .
## chas 7.1730075477 6.366223778 7.870788737 1.484058e-34 ***
## nox -9.4958030753 -12.303031437 -7.145086697 1.011413e-10 ***
## rm 4.7080249286 3.856816287 5.491698401 1.329440e-19 ***
## age -0.0439718628 -0.053220093 -0.035161838 9.023576e-16 ***
## dis -1.4214523042 -1.569696745 -1.285297810 3.493404e-36 ***
## rad 0.1810040336 0.155700757 0.206461984 2.383885e-25 ***
## tax -0.0115644823 -0.013086037 -0.010031979 3.950118e-27 ***
## ptratio -0.4819300831 -0.582929860 -0.390711993 2.146847e-16 ***
## black -0.0002461991 -0.001876356 0.001546006 7.785258e-01
## lstat -0.4091458985 -0.478128045 -0.343430510 6.000266e-21 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75
## 1 crim -0.0544 0.107 -0.416 -0.0696 -0.00478 0.00661
## 2 zn -0.00464 0.0253 -0.0524 -0.0200 -0.00301 0.00416
## 3 indus -0.0245 0.136 -0.308 -0.109 -0.0396 0.0220
## 4 chas 7.17 4.06 -17.7 5.97 7.12 9.16
## 5 nox -9.50 13.2 -78.3 -15.0 -7.32 -2.30
## 6 rm 4.71 3.91 -3.43 1.83 4.48 7.72
## 7 age -0.0440 0.0464 -0.138 -0.0778 -0.0506 -0.00674
## 8 dis -1.42 0.772 -3.43 -1.80 -1.32 -0.933
## 9 rad 0.181 0.129 -0.0944 0.0827 0.173 0.261
## 10 tax -0.0116 0.00765 -0.0388 -0.0148 -0.00963 -0.00718
## 11 ptratio -0.482 0.513 -2.13 -0.671 -0.441 -0.209
## 12 black -0.000246 0.00877 -0.0263 -0.00450 0.0000316 0.00344
## 13 lstat -0.409 0.336 -1.62 -0.474 -0.310 -0.189
## p100 hist
## 1 0.107 ▁▁▂▇▅
## 2 0.0728 ▃▇▇▂▁
## 3 0.499 ▂▇▂▁▁
## 4 14.8 ▁▁▁▇▅
## 5 15.5 ▁▁▁▇▃
## 6 12.4 ▃▇▇▇▅
## 7 0.0628 ▂▇▆▃▂
## 8 0.0716 ▂▂▇▇▃
## 9 0.492 ▂▇▇▃▂
## 10 0.00304 ▁▁▃▇▂
## 11 1.01 ▁▁▇▃▁
## 12 0.0391 ▁▇▇▁▁
## 13 0.0311 ▁▁▂▆▇
## Elapsed: 1.556 s
# ranger ----
# Build a Regressor using method "ranger" and pi_method "splitconformal".
obj <- learningmachine::Regressor$new(
  method = "ranger",
  pi_method = "splitconformal"
)
# Query the object's type and name.
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# NOTE(review): no obj$fit() call is visible before obj$summary() is used, yet
# the rendered output that follows reports an elapsed time; presumably the fit
# chunk was lost from this rendering -- confirm against the vignette source.
## Elapsed: 0.093 s
## [1] 3.79469
# Time the test-set summary of the fitted model (default type_ci, progress
# display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE)
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] 0.8392681
##
## $R_squared_adj
## [1] 0.8155236
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.1590 -1.7302 -0.2022 0.4068 2.0991 12.6480
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## crim -46.07120224 -210.45915529 118.31675081 5.794688e-01
## zn -0.01770371 -0.20552612 0.17011870 8.520499e-01
## indus -15.27494058 -26.85443276 -3.69544841 1.023790e-02 *
## chas 0.00000000 NaN NaN NaN
## nox -521.68056125 -688.90286706 -354.45825544 1.310563e-08 ***
## rm 275.37923329 224.35153031 326.40693626 2.514291e-18 ***
## age -1.75704619 -2.46954300 -1.04454938 3.780961e-06 ***
## dis 20.57598690 -4.55485374 45.70682754 1.074525e-01
## rad 2.13991832 0.02367618 4.25616046 4.753842e-02 *
## tax -0.60978051 -0.79942682 -0.42013420 5.448371e-09 ***
## ptratio -32.87462570 -38.77698867 -26.97226272 4.440300e-19 ***
## black -0.26384711 -0.49997135 -0.02772287 2.889086e-02 *
## lstat -44.64193590 -57.99309865 -31.29077315 1.652805e-09 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 crim -46.1 837. -5846. -80.5 -4.90 114. 2107.
## 2 zn -0.0177 0.956 -4.47 0 0 0 5.22
## 3 indus -15.3 59.0 -299. -4.20 1.66 8.67 41.1
## 4 chas 0 0 0 0 0 0 0
## 5 nox -522. 851. -3931. -965. -334. 4.36 852.
## 6 rm 275. 260. -11.5 73.4 180. 378. 870.
## 7 age -1.76 3.63 -26.2 -2.88 -1.16 0 5.56
## 8 dis 20.6 128. -234. -33.8 -2.58 12.7 636.
## 9 rad 2.14 10.8 -6.31 0 0 0.638 91.8
## 10 tax -0.610 0.966 -3.85 -0.621 -0.233 -0.116 0.525
## 11 ptratio -32.9 30.0 -140. -44.7 -26.5 -13.8 4.66
## 12 black -0.264 1.20 -4.11 -1.07 0.0418 0.521 2.22
## 13 lstat -44.6 68.0 -335. -56.4 -21.1 -5.23 45.9
## hist
## 1 ▁▁▁▇▁
## 2 ▁▁▇▁▁
## 3 ▁▁▁▁▇
## 4 ▁▁▇▁▁
## 5 ▁▁▃▇▇
## 6 ▇▅▁▂▂
## 7 ▁▁▁▇▆
## 8 ▁▇▁▁▁
## 9 ▇▁▁▁▁
## 10 ▁▁▁▃▇
## 11 ▁▁▂▇▇
## 12 ▁▂▃▇▂
## 13 ▁▁▁▃▇
## Elapsed: 0.92 s
# Time the test-set summary of the fitted model with type_ci = "bootstrap"
# (progress display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] 0.8392681
##
## $R_squared_adj
## [1] 0.8155236
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.1590 -1.7302 -0.2022 0.4068 2.0991 12.6480
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## crim -46.07120224 -226.0963824 86.64772635 5.649169e-01
## zn -0.01770371 -0.1911345 0.16043942 8.439189e-01
## indus -15.27494058 -27.0066412 -4.76012553 8.326543e-03 **
## chas 0.00000000 0.0000000 0.00000000 NaN
## nox -521.68056125 -694.5379226 -351.49586075 3.697818e-08 ***
## rm 275.37923329 225.1057152 332.38712155 6.551598e-17 ***
## age -1.75704619 -2.5409652 -1.08938031 6.883580e-06 ***
## dis 20.57598690 -3.0778308 44.65777193 9.417985e-02 .
## rad 2.13991832 0.4743989 4.36379845 3.340076e-02 *
## tax -0.60978051 -0.7988964 -0.42355307 5.710666e-09 ***
## ptratio -32.87462570 -38.9972525 -27.71671691 6.732061e-20 ***
## black -0.26384711 -0.4737498 -0.03739985 1.967444e-02 *
## lstat -44.64193590 -57.9757644 -32.27758919 7.154857e-10 ***
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 102
## Number of columns 13
## _______________________
## Column type frequency:
## numeric 13
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 crim -46.1 837. -5846. -80.5 -4.90 114. 2107.
## 2 zn -0.0177 0.956 -4.47 0 0 0 5.22
## 3 indus -15.3 59.0 -299. -4.20 1.66 8.67 41.1
## 4 chas 0 0 0 0 0 0 0
## 5 nox -522. 851. -3931. -965. -334. 4.36 852.
## 6 rm 275. 260. -11.5 73.4 180. 378. 870.
## 7 age -1.76 3.63 -26.2 -2.88 -1.16 0 5.56
## 8 dis 20.6 128. -234. -33.8 -2.58 12.7 636.
## 9 rad 2.14 10.8 -6.31 0 0 0.638 91.8
## 10 tax -0.610 0.966 -3.85 -0.621 -0.233 -0.116 0.525
## 11 ptratio -32.9 30.0 -140. -44.7 -26.5 -13.8 4.66
## 12 black -0.264 1.20 -4.11 -1.07 0.0418 0.521 2.22
## 13 lstat -44.6 68.0 -335. -56.4 -21.1 -5.23 45.9
## hist
## 1 ▁▁▁▇▁
## 2 ▁▁▇▁▁
## 3 ▁▁▁▁▇
## 4 ▁▁▇▁▁
## 5 ▁▁▃▇▇
## 6 ▇▅▁▂▂
## 7 ▁▁▁▇▆
## 8 ▁▇▁▁▁
## 9 ▇▁▁▁▁
## 10 ▁▁▁▃▇
## 11 ▁▁▂▇▇
## 12 ▁▂▃▇▂
## 13 ▁▁▁▃▇
## Elapsed: 1.433 s
# KRR regression on mtcars ----
# Response is mpg; predictors are all mtcars columns except the first one.
y <- mtcars$mpg
X <- as.matrix(mtcars[, -1])
# Seed the RNG so the random 70/30 train/test partition is reproducible.
set.seed(123)
(index_train <- base::sample.int(
  n = nrow(X),
  size = floor(0.7 * nrow(X)),
  replace = FALSE
))
## [1] 31 15 19 14 3 10 18 22 11 5 20 29 23 30 9 28 8 27 7 32 26 17
# Sampled rows form the training set; every remaining row goes to the test set.
y_train <- y[index_train]
y_test <- y[-index_train]
X_train <- X[index_train, ]
X_test <- X[-index_train, ]
dim(X_train)
## [1] 22 10
dim(X_test)
## [1] 10 10
# Build a Regressor using method "krr" and pi_method "splitconformal".
obj <- learningmachine::Regressor$new(
  method = "krr",
  pi_method = "splitconformal"
)
# Query the object's type and name.
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Fit on the training split with reg_lambda = 0.1 and report the elapsed time.
t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.005 s
# Display the test-set predictions returned by the fitted object.
print(obj$predict(X_test))
## $preds
## [1] 22.151349 21.802194 12.541365 10.124759 13.408181 14.155816 7.421184
## [8] 16.879536 13.615153 12.749565
##
## $lower
## [1] 12.1513495 11.8021941 2.5413650 0.1247588 3.4081805 4.1558157
## [7] -2.5788160 6.8795365 3.6151533 2.7495651
##
## $upper
## [1] 32.15135 31.80219 22.54137 20.12476 23.40818 24.15582 17.42118 26.87954
## [9] 23.61515 22.74957
# Refit on the training split with reg_lambda = 0.1 and report the elapsed time.
t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.01 s
# Set the level to 95 and the interval method to "splitconformal", then
# compute the test-set predictions.
obj$set_level(95)
obj$set_pi_method("splitconformal")
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Refit on the training split with reg_lambda = 0.1 and report the elapsed time.
t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.006 s
# Compute the test-set predictions from the refitted object.
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Time the test-set summary of the fitted model (default type_ci, progress
# display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE)
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] -0.8614864
##
## $R_squared_adj
## [1] 17.75338
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.1513 0.5083 3.0680 3.4751 5.9929 8.8586
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -23.98943109 -46.4975109 -1.4813513 3.918219e-02 *
## disp -0.61133395 -0.9655770 -0.2570909 3.597927e-03 **
## hp -0.07828878 -0.3785573 0.2219797 5.698268e-01
## drat 310.94399534 160.4146969 461.4732937 1.163859e-03 **
## wt -197.39979731 -240.1776661 -154.6219286 2.500030e-06 ***
## qsec -19.50660485 -54.1139966 15.1007869 2.342132e-01
## vs 69.84795566 -85.8899529 225.5858643 3.368080e-01
## am 137.97019623 -0.2148915 276.1552839 5.028830e-02 .
## gear 191.57905165 134.3446800 248.8134233 3.424783e-05 ***
## carb 3.39227959 -22.2875140 29.0720732 7.718555e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -24.0 31.5 -64.0 -40.9 -34.1 -0.849 37.6
## 2 disp -0.611 0.495 -1.66 -0.934 -0.429 -0.307 -0.0817
## 3 hp -0.0783 0.420 -1.00 -0.218 -0.0402 0.235 0.359
## 4 drat 311. 210. -159. 195. 369. 464. 534.
## 5 wt -197. 59.8 -280. -252. -196. -144. -124.
## 6 qsec -19.5 48.4 -73.4 -60.0 -29.3 12.8 60.3
## 7 vs 69.8 218. -218. -104. 86.1 103. 421.
## 8 am 138. 193. -161. 99.8 162. 201. 516.
## 9 gear 192. 80.0 74.7 142. 178. 224. 367.
## 10 carb 3.39 35.9 -56.3 -6.54 3.71 36.0 41.3
## hist
## 1 ▃▇▂▃▂
## 2 ▂▂▂▆▇
## 3 ▂▁▆▃▇
## 4 ▂▁▆▃▇
## 5 ▇▁▇▂▇
## 6 ▇▇▂▂▅
## 7 ▆▂▇▁▃
## 8 ▂▁▇▁▁
## 9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
## Elapsed: 0.216 s
# Time the test-set summary of the fitted model with type_ci = "bootstrap"
# (progress display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] -0.8614864
##
## $R_squared_adj
## [1] 17.75338
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.1513 0.5083 3.0680 3.4751 5.9929 8.8586
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -23.98943109 -40.5922255 -6.2447901 2.292950e-02 *
## disp -0.61133395 -0.9172916 -0.3424231 2.416713e-03 **
## hp -0.07828878 -0.3397646 0.1496909 5.462365e-01
## drat 310.94399534 179.0656056 417.8607418 6.414323e-04 ***
## wt -197.39979731 -231.8809681 -164.5228586 1.115553e-06 ***
## qsec -19.50660485 -44.6930749 9.1315753 1.891311e-01
## vs 69.84795566 -48.5034409 190.0256695 2.806199e-01
## am 137.97019623 27.1241565 250.3954442 3.846436e-02 *
## gear 191.57905165 147.4748755 241.7819503 2.296739e-05 ***
## carb 3.39227959 -17.8115551 22.1933920 7.472011e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -24.0 31.5 -64.0 -40.9 -34.1 -0.849 37.6
## 2 disp -0.611 0.495 -1.66 -0.934 -0.429 -0.307 -0.0817
## 3 hp -0.0783 0.420 -1.00 -0.218 -0.0402 0.235 0.359
## 4 drat 311. 210. -159. 195. 369. 464. 534.
## 5 wt -197. 59.8 -280. -252. -196. -144. -124.
## 6 qsec -19.5 48.4 -73.4 -60.0 -29.3 12.8 60.3
## 7 vs 69.8 218. -218. -104. 86.1 103. 421.
## 8 am 138. 193. -161. 99.8 162. 201. 516.
## 9 gear 192. 80.0 74.7 142. 178. 224. 367.
## 10 carb 3.39 35.9 -56.3 -6.54 3.71 36.0 41.3
## hist
## 1 ▃▇▂▃▂
## 2 ▂▂▂▆▇
## 3 ▂▁▆▃▇
## 4 ▂▁▆▃▇
## 5 ▇▁▇▂▇
## 6 ▇▇▂▂▅
## 7 ▆▂▇▁▃
## 8 ▂▁▇▁▁
## 9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
## Elapsed: 0.337 s
# Switch the interval method to "kdejackknifeplus" and refit on the training
# split with reg_lambda = 0.1, timing the fit.
obj$set_pi_method("kdejackknifeplus")
t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = 0.1)
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## | | | 0% | |=== | 5% | |======= | 10% | |========== | 14% | |============= | 19% | |================= | 24% | |==================== | 29% | |======================= | 33% | |=========================== | 38% | |============================== | 43% | |================================= | 48% | |===================================== | 52% | |======================================== | 57% | |=========================================== | 62% | |=============================================== | 67% | |================================================== | 71% | |===================================================== | 76% | |========================================================= | 81% | |============================================================ | 86% | |=============================================================== | 90% | |=================================================================== | 95% | |======================================================================| 100%
## Elapsed: 0.023 s
# Compute the test-set predictions from the refitted object.
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Time the test-set summary of the fitted model (default type_ci, progress
# display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE)
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] -3.076311
##
## $R_squared_adj
## [1] 37.6868
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.660 6.009 6.743 6.944 7.966 10.753
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -36.7817740 -50.6623622 -22.9011858 2.038888e-04 ***
## disp -0.2133047 -0.6537255 0.2271161 3.017009e-01
## hp -0.2920633 -0.8288709 0.2447443 2.495994e-01
## drat 259.9789584 141.9967301 377.9611867 7.545111e-04 ***
## wt -125.6032827 -159.9084338 -91.2981317 1.675653e-05 ***
## qsec 6.1547882 -22.2053730 34.5149494 6.352182e-01
## vs 35.1176737 -92.7670167 163.0023641 5.498770e-01
## am 85.8109695 -32.8651723 204.4871113 1.363325e-01
## gear 264.4099446 185.2573493 343.5625399 3.479849e-05 ***
## carb -24.1859057 -56.7206453 8.3488338 1.269314e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -36.8 19.4 -52.8 -48.3 -40.9 -37.7 13.0
## 2 disp -0.213 0.616 -1.19 -0.561 -0.212 -0.146 0.781
## 3 hp -0.292 0.750 -1.73 -0.461 0.00750 0.135 0.531
## 4 drat 260. 165. -74.7 161. 265. 381. 485.
## 5 wt -126. 48.0 -202. -152. -125. -105. -30.9
## 6 qsec 6.15 39.6 -55.5 -20.7 -1.84 33.6 64.2
## 7 vs 35.1 179. -232. -108. 68.7 95.1 292.
## 8 am 85.8 166. -205. 94.4 122. 144. 346.
## 9 gear 264. 111. 122. 206. 242. 301. 529.
## 10 carb -24.2 45.5 -73.7 -54.9 -47.2 22.6 39.5
## hist
## 1 ▇▅▂▁▂
## 2 ▂▃▇▁▃
## 3 ▃▁▂▇▆
## 4 ▂▃▁▇▆
## 5 ▃▂▇▂▂
## 6 ▂▇▅▅▅
## 7 ▇▁▇▅▅
## 8 ▃▁▇▆▂
## 9 ▇▇▇▁▂
## 10 ▇▇▁▂▇
## Elapsed: 1.093 s
# Time the test-set summary of the fitted model with type_ci = "bootstrap"
# (progress display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] -3.076311
##
## $R_squared_adj
## [1] 37.6868
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.660 6.009 6.743 6.944 7.966 10.753
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl -36.7817740 -45.3808553 -26.1949547 3.635150e-05 ***
## disp -0.2133047 -0.5752521 0.1492085 2.781632e-01
## hp -0.2920633 -0.7217967 0.1034058 1.987160e-01
## drat 259.9789584 162.2017490 343.4030751 3.241203e-04 ***
## wt -125.6032827 -152.6736671 -98.7351447 7.604780e-06 ***
## qsec 6.1547882 -14.7007260 29.0726634 5.949372e-01
## vs 35.1176737 -62.1816637 130.3916175 4.928333e-01
## am 85.8109695 -10.0596731 174.5354755 1.017444e-01
## gear 264.4099446 208.5642717 335.6537285 1.897459e-05 ***
## carb -24.1859057 -50.4118778 2.2305024 1.052301e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl -36.8 19.4 -52.8 -48.3 -40.9 -37.7 13.0
## 2 disp -0.213 0.616 -1.19 -0.561 -0.212 -0.146 0.781
## 3 hp -0.292 0.750 -1.73 -0.461 0.00750 0.135 0.531
## 4 drat 260. 165. -74.7 161. 265. 381. 485.
## 5 wt -126. 48.0 -202. -152. -125. -105. -30.9
## 6 qsec 6.15 39.6 -55.5 -20.7 -1.84 33.6 64.2
## 7 vs 35.1 179. -232. -108. 68.7 95.1 292.
## 8 am 85.8 166. -205. 94.4 122. 144. 346.
## 9 gear 264. 111. 122. 206. 242. 301. 529.
## 10 carb -24.2 45.5 -73.7 -54.9 -47.2 22.6 39.5
## hist
## 1 ▇▅▂▁▂
## 2 ▂▃▇▁▃
## 3 ▃▁▂▇▆
## 4 ▂▃▁▇▆
## 5 ▃▂▇▂▂
## 6 ▂▇▅▅▅
## 7 ▇▁▇▅▅
## 8 ▃▁▇▆▂
## 9 ▇▇▇▁▂
## 10 ▇▇▁▂▇
## Elapsed: 0.914 s
# xgboost ----
# Build a Regressor using method "xgboost" and pi_method "splitconformal".
obj <- learningmachine::Regressor$new(
  method = "xgboost",
  pi_method = "splitconformal"
)
# Query the object's type and name.
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Fit on the training split (nrounds = 10, verbose off) and report the elapsed
# time.
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.485 s
# Display the test-set predictions returned by the fitted object.
print(obj$predict(X_test))
## $preds
## [1] 18.13500 18.13500 17.13105 17.13105 14.64118 14.64118 14.03685 21.29947
## [9] 15.33300 14.03685
##
## $lower
## [1] 12.135002 12.135002 11.131052 11.131052 8.641179 8.641179 8.036854
## [8] 15.299475 9.333004 8.036854
##
## $upper
## [1] 24.13500 24.13500 23.13105 23.13105 20.64118 20.64118 20.03685 27.29947
## [9] 21.33300 20.03685
# Time the test-set summary of the fitted model (default type_ci, progress
# display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE)
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] 0.2881145
##
## $R_squared_adj
## [1] 7.406969
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.6369 0.3926 2.2088 1.5079 2.8650 5.1631
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 0.0000000 NaN NaN NaN
## disp -0.1859971 -0.6067516 0.2347575 0.3434364
## hp 0.0000000 NaN NaN NaN
## drat 28.9866074 -18.4823056 76.4555203 0.2004909
## wt 0.0000000 NaN NaN NaN
## qsec -1.7295559 -5.6420830 2.1829713 0.3434364
## vs 0.0000000 NaN NaN NaN
## am 0.0000000 NaN NaN NaN
## gear 0.0000000 NaN NaN NaN
## carb 0.0000000 NaN NaN NaN
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100 hist
## 1 cyl 0 0 0 0 0 0 0 ▁▁▇▁▁
## 2 disp -0.186 0.588 -1.86 0 0 0 0 ▁▁▁▁▇
## 3 hp 0 0 0 0 0 0 0 ▁▁▇▁▁
## 4 drat 29.0 66.4 0 0 0 0 200. ▇▁▁▁▁
## 5 wt 0 0 0 0 0 0 0 ▁▁▇▁▁
## 6 qsec -1.73 5.47 -17.3 0 0 0 0 ▁▁▁▁▇
## 7 vs 0 0 0 0 0 0 0 ▁▁▇▁▁
## 8 am 0 0 0 0 0 0 0 ▁▁▇▁▁
## 9 gear 0 0 0 0 0 0 0 ▁▁▇▁▁
## 10 carb 0 0 0 0 0 0 0 ▁▁▇▁▁
## Elapsed: 0.385 s
# Time the test-set summary of the fitted model with type_ci = "bootstrap"
# (progress display disabled).
t0 <- proc.time()[3]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
# Report the elapsed time: t0 was captured but never used, although the
# rendered output ends with an "Elapsed" line.
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## $R_squared
## [1] 0.2881145
##
## $R_squared_adj
## [1] 7.406969
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.6369 0.3926 2.2088 1.5079 2.8650 5.1631
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 0.0000000 0.0000000 0.00000 NaN
## disp -0.1859971 -0.5579912 0.00000 0.2237346
## hp 0.0000000 0.0000000 0.00000 NaN
## drat 28.9866074 0.0000000 77.01909 0.1742390
## wt 0.0000000 0.0000000 0.00000 NaN
## qsec -1.7295559 -5.1886676 0.00000 0.2237346
## vs 0.0000000 0.0000000 0.00000 NaN
## am 0.0000000 0.0000000 0.00000 NaN
## gear 0.0000000 0.0000000 0.00000 NaN
## carb 0.0000000 0.0000000 0.00000 NaN
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100 hist
## 1 cyl 0 0 0 0 0 0 0 ▁▁▇▁▁
## 2 disp -0.186 0.588 -1.86 0 0 0 0 ▁▁▁▁▇
## 3 hp 0 0 0 0 0 0 0 ▁▁▇▁▁
## 4 drat 29.0 66.4 0 0 0 0 200. ▇▁▁▁▁
## 5 wt 0 0 0 0 0 0 0 ▁▁▇▁▁
## 6 qsec -1.73 5.47 -17.3 0 0 0 0 ▁▁▁▁▇
## 7 vs 0 0 0 0 0 0 0 ▁▁▇▁▁
## 8 am 0 0 0 0 0 0 0 ▁▁▇▁▁
## 9 gear 0 0 0 0 0 0 0 ▁▁▇▁▁
## 10 carb 0 0 0 0 0 0 0 ▁▁▇▁▁
## Elapsed: 0.378 s
# Refit on the training split (nrounds = 10, verbose off) and report the
# elapsed time.
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.011 s
# Set the level to 95 and compute the test-set predictions.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Switch the interval method to "kdesplitconformal", refit on the training
# split (nrounds = 10, verbose off) and report the elapsed time.
obj$set_pi_method("kdesplitconformal")
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.019 s
# Set the level to 95 and compute the test-set predictions.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Switch the interval method to "bootjackknifeplus", refit on the training
# split (nrounds = 10, verbose off) and report the elapsed time.
obj$set_pi_method("bootjackknifeplus")
t0 <- proc.time()[3]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.007 s
# Set the level to 95 and compute the test-set predictions.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Training responses followed by test predictions; the y-axis range covers the
# interval bounds and every observed response in y.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, predictions in red, observed test values in blue.
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")
# Share of test responses lying inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Build a Regressor using method "rvfl" with nb_hidden = 50 and pi_method
# "splitconformal".
obj <- learningmachine::Regressor$new(
  method = "rvfl",
  nb_hidden = 50L,
  pi_method = "splitconformal"
)
# Query the object's type and name.
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Fit on the training split with reg_lambda = 0.01 and report the elapsed time.
t0 <- proc.time()[3]
obj$fit(X_train, y_train, reg_lambda = 0.01)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed: 0.053 s
# Display the test-set predictions returned by the fitted object.
print(obj$predict(X_test))
## $preds
## Mazda RX4 Mazda RX4 Wag Hornet 4 Drive Valiant
## 21.350888 19.789387 13.106761 9.695310
## Merc 450SE Merc 450SL Lincoln Continental Toyota Corona
## 11.131161 12.568682 2.044672 19.289805
## Camaro Z28 Pontiac Firebird
## 14.847878 12.282272
##
## $lower
## Mazda RX4 Mazda RX4 Wag Hornet 4 Drive Valiant
## 12.3508879 10.7893873 4.1067608 0.6953102
## Merc 450SE Merc 450SL Lincoln Continental Toyota Corona
## 2.1311611 3.5686817 -6.9553279 10.2898053
## Camaro Z28 Pontiac Firebird
## 5.8478777 3.2822719
##
## $upper
## Mazda RX4 Mazda RX4 Wag Hornet 4 Drive Valiant
## 30.35089 28.78939 22.10676 18.69531
## Merc 450SE Merc 450SL Lincoln Continental Toyota Corona
## 20.13116 21.56868 11.04467 28.28981
## Camaro Z28 Pontiac Firebird
## 23.84788 21.28227
# Start the timer, then summarise the fitted model on the test split with
# progress reporting switched off.
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE
)
## $R_squared
## [1] -1.505856
##
## $R_squared_adj
## [1] 23.55271
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.548 1.461 5.000 4.349 7.949 8.405
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 137.649985 39.777048 235.5229227 1.115728e-02 *
## disp -2.406399 -4.650678 -0.1621204 3.825959e-02 *
## hp -0.527573 -1.402043 0.3468975 2.054686e-01
## drat 707.372951 246.095138 1168.6507638 7.059500e-03 **
## wt -500.429007 -565.047979 -435.8100352 2.910469e-08 ***
## qsec -89.930939 -124.899691 -54.9621860 2.537870e-04 ***
## vs 234.198406 -127.886990 596.2838006 1.774484e-01
## am -235.789718 -512.422513 40.8430776 8.592503e-02 .
## gear 52.646721 -6.640614 111.9340567 7.547657e-02 .
## carb -17.100561 -87.819649 53.6185270 5.976705e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 138. 137. -8.40 75.8 91.1 98.6 394.
## 2 disp -2.41 3.14 -8.46 -1.32 -1.08 -0.775 -0.300
## 3 hp -0.528 1.22 -3.40 -0.695 -0.188 0.0137 0.893
## 4 drat 707. 645. 55.7 388. 482. 563. 1939.
## 5 wt -500. 90.3 -698. -538. -500. -458. -377.
## 6 qsec -89.9 48.9 -145. -128. -102. -64.0 2.67
## 7 vs 234. 506. -121. -13.2 36.8 53.2 1269.
## 8 am -236. 387. -653. -450. -397. -168. 519.
## 9 gear 52.6 82.9 -107. -4.69 66.2 112. 170.
## 10 carb -17.1 98.9 -117. -64.6 -60.6 -17.5 171.
## hist
## 1 ▂▇▁▁▂
## 2 ▂▁▁▁▇
## 3 ▁▁▁▇▂
## 4 ▅▇▁▁▃
## 5 ▂▁▆▇▃
## 6 ▇▆▁▂▃
## 7 ▇▁▁▁▂
## 8 ▆▇▂▁▃
## 9 ▂▅▅▅▇
## 10 ▇▂▁▁▂
## Elapsed: 0.819 s
# Start the timer, then repeat the test-split summary, this time requesting
# type_ci = "bootstrap".
t0 <- proc.time()["elapsed"]
obj$summary(
  X_test,
  y = y_test,
  show_progress = FALSE,
  type_ci = "bootstrap"
)
## $R_squared
## [1] -1.505856
##
## $R_squared_adj
## [1] 23.55271
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.548 1.461 5.000 4.349 7.949 8.405
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 137.649985 67.927710 224.49681122 7.317784e-03 **
## disp -2.406399 -4.543447 -0.83146440 3.164812e-02 *
## hp -0.527573 -1.228274 0.08917815 1.509239e-01
## drat 707.372951 379.807480 1129.02366858 4.912929e-03 **
## wt -500.429007 -557.338124 -452.13988493 1.682493e-08 ***
## qsec -89.930939 -115.877565 -59.82831064 1.427513e-04 ***
## vs 234.198406 -17.180072 581.09310604 1.592806e-01
## am -235.789718 -437.517547 13.80315000 7.084922e-02 .
## gear 52.646721 6.427697 94.54115818 4.386846e-02 *
## carb -17.100561 -69.147781 45.17009752 5.720359e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 10
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 138. 137. -8.40 75.8 91.1 98.6 394.
## 2 disp -2.41 3.14 -8.46 -1.32 -1.08 -0.775 -0.300
## 3 hp -0.528 1.22 -3.40 -0.695 -0.188 0.0137 0.893
## 4 drat 707. 645. 55.7 388. 482. 563. 1939.
## 5 wt -500. 90.3 -698. -538. -500. -458. -377.
## 6 qsec -89.9 48.9 -145. -128. -102. -64.0 2.67
## 7 vs 234. 506. -121. -13.2 36.8 53.2 1269.
## 8 am -236. 387. -653. -450. -397. -168. 519.
## 9 gear 52.6 82.9 -107. -4.69 66.2 112. 170.
## 10 carb -17.1 98.9 -117. -64.6 -60.6 -17.5 171.
## hist
## 1 ▂▇▁▁▂
## 2 ▂▁▁▁▇
## 3 ▁▁▁▇▂
## 4 ▅▇▁▁▃
## 5 ▂▁▆▇▃
## 6 ▇▆▁▂▃
## 7 ▇▁▁▁▂
## 8 ▆▇▂▁▃
## 9 ▂▅▅▅▇
## 10 ▇▂▁▁▂
## Elapsed: 0.78 s
## Elapsed: 0.008 s
# Set the interval level to 95, then predict the held-out rows.
obj$set_level(95)
res <- obj$predict(X = X_test)
# Base plot: training responses followed by the test predictions.
plot(
  c(y_train, res$preds),
  type = "l", main = "", ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Overlay, in this order: upper bound, lower bound, predictions, observed
# test values, each appended to the training responses.
invisible(Map(
  function(tail_values, colour) lines(c(y_train, tail_values), col = colour),
  list(res$upper, res$lower, res$preds, y_test),
  c("gray60", "gray60", "red", "blue")
))
# Dashed line at the last training observation.
abline(v = length(y_train), lty = 2, col = "black")
# Share of test responses that fall inside [lower, upper].
mean(
  (y_test >= as.numeric(res$lower)) *
    (y_test <= as.numeric(res$upper))
)
## [1] 1
# Update the RVFL model
# Keep the model's current `coef` entry so it can be inspected after the update.
previous_coefs <- drop(obj$model$coef)
# The first test row becomes the new observation; the remaining rows stay
# held out.
newx <- X_test[1, ]
new_X_test <- X_test[-1, ]
newy <- y_test[1]
new_y_test <- y_test[-1]
# Time the update of the fitted model with the new observation.
t0 <- proc.time()["elapsed"]
obj$update(newx, newy)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed: 0.008 s
summary(previous_coefs)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.68212 -0.26567 -0.05157 0.00700 0.21046 2.19222
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.030666 -0.002610 0.004189 0.002917 0.011386 0.025243
# Start the timer, then summarise the updated model on the remaining
# held-out rows with progress reporting switched off.
start <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE
)
## $R_squared
## [1] -1.809339
##
## $R_squared_adj
## [1] 12.23735
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.168 2.513 5.541 5.058 8.185 8.703
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 111.6701473 17.076928 206.2633669 2.615518e-02 *
## disp -1.7983224 -3.876380 0.2797349 8.106884e-02 .
## hp -0.4167545 -1.501658 0.6681495 4.015523e-01
## drat 569.9102780 148.862037 990.9585186 1.420088e-02 *
## wt -504.1496696 -583.757006 -424.5423330 4.741273e-07 ***
## qsec -107.9102921 -138.571336 -77.2492482 3.936777e-05 ***
## vs 145.0280002 -173.164419 463.2204193 3.239468e-01
## am -319.6910568 -566.618653 -72.7634604 1.745263e-02 *
## gear 57.7630332 -18.934712 134.4607782 1.206459e-01
## carb -42.9572292 -108.690903 22.7764447 1.702409e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 9
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 112. 123. -13.5 64.5 93.6 93.9 426.
## 2 disp -1.80 2.70 -8.94 -1.41 -0.805 -0.689 -0.361
## 3 hp -0.417 1.41 -3.54 -0.679 -0.0942 -0.0556 1.19
## 4 drat 570. 548. 36.8 371. 439. 501. 1972.
## 5 wt -504. 104. -742. -523. -497. -461. -382.
## 6 qsec -108. 39.9 -152. -143. -115. -93.0 -35.9
## 7 vs 145. 414. -116. -23.9 51.1 81.2 1231.
## 8 am -320. 321. -575. -479. -395. -368. 465.
## 9 gear 57.8 99.8 -113. 1.22 35.2 130. 196.
## 10 carb -43.0 85.5 -129. -79.6 -77.9 -22.5 165.
## hist
## 1 ▅▇▁▁▂
## 2 ▁▁▁▁▇
## 3 ▂▁▂▇▃
## 4 ▅▇▁▁▂
## 5 ▂▁▂▇▃
## 6 ▇▅▅▂▂
## 7 ▇▁▁▁▁
## 8 ▇▁▁▁▁
## 9 ▃▇▇▇▇
## 10 ▇▅▁▁▂
## Elapsed: 0.158 s
# Start the timer, then repeat the summary on the remaining held-out rows,
# this time requesting type_ci = "bootstrap".
start <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE,
  type_ci = "bootstrap"
)
## $R_squared
## [1] -1.809339
##
## $R_squared_adj
## [1] 12.23735
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.168 2.513 5.541 5.058 8.185 8.703
##
## $Coverage_rate
## [1] 100
##
## $citests
## estimate lower upper p-value signif
## cyl 111.6701473 56.594459 201.0414388 1.629833e-02 *
## disp -1.7983224 -3.653135 -0.7503408 4.130189e-02 *
## hp -0.4167545 -1.276126 0.2910989 3.277078e-01
## drat 569.9102780 332.259718 964.1132085 7.668526e-03 **
## wt -504.1496696 -578.789199 -452.5053839 2.773501e-07 ***
## qsec -107.9102921 -129.483384 -82.5423190 1.836083e-05 ***
## vs 145.0280002 -28.272020 432.6517253 2.524377e-01
## am -319.6910568 -475.989615 -90.6464455 1.166475e-02 *
## gear 57.7630332 1.751775 117.2749634 8.565636e-02 .
## carb -42.9572292 -84.450432 16.1551891 1.327138e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 9
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 112. 123. -13.5 64.5 93.6 93.9 426.
## 2 disp -1.80 2.70 -8.94 -1.41 -0.805 -0.689 -0.361
## 3 hp -0.417 1.41 -3.54 -0.679 -0.0942 -0.0556 1.19
## 4 drat 570. 548. 36.8 371. 439. 501. 1972.
## 5 wt -504. 104. -742. -523. -497. -461. -382.
## 6 qsec -108. 39.9 -152. -143. -115. -93.0 -35.9
## 7 vs 145. 414. -116. -23.9 51.1 81.2 1231.
## 8 am -320. 321. -575. -479. -395. -368. 465.
## 9 gear 57.8 99.8 -113. 1.22 35.2 130. 196.
## 10 carb -43.0 85.5 -129. -79.6 -77.9 -22.5 165.
## hist
## 1 ▅▇▁▁▂
## 2 ▁▁▁▁▇
## 3 ▂▁▂▇▃
## 4 ▅▇▁▁▂
## 5 ▂▁▂▇▃
## 6 ▇▅▅▂▂
## 7 ▇▁▁▁▁
## 8 ▇▁▁▁▁
## 9 ▃▇▇▇▇
## 10 ▇▅▁▁▂
## Elapsed: 0.306 s
# Predict the rows that are still held out after the update.
res <- obj$predict(X = new_X_test)
# Observed responses preceding the predictions: the original training
# responses plus the observation used for the update.
new_y_train <- c(y_train, newy)
# Observed responses followed by predictions; the y-axis spans both interval
# bounds and every observed response.
plot(
  c(new_y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, point predictions in red, observed values in blue.
lines(c(new_y_train, res$upper), col = "gray60")
lines(c(new_y_train, res$lower), col = "gray60")
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
# Dashed line at the last observed value. The plotted series starts with
# `new_y_train`, so the boundary is its length; `length(y_train)` would leave
# the newly added observation on the prediction side of the marker.
abline(v = length(new_y_train), lty = 2, col = "black")
# Share of the remaining test responses that fall inside [lower, upper].
mean(
  (new_y_test >= as.numeric(res$lower)) *
    (new_y_test <= as.numeric(res$upper))
)
## [1] 1
# Update the RVFL model (part 2)
# The second test row becomes the next observation; the first two rows are
# removed from the held-out data.
newx <- X_test[2, ]
new_X_test <- X_test[-(1:2), ]
newy <- y_test[2]
new_y_test <- y_test[-(1:2)]
## Elapsed: 0.005 s
# Start the timer, then summarise the model on the rows that are still held
# out, with progress reporting switched off.
t0 <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE
)
## $R_squared
## [1] -3.356623
##
## $R_squared_adj
## [1] 11.16545
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.950 5.030 6.374 6.369 8.774 11.528
##
## $Coverage_rate
## [1] 75
##
## $citests
## estimate lower upper p-value signif
## cyl 40.8981137 6.878148 74.9180798 2.494779e-02 *
## disp -0.7335494 -1.206939 -0.2601595 8.026181e-03 **
## hp -0.8233606 -2.198927 0.5522055 1.998737e-01
## drat 549.7206897 416.053783 683.3875968 2.570765e-05 ***
## wt -469.9351032 -535.877454 -403.9927527 6.344763e-07 ***
## qsec -116.6183871 -156.767393 -76.4693814 2.380078e-04 ***
## vs -194.4213942 -288.046178 -100.7966103 1.732503e-03 **
## am -395.7216847 -562.762331 -228.6810387 8.143911e-04 ***
## gear 53.0732573 -59.833653 165.9801679 3.030574e-01
## carb -25.9448064 -63.759959 11.8703467 1.487567e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 8
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 40.9 40.7 -40.5 23.9 56.3 69.9 77.8
## 2 disp -0.734 0.566 -1.64 -1.03 -0.571 -0.372 -0.139
## 3 hp -0.823 1.65 -3.99 -1.18 -0.974 -0.196 1.25
## 4 drat 550. 160. 170. 549. 606. 642. 643.
## 5 wt -470. 78.9 -543. -537. -489. -437. -336.
## 6 qsec -117. 48.0 -179. -143. -131. -99.1 -29.9
## 7 vs -194. 112. -377. -283. -162. -120. -46.3
## 8 am -396. 200. -719. -481. -357. -319. -67.7
## 9 gear 53.1 135. -143. -23.9 16.5 172. 231.
## 10 carb -25.9 45.2 -101. -48.8 -23.8 -9.36 45.7
## hist
## 1 ▂▂▂▁▇
## 2 ▅▁▂▇▅
## 3 ▂▁▇▂▃
## 4 ▁▁▁▁▇
## 5 ▇▅▂▁▅
## 6 ▂▇▂▂▂
## 7 ▂▅▂▇▂
## 8 ▃▁▇▂▂
## 9 ▂▅▅▁▇
## 10 ▂▅▇▁▅
## Elapsed: 0.177 s
# Start the timer, then repeat the summary on the rows that are still held
# out, this time requesting type_ci = "bootstrap".
t0 <- proc.time()["elapsed"]
obj$summary(
  new_X_test,
  y = new_y_test,
  show_progress = FALSE,
  type_ci = "bootstrap"
)
## $R_squared
## [1] -3.356623
##
## $R_squared_adj
## [1] 11.16545
##
## $Residuals
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.950 5.030 6.374 6.369 8.774 11.528
##
## $Coverage_rate
## [1] 75
##
## $citests
## estimate lower upper p-value signif
## cyl 40.8981137 13.611534 65.5300212 1.761953e-02 *
## disp -0.7335494 -1.115045 -0.4043467 4.895727e-03 **
## hp -0.8233606 -1.910531 0.1687140 1.645431e-01
## drat 549.7206897 443.074920 626.9047838 7.440063e-06 ***
## wt -469.9351032 -516.601161 -418.4594690 3.026569e-07 ***
## qsec -116.6183871 -145.417748 -84.7362399 1.335105e-04 ***
## vs -194.4213942 -267.427325 -127.3499666 9.655321e-04 ***
## am -395.7216847 -515.026287 -273.6581298 3.580841e-04 ***
## gear 53.0732573 -37.486884 134.4552153 2.655631e-01
## carb -25.9448064 -53.884043 2.7424530 1.155572e-01
##
## $signif_codes
## [1] "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
##
## $effects
## ── Data Summary ────────────────────────
## Values
## Name effects
## Number of rows 8
## Number of columns 10
## _______________________
## Column type frequency:
## numeric 10
## ________________________
## Group variables None
##
## ── Variable type: numeric ──────────────────────────────────────────────────────
## skim_variable mean sd p0 p25 p50 p75 p100
## 1 cyl 40.9 40.7 -40.5 23.9 56.3 69.9 77.8
## 2 disp -0.734 0.566 -1.64 -1.03 -0.571 -0.372 -0.139
## 3 hp -0.823 1.65 -3.99 -1.18 -0.974 -0.196 1.25
## 4 drat 550. 160. 170. 549. 606. 642. 643.
## 5 wt -470. 78.9 -543. -537. -489. -437. -336.
## 6 qsec -117. 48.0 -179. -143. -131. -99.1 -29.9
## 7 vs -194. 112. -377. -283. -162. -120. -46.3
## 8 am -396. 200. -719. -481. -357. -319. -67.7
## 9 gear 53.1 135. -143. -23.9 16.5 172. 231.
## 10 carb -25.9 45.2 -101. -48.8 -23.8 -9.36 45.7
## hist
## 1 ▂▂▂▁▇
## 2 ▅▁▂▇▅
## 3 ▂▁▇▂▃
## 4 ▁▁▁▁▇
## 5 ▇▅▂▁▅
## 6 ▂▇▂▂▂
## 7 ▂▅▂▇▂
## 8 ▃▁▇▂▂
## 9 ▂▅▅▁▇
## 10 ▂▅▇▁▅
## Elapsed: 0.326 s
# Predict the rows that are still held out.
res <- obj$predict(X = new_X_test)
# Observed responses preceding the predictions: the original training
# responses plus the first two test responses.
new_y_train <- c(y_train, y_test[c(1, 2)])
# Observed responses followed by predictions; the y-axis spans both interval
# bounds and every observed response.
plot(
  c(new_y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Interval bounds in gray, point predictions in red, observed values in blue.
lines(c(new_y_train, res$upper), col = "gray60")
lines(c(new_y_train, res$lower), col = "gray60")
lines(c(new_y_train, res$preds), col = "red")
lines(c(new_y_train, new_y_test), col = "blue")
# Dashed line at the last observed value. The plotted series starts with
# `new_y_train`, so the boundary is its length; `length(y_train)` would place
# the marker two observations too early.
abline(v = length(new_y_train), lty = 2, col = "black")
# Share of the remaining test responses that fall inside [lower, upper].
mean(
  (new_y_test >= as.numeric(res$lower)) *
    (new_y_test <= as.numeric(res$upper))
)
## [1] 0.75