library(learningmachine)
## Loading required package: randtoolbox
## Loading required package: rngWELL
## This is randtoolbox. For an overview, type 'help("randtoolbox")'.
## Loading required package: tseries
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: memoise
## Loading required package: foreach
## Loading required package: skimr
## Loading required package: snow
## Loading required package: doSNOW
## Loading required package: iterators
## Loading required package: ggplot2
## Loading required package: lattice
# Response: miles per gallon. Predictors: every other mtcars column
# (mpg is the first column, hence the -1).
y <- mtcars$mpg
X <- as.matrix(mtcars[, -1])

# Reproducible 80/20 train/test split on row indices.
set.seed(123)
index_train <- base::sample.int(
  n = nrow(X),
  size = floor(0.8 * nrow(X)),
  replace = FALSE
)
print(index_train)
##  [1] 31 15 19 14  3 10 18 22 11  5 20 29 23 30  9 28  8 27  7 32 26 17  4  1 24
y_train <- y[index_train]
y_test <- y[-index_train]
X_train <- X[index_train, ]
X_test <- X[-index_train, ]
print(dim(X_train))
## [1] 25 10
print(dim(X_test))
## [1]  7 10

0 - lm regression

# Linear model behind the generic Regressor interface, with split-conformal
# prediction intervals requested at construction time.
obj <- learningmachine::Regressor$new(
  method = "lm",
  pi_method = "splitconformal"
)
print(obj$get_type())
## [1] "regression"
print(obj$get_name())
## [1] "Regressor"
print(obj$get_method())
## [1] "lm"
# Time the fit on the training split.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.01 s
# Root mean squared error of the point predictions on the test split.
print(sqrt(mean((y_test - obj$predict(X_test)$preds)^2)))
## [1] 3.548852
# Keep the full prediction object (preds / lower / upper) and display it.
res <- obj$predict(X = X_test)
print(res)
## $preds
##       Mazda RX4 Wag             Valiant          Merc 450SE          Merc 450SL 
##            21.67584            19.80291            14.75149            15.70693 
## Lincoln Continental       Toyota Corona    Pontiac Firebird 
##            12.03666            28.20630            13.55241 
## 
## $lower
##       Mazda RX4 Wag             Valiant          Merc 450SE          Merc 450SL 
##           10.675844            8.802908            3.751488            4.706932 
## Lincoln Continental       Toyota Corona    Pontiac Firebird 
##            1.036659           17.206298            2.552412 
## 
## $upper
##       Mazda RX4 Wag             Valiant          Merc 450SE          Merc 450SL 
##            32.67584            30.80291            25.75149            26.70693 
## Lincoln Continental       Toyota Corona    Pontiac Firebird 
##            23.03666            39.20630            24.55241
# Training responses followed by the test-set predictions; the y-axis is
# sized to hold the whole prediction band as well as every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Prediction band (gray), point predictions (red), observed test values (blue).
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage: share of test responses inside [lower, upper].
mean(y_test >= as.numeric(res$lower) & y_test <= as.numeric(res$upper))
## [1] 1
# Refit the linear model with jackknife+ prediction intervals.
# The interval method is selected with set_pi_method() before fitting -- the
# same pattern this file uses for "kdejackknifeplus" -- rather than being
# forwarded through fit()'s extra arguments.
# NOTE(review): assumes fit() does not itself consume `pi_method`; confirm
# against the installed learningmachine version.
obj$set_pi_method("jackknifeplus")
t0 <- proc.time()[3]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.06 s
obj$set_level(95L)

res <- obj$predict(X = X_test)

# Training responses followed by the test-set predictions; the y-axis is
# sized to hold the whole prediction band as well as every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Prediction band (gray), point predictions (red), observed test values (blue).
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage: share of test responses inside [lower, upper].
mean(y_test >= as.numeric(res$lower) & y_test <= as.numeric(res$upper))
## [1] 1

1 - ranger regression

# Same workflow with method = "ranger", again with split-conformal intervals.
obj <- learningmachine::Regressor$new(
  method = "ranger",
  pi_method = "splitconformal"
)
print(obj$get_type())
## [1] "regression"
print(obj$get_name())
## [1] "Regressor"
# Time the fit on the training split.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  1.527 s
# Root mean squared error of the point predictions on the test split.
print(sqrt(mean((y_test - obj$predict(X_test)$preds)^2)))
## [1] 2.344976
# Refit (timed), set the level to 95 and predict on the test split.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.013 s
obj$set_level(95)

res <- obj$predict(X = X_test)

# Training responses followed by the test-set predictions; the y-axis is
# sized to hold the whole prediction band as well as every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Prediction band (gray), point predictions (red), observed test values (blue).
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage: share of test responses inside [lower, upper].
mean(y_test >= as.numeric(res$lower) & y_test <= as.numeric(res$upper))
## [1] 1
# Predict again with the current model settings and redraw the same figure.
res <- obj$predict(X = X_test)

# Training responses followed by the test-set predictions; the y-axis is
# sized to hold the whole prediction band as well as every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Prediction band (gray), point predictions (red), observed test values (blue).
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage: share of test responses inside [lower, upper].
mean(y_test >= as.numeric(res$lower) & y_test <= as.numeric(res$upper))
## [1] 1

2 - KRR & ranger regression on Boston

# Boston housing data from MASS. NOTE: this dataset has a known ethical
# problem (its `black` column is a race-based engineered variable), so it is
# used here only as a numerical benchmark.
library(MASS)
data("Boston")

# Reproducible 80/20 split. The response is medv; the last column of Boston
# is dropped from the predictor matrices.
set.seed(13)
train_idx <- sample(nrow(Boston), floor(0.8 * nrow(Boston)))
y_train <- Boston$medv[train_idx]
y_test <- Boston$medv[-train_idx]
X_train <- as.matrix(Boston[train_idx, -ncol(Boston)])
X_test <- as.matrix(Boston[-train_idx, -ncol(Boston)])

KRR (kernel ridge regression)

# method = "krr" with no prediction intervals (pi_method = "none"); in this
# configuration predict() is used directly as a numeric vector below.
obj <- learningmachine::Regressor$new(
  method = "krr",
  pi_method = "none"
)
print(obj$get_type())
## [1] "regression"
print(obj$get_name())
## [1] "Regressor"
print(obj$get_method())
## [1] "krr"
# Time the fit; reg_lambda is passed through fit() for this method.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.108 s
# Root mean squared error on the test split.
print(sqrt(mean((y_test - obj$predict(X_test))^2)))
## [1] 2.888748
# Time the model summary computed on the held-out data.
t0 <- proc.time()["elapsed"]
obj$summary(X_test, y = y_test, show_progress = FALSE)
## $R_squared
## [1] 0.906853
## 
## $R_squared_adj
## [1] 0.8930926
## 
## $Residuals
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -8.33671 -1.10461 -0.02411  0.12105  1.46980  9.29398 
## 
## $citests
##              estimate         lower         upper      p-value signif
## crim    -0.0544164530  -0.075529516 -0.0333033896 1.509961e-06    ***
## zn      -0.0046418100  -0.009605911  0.0003222915 6.652190e-02      .
## indus   -0.0245357110  -0.051187606  0.0021161842 7.077114e-02      .
## chas     7.1730075477   6.375361838  7.9706532575 5.512518e-33    ***
## nox     -9.4958030753 -12.095806303 -6.8957998474 8.811259e-11    ***
## rm       4.7080249286   3.939353604  5.4766962526 1.787394e-21    ***
## age     -0.0439718628  -0.053082559 -0.0348611667 7.779486e-16    ***
## dis     -1.4214523042  -1.573146091 -1.2697585171 2.257304e-34    ***
## rad      0.1810040336   0.155692359  0.2063157080 8.827967e-26    ***
## tax     -0.0115644823  -0.013066430 -0.0100625342 5.303780e-28    ***
## ptratio -0.4819300831  -0.582754790 -0.3811053766 1.242164e-15    ***
## black   -0.0002461991  -0.001967809  0.0014754112 7.772335e-01       
## lstat   -0.4091458985  -0.475209948 -0.3430818492 9.126116e-22    ***
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             102    
## Number of columns          13     
## _______________________           
## Column type frequency:            
##   numeric                  13     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean       sd       p0       p25        p50      p75
##  1 crim          -0.0544    0.107    -0.416   -0.0696  -0.00478    0.00661
##  2 zn            -0.00464   0.0253   -0.0524  -0.0200  -0.00301    0.00416
##  3 indus         -0.0245    0.136    -0.308   -0.109   -0.0396     0.0220 
##  4 chas           7.17      4.06    -17.7      5.97     7.12       9.16   
##  5 nox           -9.50     13.2     -78.3    -15.0     -7.32      -2.30   
##  6 rm             4.71      3.91     -3.43     1.83     4.48       7.72   
##  7 age           -0.0440    0.0464   -0.138   -0.0778  -0.0506    -0.00674
##  8 dis           -1.42      0.772    -3.43    -1.80    -1.32      -0.933  
##  9 rad            0.181     0.129    -0.0944   0.0827   0.173      0.261  
## 10 tax           -0.0116    0.00765  -0.0388  -0.0148  -0.00963   -0.00718
## 11 ptratio       -0.482     0.513    -2.13    -0.671   -0.441     -0.209  
## 12 black         -0.000246  0.00877  -0.0263  -0.00450  0.0000316  0.00344
## 13 lstat         -0.409     0.336    -1.62    -0.474   -0.310     -0.189  
##        p100 hist 
##  1  0.107   ▁▁▂▇▅
##  2  0.0728  ▃▇▇▂▁
##  3  0.499   ▂▇▂▁▁
##  4 14.8     ▁▁▁▇▅
##  5 15.5     ▁▁▁▇▃
##  6 12.4     ▃▇▇▇▅
##  7  0.0628  ▂▇▆▃▂
##  8  0.0716  ▂▂▇▇▃
##  9  0.492   ▂▇▇▃▂
## 10  0.00304 ▁▁▃▇▂
## 11  1.01    ▁▁▇▃▁
## 12  0.0391  ▁▇▇▁▁
## 13  0.0311  ▁▁▂▆▇
# Report the wall-clock seconds elapsed since t0 was recorded.
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  1.01 s
# Same summary, timed again, with type_ci switched to "bootstrap".
t0 <- proc.time()["elapsed"]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
## $R_squared
## [1] 0.906853
## 
## $R_squared_adj
## [1] 0.8930926
## 
## $Residuals
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -8.33671 -1.10461 -0.02411  0.12105  1.46980  9.29398 
## 
## $citests
##              estimate         lower        upper      p-value signif
## crim    -0.0544164530  -0.077388205 -0.033868739 3.636383e-06    ***
## zn      -0.0046418100  -0.008950983  0.000193778 4.932471e-02      *
## indus   -0.0245357110  -0.049120109  0.000544111 5.559257e-02      .
## chas     7.1730075477   6.366223778  7.870788737 1.484058e-34    ***
## nox     -9.4958030753 -12.303031437 -7.145086697 1.011413e-10    ***
## rm       4.7080249286   3.856816287  5.491698401 1.329440e-19    ***
## age     -0.0439718628  -0.053220093 -0.035161838 9.023576e-16    ***
## dis     -1.4214523042  -1.569696745 -1.285297810 3.493404e-36    ***
## rad      0.1810040336   0.155700757  0.206461984 2.383885e-25    ***
## tax     -0.0115644823  -0.013086037 -0.010031979 3.950118e-27    ***
## ptratio -0.4819300831  -0.582929860 -0.390711993 2.146847e-16    ***
## black   -0.0002461991  -0.001876356  0.001546006 7.785258e-01       
## lstat   -0.4091458985  -0.478128045 -0.343430510 6.000266e-21    ***
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             102    
## Number of columns          13     
## _______________________           
## Column type frequency:            
##   numeric                  13     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean       sd       p0       p25        p50      p75
##  1 crim          -0.0544    0.107    -0.416   -0.0696  -0.00478    0.00661
##  2 zn            -0.00464   0.0253   -0.0524  -0.0200  -0.00301    0.00416
##  3 indus         -0.0245    0.136    -0.308   -0.109   -0.0396     0.0220 
##  4 chas           7.17      4.06    -17.7      5.97     7.12       9.16   
##  5 nox           -9.50     13.2     -78.3    -15.0     -7.32      -2.30   
##  6 rm             4.71      3.91     -3.43     1.83     4.48       7.72   
##  7 age           -0.0440    0.0464   -0.138   -0.0778  -0.0506    -0.00674
##  8 dis           -1.42      0.772    -3.43    -1.80    -1.32      -0.933  
##  9 rad            0.181     0.129    -0.0944   0.0827   0.173      0.261  
## 10 tax           -0.0116    0.00765  -0.0388  -0.0148  -0.00963   -0.00718
## 11 ptratio       -0.482     0.513    -2.13    -0.671   -0.441     -0.209  
## 12 black         -0.000246  0.00877  -0.0263  -0.00450  0.0000316  0.00344
## 13 lstat         -0.409     0.336    -1.62    -0.474   -0.310     -0.189  
##        p100 hist 
##  1  0.107   ▁▁▂▇▅
##  2  0.0728  ▃▇▇▂▁
##  3  0.499   ▂▇▂▁▁
##  4 14.8     ▁▁▁▇▅
##  5 15.5     ▁▁▁▇▃
##  6 12.4     ▃▇▇▇▅
##  7  0.0628  ▂▇▆▃▂
##  8  0.0716  ▂▂▇▇▃
##  9  0.492   ▂▇▇▃▂
## 10  0.00304 ▁▁▃▇▂
## 11  1.01    ▁▁▇▃▁
## 12  0.0391  ▁▇▇▁▁
## 13  0.0311  ▁▁▂▆▇
# Report the wall-clock seconds elapsed since t0 was recorded.
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  1.556 s

ranger (random forest)

# method = "ranger" on the Boston split, with split-conformal intervals.
obj <- learningmachine::Regressor$new(
  method = "ranger",
  pi_method = "splitconformal"
)
print(obj$get_type())
## [1] "regression"
print(obj$get_name())
## [1] "Regressor"
# Time the fit on the training split.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.093 s
# Root mean squared error of the point predictions on the test split.
print(sqrt(mean((y_test - obj$predict(X_test)$preds)^2)))
## [1] 3.79469
# Time the model summary computed on the held-out data.
t0 <- proc.time()["elapsed"]
obj$summary(X_test, y = y_test, show_progress = FALSE)
## $R_squared
## [1] 0.8392681
## 
## $R_squared_adj
## [1] 0.8155236
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -9.1590 -1.7302 -0.2022  0.4068  2.0991 12.6480 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##              estimate         lower         upper      p-value signif
## crim     -46.07120224 -210.45915529  118.31675081 5.794688e-01       
## zn        -0.01770371   -0.20552612    0.17011870 8.520499e-01       
## indus    -15.27494058  -26.85443276   -3.69544841 1.023790e-02      *
## chas       0.00000000           NaN           NaN          NaN       
## nox     -521.68056125 -688.90286706 -354.45825544 1.310563e-08    ***
## rm       275.37923329  224.35153031  326.40693626 2.514291e-18    ***
## age       -1.75704619   -2.46954300   -1.04454938 3.780961e-06    ***
## dis       20.57598690   -4.55485374   45.70682754 1.074525e-01       
## rad        2.13991832    0.02367618    4.25616046 4.753842e-02      *
## tax       -0.60978051   -0.79942682   -0.42013420 5.448371e-09    ***
## ptratio  -32.87462570  -38.77698867  -26.97226272 4.440300e-19    ***
## black     -0.26384711   -0.49997135   -0.02772287 2.889086e-02      *
## lstat    -44.64193590  -57.99309865  -31.29077315 1.652805e-09    ***
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             102    
## Number of columns          13     
## _______________________           
## Column type frequency:            
##   numeric                  13     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd       p0      p25       p50     p75     p100
##  1 crim           -46.1    837.    -5846.    -80.5     -4.90   114.    2107.   
##  2 zn              -0.0177   0.956    -4.47    0        0        0        5.22 
##  3 indus          -15.3     59.0    -299.     -4.20     1.66     8.67    41.1  
##  4 chas             0        0         0       0        0        0        0    
##  5 nox           -522.     851.    -3931.   -965.    -334.       4.36   852.   
##  6 rm             275.     260.      -11.5    73.4    180.     378.     870.   
##  7 age             -1.76     3.63    -26.2    -2.88    -1.16     0        5.56 
##  8 dis             20.6    128.     -234.    -33.8     -2.58    12.7    636.   
##  9 rad              2.14    10.8      -6.31    0        0        0.638   91.8  
## 10 tax             -0.610    0.966    -3.85   -0.621   -0.233   -0.116    0.525
## 11 ptratio        -32.9     30.0    -140.    -44.7    -26.5    -13.8      4.66 
## 12 black           -0.264    1.20     -4.11   -1.07     0.0418   0.521    2.22 
## 13 lstat          -44.6     68.0    -335.    -56.4    -21.1     -5.23    45.9  
##    hist 
##  1 ▁▁▁▇▁
##  2 ▁▁▇▁▁
##  3 ▁▁▁▁▇
##  4 ▁▁▇▁▁
##  5 ▁▁▃▇▇
##  6 ▇▅▁▂▂
##  7 ▁▁▁▇▆
##  8 ▁▇▁▁▁
##  9 ▇▁▁▁▁
## 10 ▁▁▁▃▇
## 11 ▁▁▂▇▇
## 12 ▁▂▃▇▂
## 13 ▁▁▁▃▇
# Report the wall-clock seconds elapsed since t0 was recorded.
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.92 s
# Same summary, timed again, with type_ci switched to "bootstrap".
t0 <- proc.time()["elapsed"]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
## $R_squared
## [1] 0.8392681
## 
## $R_squared_adj
## [1] 0.8155236
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -9.1590 -1.7302 -0.2022  0.4068  2.0991 12.6480 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##              estimate        lower         upper      p-value signif
## crim     -46.07120224 -226.0963824   86.64772635 5.649169e-01       
## zn        -0.01770371   -0.1911345    0.16043942 8.439189e-01       
## indus    -15.27494058  -27.0066412   -4.76012553 8.326543e-03     **
## chas       0.00000000    0.0000000    0.00000000          NaN       
## nox     -521.68056125 -694.5379226 -351.49586075 3.697818e-08    ***
## rm       275.37923329  225.1057152  332.38712155 6.551598e-17    ***
## age       -1.75704619   -2.5409652   -1.08938031 6.883580e-06    ***
## dis       20.57598690   -3.0778308   44.65777193 9.417985e-02      .
## rad        2.13991832    0.4743989    4.36379845 3.340076e-02      *
## tax       -0.60978051   -0.7988964   -0.42355307 5.710666e-09    ***
## ptratio  -32.87462570  -38.9972525  -27.71671691 6.732061e-20    ***
## black     -0.26384711   -0.4737498   -0.03739985 1.967444e-02      *
## lstat    -44.64193590  -57.9757644  -32.27758919 7.154857e-10    ***
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             102    
## Number of columns          13     
## _______________________           
## Column type frequency:            
##   numeric                  13     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd       p0      p25       p50     p75     p100
##  1 crim           -46.1    837.    -5846.    -80.5     -4.90   114.    2107.   
##  2 zn              -0.0177   0.956    -4.47    0        0        0        5.22 
##  3 indus          -15.3     59.0    -299.     -4.20     1.66     8.67    41.1  
##  4 chas             0        0         0       0        0        0        0    
##  5 nox           -522.     851.    -3931.   -965.    -334.       4.36   852.   
##  6 rm             275.     260.      -11.5    73.4    180.     378.     870.   
##  7 age             -1.76     3.63    -26.2    -2.88    -1.16     0        5.56 
##  8 dis             20.6    128.     -234.    -33.8     -2.58    12.7    636.   
##  9 rad              2.14    10.8      -6.31    0        0        0.638   91.8  
## 10 tax             -0.610    0.966    -3.85   -0.621   -0.233   -0.116    0.525
## 11 ptratio        -32.9     30.0    -140.    -44.7    -26.5    -13.8      4.66 
## 12 black           -0.264    1.20     -4.11   -1.07     0.0418   0.521    2.22 
## 13 lstat          -44.6     68.0    -335.    -56.4    -21.1     -5.23    45.9  
##    hist 
##  1 ▁▁▁▇▁
##  2 ▁▁▇▁▁
##  3 ▁▁▁▁▇
##  4 ▁▁▇▁▁
##  5 ▁▁▃▇▇
##  6 ▇▅▁▂▂
##  7 ▁▁▁▇▆
##  8 ▁▇▁▁▁
##  9 ▇▁▁▁▁
## 10 ▁▁▁▃▇
## 11 ▁▁▂▇▇
## 12 ▁▂▃▇▂
## 13 ▁▁▁▃▇
# Report the wall-clock seconds elapsed since t0 was recorded.
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  1.433 s

3 - KRR regression on mtcars

# Back to mtcars: response mpg, predictors are all remaining columns
# (mpg is the first column, hence the -1).
y <- mtcars$mpg
X <- as.matrix(mtcars[, -1])

# Reproducible 70/30 train/test split on row indices.
set.seed(123)
index_train <- base::sample.int(
  n = nrow(X),
  size = floor(0.7 * nrow(X)),
  replace = FALSE
)
print(index_train)
##  [1] 31 15 19 14  3 10 18 22 11  5 20 29 23 30  9 28  8 27  7 32 26 17
y_train <- y[index_train]
y_test <- y[-index_train]
X_train <- X[index_train, ]
X_test <- X[-index_train, ]
print(dim(X_train))
## [1] 22 10
print(dim(X_test))
## [1] 10 10
# method = "krr" on the mtcars split, with split-conformal intervals.
obj <- learningmachine::Regressor$new(
  method = "krr",
  pi_method = "splitconformal"
)
print(obj$get_type())
## [1] "regression"
print(obj$get_name())
## [1] "Regressor"
# Time the fit; reg_lambda is passed through fit() for this method.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.005 s
# Show the prediction object (preds / lower / upper) for the test split.
print(obj$predict(X = X_test))
## $preds
##  [1] 22.151349 21.802194 12.541365 10.124759 13.408181 14.155816  7.421184
##  [8] 16.879536 13.615153 12.749565
## 
## $lower
##  [1] 12.1513495 11.8021941  2.5413650  0.1247588  3.4081805  4.1558157
##  [7] -2.5788160  6.8795365  3.6151533  2.7495651
## 
## $upper
##  [1] 32.15135 31.80219 22.54137 20.12476 23.40818 24.15582 17.42118 26.87954
##  [9] 23.61515 22.74957
# Refit (timed), then set the level to 95 and the interval method to
# "splitconformal" before predicting on the test split.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.01 s
obj$set_level(95)
obj$set_pi_method("splitconformal")
res <- obj$predict(X = X_test)

# Training responses followed by the test-set predictions; the y-axis is
# sized to hold the whole prediction band as well as every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Prediction band (gray), point predictions (red), observed test values (blue).
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage: share of test responses inside [lower, upper].
mean(y_test >= as.numeric(res$lower) & y_test <= as.numeric(res$upper))
## [1] 1
# Refit once more (timed) and predict on the test split.
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, reg_lambda = 0.1)
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.006 s
res <- obj$predict(X = X_test)

# Training responses followed by the test-set predictions; the y-axis is
# sized to hold the whole prediction band as well as every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Prediction band (gray), point predictions (red), observed test values (blue).
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage: share of test responses inside [lower, upper].
mean(y_test >= as.numeric(res$lower) & y_test <= as.numeric(res$upper))
## [1] 1
# Time the model summary computed on the held-out data.
t0 <- proc.time()["elapsed"]
obj$summary(X_test, y = y_test, show_progress = FALSE)
## $R_squared
## [1] -0.8614864
## 
## $R_squared_adj
## [1] 17.75338
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.1513  0.5083  3.0680  3.4751  5.9929  8.8586 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##           estimate        lower        upper      p-value signif
## cyl   -23.98943109  -46.4975109   -1.4813513 3.918219e-02      *
## disp   -0.61133395   -0.9655770   -0.2570909 3.597927e-03     **
## hp     -0.07828878   -0.3785573    0.2219797 5.698268e-01       
## drat  310.94399534  160.4146969  461.4732937 1.163859e-03     **
## wt   -197.39979731 -240.1776661 -154.6219286 2.500030e-06    ***
## qsec  -19.50660485  -54.1139966   15.1007869 2.342132e-01       
## vs     69.84795566  -85.8899529  225.5858643 3.368080e-01       
## am    137.97019623   -0.2148915  276.1552839 5.028830e-02      .
## gear  191.57905165  134.3446800  248.8134233 3.424783e-05    ***
## carb    3.39227959  -22.2875140   29.0720732 7.718555e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd      p0      p25       p50      p75      p100
##  1 cyl            -24.0     31.5    -64.0   -40.9    -34.1      -0.849   37.6   
##  2 disp            -0.611    0.495   -1.66   -0.934   -0.429    -0.307   -0.0817
##  3 hp              -0.0783   0.420   -1.00   -0.218   -0.0402    0.235    0.359 
##  4 drat           311.     210.    -159.    195.     369.      464.     534.    
##  5 wt            -197.      59.8   -280.   -252.    -196.     -144.    -124.    
##  6 qsec           -19.5     48.4    -73.4   -60.0    -29.3      12.8     60.3   
##  7 vs              69.8    218.    -218.   -104.      86.1     103.     421.    
##  8 am             138.     193.    -161.     99.8    162.      201.     516.    
##  9 gear           192.      80.0     74.7   142.     178.      224.     367.    
## 10 carb             3.39    35.9    -56.3    -6.54     3.71     36.0     41.3   
##    hist 
##  1 ▃▇▂▃▂
##  2 ▂▂▂▆▇
##  3 ▂▁▆▃▇
##  4 ▂▁▆▃▇
##  5 ▇▁▇▂▇
##  6 ▇▇▂▂▅
##  7 ▆▂▇▁▃
##  8 ▂▁▇▁▁
##  9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
# Report the wall-clock seconds elapsed since t0 was recorded.
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.216 s
# Same summary, timed again, with type_ci switched to "bootstrap".
t0 <- proc.time()["elapsed"]
obj$summary(X_test, y = y_test, show_progress = FALSE, type_ci = "bootstrap")
## $R_squared
## [1] -0.8614864
## 
## $R_squared_adj
## [1] 17.75338
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.1513  0.5083  3.0680  3.4751  5.9929  8.8586 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##           estimate        lower        upper      p-value signif
## cyl   -23.98943109  -40.5922255   -6.2447901 2.292950e-02      *
## disp   -0.61133395   -0.9172916   -0.3424231 2.416713e-03     **
## hp     -0.07828878   -0.3397646    0.1496909 5.462365e-01       
## drat  310.94399534  179.0656056  417.8607418 6.414323e-04    ***
## wt   -197.39979731 -231.8809681 -164.5228586 1.115553e-06    ***
## qsec  -19.50660485  -44.6930749    9.1315753 1.891311e-01       
## vs     69.84795566  -48.5034409  190.0256695 2.806199e-01       
## am    137.97019623   27.1241565  250.3954442 3.846436e-02      *
## gear  191.57905165  147.4748755  241.7819503 2.296739e-05    ***
## carb    3.39227959  -17.8115551   22.1933920 7.472011e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable      mean      sd      p0      p25       p50      p75      p100
##  1 cyl            -24.0     31.5    -64.0   -40.9    -34.1      -0.849   37.6   
##  2 disp            -0.611    0.495   -1.66   -0.934   -0.429    -0.307   -0.0817
##  3 hp              -0.0783   0.420   -1.00   -0.218   -0.0402    0.235    0.359 
##  4 drat           311.     210.    -159.    195.     369.      464.     534.    
##  5 wt            -197.      59.8   -280.   -252.    -196.     -144.    -124.    
##  6 qsec           -19.5     48.4    -73.4   -60.0    -29.3      12.8     60.3   
##  7 vs              69.8    218.    -218.   -104.      86.1     103.     421.    
##  8 am             138.     193.    -161.     99.8    162.      201.     516.    
##  9 gear           192.      80.0     74.7   142.     178.      224.     367.    
## 10 carb             3.39    35.9    -56.3    -6.54     3.71     36.0     41.3   
##    hist 
##  1 ▃▇▂▃▂
##  2 ▂▂▂▆▇
##  3 ▂▁▆▃▇
##  4 ▂▁▆▃▇
##  5 ▇▁▇▂▇
##  6 ▇▇▂▂▅
##  7 ▆▂▇▁▃
##  8 ▂▁▇▁▁
##  9 ▂▇▃▂▂
## 10 ▃▁▆▂▇
# Report the wall-clock seconds elapsed since t0 was recorded.
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.337 s
# Switch the interval method to "kdejackknifeplus", then time a refit.
obj$set_pi_method("kdejackknifeplus")
t0 <- proc.time()["elapsed"]
obj$fit(X_train, y_train, reg_lambda = 0.1)
##   |======================================================================| 100%
# Report the wall-clock seconds elapsed since t0 was recorded.
cat("Elapsed: ", proc.time()["elapsed"] - t0, "s \n")
## Elapsed:  0.023 s
res <- obj$predict(X = X_test)

# Training responses followed by the test-set predictions; the y-axis is
# sized to hold the whole prediction band as well as every observed response.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Prediction band (gray), point predictions (red), observed test values (blue).
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Empirical coverage: share of test responses inside [lower, upper].
mean(y_test >= as.numeric(res$lower) & y_test <= as.numeric(res$upper))
## [1] 1
# Time the model summary computed on the held-out data.
t0 <- proc.time()["elapsed"]
obj$summary(X_test, y = y_test, show_progress = FALSE)
## $R_squared
## [1] -3.076311
## 
## $R_squared_adj
## [1] 37.6868
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.660   6.009   6.743   6.944   7.966  10.753 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##          estimate        lower       upper      p-value signif
## cyl   -36.7817740  -50.6623622 -22.9011858 2.038888e-04    ***
## disp   -0.2133047   -0.6537255   0.2271161 3.017009e-01       
## hp     -0.2920633   -0.8288709   0.2447443 2.495994e-01       
## drat  259.9789584  141.9967301 377.9611867 7.545111e-04    ***
## wt   -125.6032827 -159.9084338 -91.2981317 1.675653e-05    ***
## qsec    6.1547882  -22.2053730  34.5149494 6.352182e-01       
## vs     35.1176737  -92.7670167 163.0023641 5.498770e-01       
## am     85.8109695  -32.8651723 204.4871113 1.363325e-01       
## gear  264.4099446  185.2573493 343.5625399 3.479849e-05    ***
## carb  -24.1859057  -56.7206453   8.3488338 1.269314e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean      sd      p0      p25        p50      p75    p100
##  1 cyl            -36.8    19.4    -52.8   -48.3    -40.9      -37.7    13.0  
##  2 disp            -0.213   0.616   -1.19   -0.561   -0.212     -0.146   0.781
##  3 hp              -0.292   0.750   -1.73   -0.461    0.00750    0.135   0.531
##  4 drat           260.    165.     -74.7   161.     265.       381.    485.   
##  5 wt            -126.     48.0   -202.   -152.    -125.      -105.    -30.9  
##  6 qsec             6.15   39.6    -55.5   -20.7     -1.84      33.6    64.2  
##  7 vs              35.1   179.    -232.   -108.      68.7       95.1   292.   
##  8 am              85.8   166.    -205.     94.4    122.       144.    346.   
##  9 gear           264.    111.     122.    206.     242.       301.    529.   
## 10 carb           -24.2    45.5    -73.7   -54.9    -47.2       22.6    39.5  
##    hist 
##  1 ▇▅▂▁▂
##  2 ▂▃▇▁▃
##  3 ▃▁▂▇▆
##  4 ▂▃▁▇▆
##  5 ▃▂▇▂▂
##  6 ▂▇▅▅▅
##  7 ▇▁▇▅▅
##  8 ▃▁▇▆▂
##  9 ▇▇▇▁▂
## 10 ▇▇▁▂▇
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  1.093 s
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] -3.076311
## 
## $R_squared_adj
## [1] 37.6868
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.660   6.009   6.743   6.944   7.966  10.753 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##          estimate        lower       upper      p-value signif
## cyl   -36.7817740  -45.3808553 -26.1949547 3.635150e-05    ***
## disp   -0.2133047   -0.5752521   0.1492085 2.781632e-01       
## hp     -0.2920633   -0.7217967   0.1034058 1.987160e-01       
## drat  259.9789584  162.2017490 343.4030751 3.241203e-04    ***
## wt   -125.6032827 -152.6736671 -98.7351447 7.604780e-06    ***
## qsec    6.1547882  -14.7007260  29.0726634 5.949372e-01       
## vs     35.1176737  -62.1816637 130.3916175 4.928333e-01       
## am     85.8109695  -10.0596731 174.5354755 1.017444e-01       
## gear  264.4099446  208.5642717 335.6537285 1.897459e-05    ***
## carb  -24.1859057  -50.4118778   2.2305024 1.052301e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean      sd      p0      p25        p50      p75    p100
##  1 cyl            -36.8    19.4    -52.8   -48.3    -40.9      -37.7    13.0  
##  2 disp            -0.213   0.616   -1.19   -0.561   -0.212     -0.146   0.781
##  3 hp              -0.292   0.750   -1.73   -0.461    0.00750    0.135   0.531
##  4 drat           260.    165.     -74.7   161.     265.       381.    485.   
##  5 wt            -126.     48.0   -202.   -152.    -125.      -105.    -30.9  
##  6 qsec             6.15   39.6    -55.5   -20.7     -1.84      33.6    64.2  
##  7 vs              35.1   179.    -232.   -108.      68.7       95.1   292.   
##  8 am              85.8   166.    -205.     94.4    122.       144.    346.   
##  9 gear           264.    111.     122.    206.     242.       301.    529.   
## 10 carb           -24.2    45.5    -73.7   -54.9    -47.2       22.6    39.5  
##    hist 
##  1 ▇▅▂▁▂
##  2 ▂▃▇▁▃
##  3 ▃▁▂▇▆
##  4 ▂▃▁▇▆
##  5 ▃▂▇▂▂
##  6 ▂▇▅▅▅
##  7 ▇▁▇▅▅
##  8 ▃▁▇▆▂
##  9 ▇▇▇▁▂
## 10 ▇▇▁▂▇
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.914 s

xgboost regression

# Regressor using the "xgboost" method with "splitconformal" as the
# prediction-interval method.
obj <- learningmachine::Regressor$new(
  method = "xgboost",
  pi_method = "splitconformal"
)
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Fit on the training split and report the elapsed time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.485 s
# Predictions for the test split, printed together with their bounds.
print(obj$predict(X_test))
## $preds
##  [1] 18.13500 18.13500 17.13105 17.13105 14.64118 14.64118 14.03685 21.29947
##  [9] 15.33300 14.03685
## 
## $lower
##  [1] 12.135002 12.135002 11.131052 11.131052  8.641179  8.641179  8.036854
##  [8] 15.299475  9.333004  8.036854
## 
## $upper
##  [1] 24.13500 24.13500 23.13105 23.13105 20.64118 20.64118 20.03685 27.29947
##  [9] 21.33300 20.03685
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] 0.2881145
## 
## $R_squared_adj
## [1] 7.406969
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.6369  0.3926  2.2088  1.5079  2.8650  5.1631 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##        estimate       lower      upper   p-value signif
## cyl   0.0000000         NaN        NaN       NaN       
## disp -0.1859971  -0.6067516  0.2347575 0.3434364       
## hp    0.0000000         NaN        NaN       NaN       
## drat 28.9866074 -18.4823056 76.4555203 0.2004909       
## wt    0.0000000         NaN        NaN       NaN       
## qsec -1.7295559  -5.6420830  2.1829713 0.3434364       
## vs    0.0000000         NaN        NaN       NaN       
## am    0.0000000         NaN        NaN       NaN       
## gear  0.0000000         NaN        NaN       NaN       
## carb  0.0000000         NaN        NaN       NaN       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable   mean     sd     p0 p25 p50 p75 p100 hist 
##  1 cyl            0      0       0      0   0   0   0  ▁▁▇▁▁
##  2 disp          -0.186  0.588  -1.86   0   0   0   0  ▁▁▁▁▇
##  3 hp             0      0       0      0   0   0   0  ▁▁▇▁▁
##  4 drat          29.0   66.4     0      0   0   0 200. ▇▁▁▁▁
##  5 wt             0      0       0      0   0   0   0  ▁▁▇▁▁
##  6 qsec          -1.73   5.47  -17.3    0   0   0   0  ▁▁▁▁▇
##  7 vs             0      0       0      0   0   0   0  ▁▁▇▁▁
##  8 am             0      0       0      0   0   0   0  ▁▁▇▁▁
##  9 gear           0      0       0      0   0   0   0  ▁▁▇▁▁
## 10 carb           0      0       0      0   0   0   0  ▁▁▇▁▁
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.385 s
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] 0.2881145
## 
## $R_squared_adj
## [1] 7.406969
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.6369  0.3926  2.2088  1.5079  2.8650  5.1631 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##        estimate      lower    upper   p-value signif
## cyl   0.0000000  0.0000000  0.00000       NaN       
## disp -0.1859971 -0.5579912  0.00000 0.2237346       
## hp    0.0000000  0.0000000  0.00000       NaN       
## drat 28.9866074  0.0000000 77.01909 0.1742390       
## wt    0.0000000  0.0000000  0.00000       NaN       
## qsec -1.7295559 -5.1886676  0.00000 0.2237346       
## vs    0.0000000  0.0000000  0.00000       NaN       
## am    0.0000000  0.0000000  0.00000       NaN       
## gear  0.0000000  0.0000000  0.00000       NaN       
## carb  0.0000000  0.0000000  0.00000       NaN       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable   mean     sd     p0 p25 p50 p75 p100 hist 
##  1 cyl            0      0       0      0   0   0   0  ▁▁▇▁▁
##  2 disp          -0.186  0.588  -1.86   0   0   0   0  ▁▁▁▁▇
##  3 hp             0      0       0      0   0   0   0  ▁▁▇▁▁
##  4 drat          29.0   66.4     0      0   0   0 200. ▇▁▁▁▁
##  5 wt             0      0       0      0   0   0   0  ▁▁▇▁▁
##  6 qsec          -1.73   5.47  -17.3    0   0   0   0  ▁▁▁▁▇
##  7 vs             0      0       0      0   0   0   0  ▁▁▇▁▁
##  8 am             0      0       0      0   0   0   0  ▁▁▇▁▁
##  9 gear           0      0       0      0   0   0   0  ▁▁▇▁▁
## 10 carb           0      0       0      0   0   0   0  ▁▁▇▁▁
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.378 s
# Refit the xgboost regressor and report the elapsed time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.011 s
# Level 95 is set before predicting (presumably 95% intervals; see the
# package documentation for set_level()).
obj$set_level(95)
res <- obj$predict(X = X_test)

# Each series is the training responses followed by a test-set quantity.
pred_path <- c(y_train, res$preds)
upper_path <- c(y_train, res$upper)
lower_path <- c(y_train, res$lower)
truth_path <- c(y_train, y_test)

plot(pred_path,
     type = "l",
     main = "",
     ylab = "",
     ylim = range(c(res$upper, res$lower, y)))
lines(upper_path, col = "gray60")
lines(lower_path, col = "gray60")
lines(pred_path, col = "red")
lines(truth_path, col = "blue")

# Fraction of test responses that fall inside [lower, upper].
mean((as.numeric(res$lower) <= y_test) * (as.numeric(res$upper) >= y_test))
## [1] 1
# Switch the prediction-interval method to "kdesplitconformal", then refit.
obj$set_pi_method("kdesplitconformal")
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.019 s
obj$set_level(95)
res <- obj$predict(X = X_test)

# Vertical limits wide enough for both bounds and every observed response.
y_limits <- range(c(res$upper, res$lower, y))

# Training responses followed by test-set predictions (red), the interval
# bounds (gray) and the observed test responses (blue).
plot(c(y_train, res$preds),
     type = "l",
     main = "",
     ylab = "",
     ylim = y_limits)
lines(c(y_train, res$upper), col = "gray60")
lines(c(y_train, res$lower), col = "gray60")
lines(c(y_train, res$preds), col = "red")
lines(c(y_train, y_test), col = "blue")

# Fraction of test responses that fall inside [lower, upper].
inside_lower <- y_test >= as.numeric(res$lower)
inside_upper <- y_test <= as.numeric(res$upper)
mean(inside_lower * inside_upper)
## [1] 1
# Switch the prediction-interval method to "bootjackknifeplus", then refit.
obj$set_pi_method("bootjackknifeplus")
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, nrounds = 10, verbose = FALSE)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.007 s
obj$set_level(95)
res <- obj$predict(X = X_test)

# Training responses followed by the test-set predictions; the y-axis spans
# both interval bounds and all observed responses.
plot(
  c(y_train, res$preds),
  type = "l",
  main = "",
  ylab = "",
  ylim = range(c(res$upper, res$lower, y))
)
# Both interval bounds share the same gray.
for (bound in list(res$upper, res$lower)) {
  lines(c(y_train, bound), col = "gray60")
}
lines(c(y_train, res$preds), col = "red")   # point predictions
lines(c(y_train, y_test), col = "blue")     # observed responses

# Fraction of test responses that fall inside [lower, upper].
lower_bound <- as.numeric(res$lower)
upper_bound <- as.numeric(res$upper)
mean((y_test >= lower_bound) * (y_test <= upper_bound))
## [1] 1

4 - RVFL regression

# Regressor using the "rvfl" method with nb_hidden = 50L and "splitconformal"
# as the prediction-interval method.
obj <- learningmachine::Regressor$new(
  method = "rvfl",
  nb_hidden = 50L,
  pi_method = "splitconformal"
)
obj$get_type()
## [1] "regression"
obj$get_name()
## [1] "Regressor"
# Fit on the training split with reg_lambda = 0.01 and report the elapsed
# time in seconds.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train, reg_lambda = 0.01)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.053 s
# Predictions for the test split, printed together with their bounds.
print(obj$predict(X_test))
## $preds
##           Mazda RX4       Mazda RX4 Wag      Hornet 4 Drive             Valiant 
##           21.350888           19.789387           13.106761            9.695310 
##          Merc 450SE          Merc 450SL Lincoln Continental       Toyota Corona 
##           11.131161           12.568682            2.044672           19.289805 
##          Camaro Z28    Pontiac Firebird 
##           14.847878           12.282272 
## 
## $lower
##           Mazda RX4       Mazda RX4 Wag      Hornet 4 Drive             Valiant 
##          12.3508879          10.7893873           4.1067608           0.6953102 
##          Merc 450SE          Merc 450SL Lincoln Continental       Toyota Corona 
##           2.1311611           3.5686817          -6.9553279          10.2898053 
##          Camaro Z28    Pontiac Firebird 
##           5.8478777           3.2822719 
## 
## $upper
##           Mazda RX4       Mazda RX4 Wag      Hornet 4 Drive             Valiant 
##            30.35089            28.78939            22.10676            18.69531 
##          Merc 450SE          Merc 450SL Lincoln Continental       Toyota Corona 
##            20.13116            21.56868            11.04467            28.28981 
##          Camaro Z28    Pontiac Firebird 
##            23.84788            21.28227
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE)
## $R_squared
## [1] -1.505856
## 
## $R_squared_adj
## [1] 23.55271
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.548   1.461   5.000   4.349   7.949   8.405 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##         estimate       lower        upper      p-value signif
## cyl   137.649985   39.777048  235.5229227 1.115728e-02      *
## disp   -2.406399   -4.650678   -0.1621204 3.825959e-02      *
## hp     -0.527573   -1.402043    0.3468975 2.054686e-01       
## drat  707.372951  246.095138 1168.6507638 7.059500e-03     **
## wt   -500.429007 -565.047979 -435.8100352 2.910469e-08    ***
## qsec  -89.930939 -124.899691  -54.9621860 2.537870e-04    ***
## vs    234.198406 -127.886990  596.2838006 1.774484e-01       
## am   -235.789718 -512.422513   40.8430776 8.592503e-02      .
## gear   52.646721   -6.640614  111.9340567 7.547657e-02      .
## carb  -17.100561  -87.819649   53.6185270 5.976705e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean     sd      p0      p25      p50       p75     p100
##  1 cyl            138.    137.     -8.40   75.8     91.1     98.6     394.   
##  2 disp            -2.41    3.14   -8.46   -1.32    -1.08    -0.775    -0.300
##  3 hp              -0.528   1.22   -3.40   -0.695   -0.188    0.0137    0.893
##  4 drat           707.    645.     55.7   388.     482.     563.     1939.   
##  5 wt            -500.     90.3  -698.   -538.    -500.    -458.     -377.   
##  6 qsec           -89.9    48.9  -145.   -128.    -102.     -64.0       2.67 
##  7 vs             234.    506.   -121.    -13.2     36.8     53.2    1269.   
##  8 am            -236.    387.   -653.   -450.    -397.    -168.      519.   
##  9 gear            52.6    82.9  -107.     -4.69    66.2    112.      170.   
## 10 carb           -17.1    98.9  -117.    -64.6    -60.6    -17.5     171.   
##    hist 
##  1 ▂▇▁▁▂
##  2 ▂▁▁▁▇
##  3 ▁▁▁▇▂
##  4 ▅▇▁▁▃
##  5 ▂▁▆▇▃
##  6 ▇▆▁▂▃
##  7 ▇▁▁▁▂
##  8 ▆▇▂▁▃
##  9 ▂▅▅▅▇
## 10 ▇▂▁▁▂
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.819 s
t0 <- proc.time()[3]
obj$summary(X_test, y=y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] -1.505856
## 
## $R_squared_adj
## [1] 23.55271
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.548   1.461   5.000   4.349   7.949   8.405 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##         estimate       lower         upper      p-value signif
## cyl   137.649985   67.927710  224.49681122 7.317784e-03     **
## disp   -2.406399   -4.543447   -0.83146440 3.164812e-02      *
## hp     -0.527573   -1.228274    0.08917815 1.509239e-01       
## drat  707.372951  379.807480 1129.02366858 4.912929e-03     **
## wt   -500.429007 -557.338124 -452.13988493 1.682493e-08    ***
## qsec  -89.930939 -115.877565  -59.82831064 1.427513e-04    ***
## vs    234.198406  -17.180072  581.09310604 1.592806e-01       
## am   -235.789718 -437.517547   13.80315000 7.084922e-02      .
## gear   52.646721    6.427697   94.54115818 4.386846e-02      *
## carb  -17.100561  -69.147781   45.17009752 5.720359e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             10     
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean     sd      p0      p25      p50       p75     p100
##  1 cyl            138.    137.     -8.40   75.8     91.1     98.6     394.   
##  2 disp            -2.41    3.14   -8.46   -1.32    -1.08    -0.775    -0.300
##  3 hp              -0.528   1.22   -3.40   -0.695   -0.188    0.0137    0.893
##  4 drat           707.    645.     55.7   388.     482.     563.     1939.   
##  5 wt            -500.     90.3  -698.   -538.    -500.    -458.     -377.   
##  6 qsec           -89.9    48.9  -145.   -128.    -102.     -64.0       2.67 
##  7 vs             234.    506.   -121.    -13.2     36.8     53.2    1269.   
##  8 am            -236.    387.   -653.   -450.    -397.    -168.      519.   
##  9 gear            52.6    82.9  -107.     -4.69    66.2    112.      170.   
## 10 carb           -17.1    98.9  -117.    -64.6    -60.6    -17.5     171.   
##    hist 
##  1 ▂▇▁▁▂
##  2 ▂▁▁▁▇
##  3 ▁▁▁▇▂
##  4 ▅▇▁▁▃
##  5 ▂▁▆▇▃
##  6 ▇▆▁▂▃
##  7 ▇▁▁▁▂
##  8 ▆▇▂▁▃
##  9 ▂▅▅▅▇
## 10 ▇▂▁▁▂
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.78 s
# Refit the RVFL regressor and report the elapsed time in seconds.
# NOTE(review): this call passes no reg_lambda, unlike the earlier
# obj$fit(X_train, y_train, reg_lambda = 0.01) -- confirm that is intended.
t0 <- proc.time()[["elapsed"]]
obj$fit(X_train, y_train)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.008 s
obj$set_level(95)
res <- obj$predict(X = X_test)

# Vertical limits wide enough for both bounds and every observed response.
y_limits <- range(c(res$upper, res$lower, y))

plot(c(y_train, res$preds),
     type = "l",
     main = "",
     ylab = "",
     ylim = y_limits)
lines(c(y_train, res$upper), col = "gray60")  # upper bound
lines(c(y_train, res$lower), col = "gray60")  # lower bound
lines(c(y_train, res$preds), col = "red")     # point predictions
lines(c(y_train, y_test), col = "blue")       # observed responses
# Dashed marker at the last training observation.
abline(v = length(y_train), lty = 2, col = "black")

# Fraction of test responses that fall inside [lower, upper].
hit_lower <- y_test >= as.numeric(res$lower)
hit_upper <- y_test <= as.numeric(res$upper)
mean(hit_lower * hit_upper)
## [1] 1

Update the RVFL model with one new observation

# Keep a copy of the current coefficients so they can be compared with the
# coefficients after the update.
previous_coefs <- drop(obj$model$coef)

# The first test observation is handed to the model as a new data point ...
newx <- X_test[1L, ]
newy <- y_test[1L]
# ... and the remaining observations form the reduced test set.
new_X_test <- X_test[-1L, ]
new_y_test <- y_test[-1L]

# Time the call to obj$update().
t0 <- proc.time()[["elapsed"]]
obj$update(newx, newy)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.008 s
summary(previous_coefs)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.68212 -0.26567 -0.05157  0.00700  0.21046  2.19222
summary(drop(obj$model$coef) - previous_coefs)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.030666 -0.002610  0.004189  0.002917  0.011386  0.025243
plot(drop(obj$model$coef) - previous_coefs, type='l')
abline(h = 0, lty=2, col="red")

start <- proc.time()[3]
obj$summary(new_X_test, y=new_y_test, show_progress=FALSE)
## $R_squared
## [1] -1.809339
## 
## $R_squared_adj
## [1] 12.23735
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.168   2.513   5.541   5.058   8.185   8.703 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##          estimate       lower        upper      p-value signif
## cyl   111.6701473   17.076928  206.2633669 2.615518e-02      *
## disp   -1.7983224   -3.876380    0.2797349 8.106884e-02      .
## hp     -0.4167545   -1.501658    0.6681495 4.015523e-01       
## drat  569.9102780  148.862037  990.9585186 1.420088e-02      *
## wt   -504.1496696 -583.757006 -424.5423330 4.741273e-07    ***
## qsec -107.9102921 -138.571336  -77.2492482 3.936777e-05    ***
## vs    145.0280002 -173.164419  463.2204193 3.239468e-01       
## am   -319.6910568 -566.618653  -72.7634604 1.745263e-02      *
## gear   57.7630332  -18.934712  134.4607782 1.206459e-01       
## carb  -42.9572292 -108.690903   22.7764447 1.702409e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             9      
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean     sd      p0      p25       p50       p75     p100
##  1 cyl            112.    123.    -13.5    64.5     93.6      93.9     426.   
##  2 disp            -1.80    2.70   -8.94   -1.41    -0.805    -0.689    -0.361
##  3 hp              -0.417   1.41   -3.54   -0.679   -0.0942   -0.0556    1.19 
##  4 drat           570.    548.     36.8   371.     439.      501.     1972.   
##  5 wt            -504.    104.   -742.   -523.    -497.     -461.     -382.   
##  6 qsec          -108.     39.9  -152.   -143.    -115.      -93.0     -35.9  
##  7 vs             145.    414.   -116.    -23.9     51.1      81.2    1231.   
##  8 am            -320.    321.   -575.   -479.    -395.     -368.      465.   
##  9 gear            57.8    99.8  -113.      1.22    35.2     130.      196.   
## 10 carb           -43.0    85.5  -129.    -79.6    -77.9     -22.5     165.   
##    hist 
##  1 ▅▇▁▁▂
##  2 ▁▁▁▁▇
##  3 ▂▁▂▇▃
##  4 ▅▇▁▁▂
##  5 ▂▁▂▇▃
##  6 ▇▅▅▂▂
##  7 ▇▁▁▁▁
##  8 ▇▁▁▁▁
##  9 ▃▇▇▇▇
## 10 ▇▅▁▁▂
cat("Elapsed: ", proc.time()[3] - start, "s \n")
## Elapsed:  0.158 s
start <- proc.time()[3]
obj$summary(new_X_test, y=new_y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] -1.809339
## 
## $R_squared_adj
## [1] 12.23735
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.168   2.513   5.541   5.058   8.185   8.703 
## 
## $Coverage_rate
## [1] 100
## 
## $citests
##          estimate       lower        upper      p-value signif
## cyl   111.6701473   56.594459  201.0414388 1.629833e-02      *
## disp   -1.7983224   -3.653135   -0.7503408 4.130189e-02      *
## hp     -0.4167545   -1.276126    0.2910989 3.277078e-01       
## drat  569.9102780  332.259718  964.1132085 7.668526e-03     **
## wt   -504.1496696 -578.789199 -452.5053839 2.773501e-07    ***
## qsec -107.9102921 -129.483384  -82.5423190 1.836083e-05    ***
## vs    145.0280002  -28.272020  432.6517253 2.524377e-01       
## am   -319.6910568 -475.989615  -90.6464455 1.166475e-02      *
## gear   57.7630332    1.751775  117.2749634 8.565636e-02      .
## carb  -42.9572292  -84.450432   16.1551891 1.327138e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             9      
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean     sd      p0      p25       p50       p75     p100
##  1 cyl            112.    123.    -13.5    64.5     93.6      93.9     426.   
##  2 disp            -1.80    2.70   -8.94   -1.41    -0.805    -0.689    -0.361
##  3 hp              -0.417   1.41   -3.54   -0.679   -0.0942   -0.0556    1.19 
##  4 drat           570.    548.     36.8   371.     439.      501.     1972.   
##  5 wt            -504.    104.   -742.   -523.    -497.     -461.     -382.   
##  6 qsec          -108.     39.9  -152.   -143.    -115.      -93.0     -35.9  
##  7 vs             145.    414.   -116.    -23.9     51.1      81.2    1231.   
##  8 am            -320.    321.   -575.   -479.    -395.     -368.      465.   
##  9 gear            57.8    99.8  -113.      1.22    35.2     130.      196.   
## 10 carb           -43.0    85.5  -129.    -79.6    -77.9     -22.5     165.   
##    hist 
##  1 ▅▇▁▁▂
##  2 ▁▁▁▁▇
##  3 ▂▁▂▇▃
##  4 ▅▇▁▁▂
##  5 ▂▁▂▇▃
##  6 ▇▅▅▂▂
##  7 ▇▁▁▁▁
##  8 ▇▁▁▁▁
##  9 ▃▇▇▇▇
## 10 ▇▅▁▁▂
cat("Elapsed: ", proc.time()[3] - start, "s \n")
## Elapsed:  0.306 s
# Predict on the test observations that remain after the first update.
res <- obj$predict(X = new_X_test)

# Observed responses placed ahead of the predictions: the original training
# responses plus the response that was passed to obj$update().
new_y_train <- c(y_train, newy)

plot(c(new_y_train, res$preds), type = "l",
     main = "",
     ylab = "",
     ylim = range(c(res$upper, res$lower, y)))
lines(c(new_y_train, res$upper), col = "gray60")  # upper bound
lines(c(new_y_train, res$lower), col = "gray60")  # lower bound
lines(c(new_y_train, res$preds), col = "red")     # point predictions
lines(c(new_y_train, new_y_test), col = "blue")   # observed responses
# Dashed marker at the last observed response. Every plotted series starts
# with new_y_train, so the boundary is length(new_y_train); using
# length(y_train) would draw the marker one index too early.
abline(v = length(new_y_train), lty = 2, col = "black")

# Fraction of the remaining test responses that fall inside [lower, upper].
mean((new_y_test >= as.numeric(res$lower)) * (new_y_test <= as.numeric(res$upper)))
## [1] 1

Update the RVFL model with a second new observation (Pt. 2)

# The second test observation is handed to the model as a new data point ...
newx <- X_test[2L, ]
newy <- y_test[2L]

# ... and the first two observations are dropped from the test set.
new_X_test <- X_test[-(1:2), ]
new_y_test <- y_test[-(1:2)]
# Time the call to obj$update().
t0 <- proc.time()[["elapsed"]]
obj$update(newx, newy)
cat("Elapsed: ", proc.time()[["elapsed"]] - t0, "s \n")
## Elapsed:  0.005 s
t0 <- proc.time()[3]
obj$summary(new_X_test, y=new_y_test, show_progress=FALSE)
## $R_squared
## [1] -3.356623
## 
## $R_squared_adj
## [1] 11.16545
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.950   5.030   6.374   6.369   8.774  11.528 
## 
## $Coverage_rate
## [1] 75
## 
## $citests
##          estimate       lower        upper      p-value signif
## cyl    40.8981137    6.878148   74.9180798 2.494779e-02      *
## disp   -0.7335494   -1.206939   -0.2601595 8.026181e-03     **
## hp     -0.8233606   -2.198927    0.5522055 1.998737e-01       
## drat  549.7206897  416.053783  683.3875968 2.570765e-05    ***
## wt   -469.9351032 -535.877454 -403.9927527 6.344763e-07    ***
## qsec -116.6183871 -156.767393  -76.4693814 2.380078e-04    ***
## vs   -194.4213942 -288.046178 -100.7966103 1.732503e-03     **
## am   -395.7216847 -562.762331 -228.6810387 8.143911e-04    ***
## gear   53.0732573  -59.833653  165.9801679 3.030574e-01       
## carb  -25.9448064  -63.759959   11.8703467 1.487567e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             8      
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean      sd      p0     p25      p50      p75     p100
##  1 cyl             40.9    40.7    -40.5    23.9    56.3     69.9     77.8  
##  2 disp            -0.734   0.566   -1.64   -1.03   -0.571   -0.372   -0.139
##  3 hp              -0.823   1.65    -3.99   -1.18   -0.974   -0.196    1.25 
##  4 drat           550.    160.     170.    549.    606.     642.     643.   
##  5 wt            -470.     78.9   -543.   -537.   -489.    -437.    -336.   
##  6 qsec          -117.     48.0   -179.   -143.   -131.     -99.1    -29.9  
##  7 vs            -194.    112.    -377.   -283.   -162.    -120.     -46.3  
##  8 am            -396.    200.    -719.   -481.   -357.    -319.     -67.7  
##  9 gear            53.1   135.    -143.    -23.9    16.5    172.     231.   
## 10 carb           -25.9    45.2   -101.    -48.8   -23.8     -9.36    45.7  
##    hist 
##  1 ▂▂▂▁▇
##  2 ▅▁▂▇▅
##  3 ▂▁▇▂▃
##  4 ▁▁▁▁▇
##  5 ▇▅▂▁▅
##  6 ▂▇▂▂▂
##  7 ▂▅▂▇▂
##  8 ▃▁▇▂▂
##  9 ▂▅▅▁▇
## 10 ▂▅▇▁▅
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.177 s
t0 <- proc.time()[3]
obj$summary(new_X_test, y=new_y_test, show_progress=FALSE, type_ci="bootstrap")
## $R_squared
## [1] -3.356623
## 
## $R_squared_adj
## [1] 11.16545
## 
## $Residuals
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.950   5.030   6.374   6.369   8.774  11.528 
## 
## $Coverage_rate
## [1] 75
## 
## $citests
##          estimate       lower        upper      p-value signif
## cyl    40.8981137   13.611534   65.5300212 1.761953e-02      *
## disp   -0.7335494   -1.115045   -0.4043467 4.895727e-03     **
## hp     -0.8233606   -1.910531    0.1687140 1.645431e-01       
## drat  549.7206897  443.074920  626.9047838 7.440063e-06    ***
## wt   -469.9351032 -516.601161 -418.4594690 3.026569e-07    ***
## qsec -116.6183871 -145.417748  -84.7362399 1.335105e-04    ***
## vs   -194.4213942 -267.427325 -127.3499666 9.655321e-04    ***
## am   -395.7216847 -515.026287 -273.6581298 3.580841e-04    ***
## gear   53.0732573  -37.486884  134.4552153 2.655631e-01       
## carb  -25.9448064  -53.884043    2.7424530 1.155572e-01       
## 
## $signif_codes
## [1] "Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1"
## 
## $effects
## ── Data Summary ────────────────────────
##                            Values 
## Name                       effects
## Number of rows             8      
## Number of columns          10     
## _______________________           
## Column type frequency:            
##   numeric                  10     
## ________________________          
## Group variables            None   
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable     mean      sd      p0     p25      p50      p75     p100
##  1 cyl             40.9    40.7    -40.5    23.9    56.3     69.9     77.8  
##  2 disp            -0.734   0.566   -1.64   -1.03   -0.571   -0.372   -0.139
##  3 hp              -0.823   1.65    -3.99   -1.18   -0.974   -0.196    1.25 
##  4 drat           550.    160.     170.    549.    606.     642.     643.   
##  5 wt            -470.     78.9   -543.   -537.   -489.    -437.    -336.   
##  6 qsec          -117.     48.0   -179.   -143.   -131.     -99.1    -29.9  
##  7 vs            -194.    112.    -377.   -283.   -162.    -120.     -46.3  
##  8 am            -396.    200.    -719.   -481.   -357.    -319.     -67.7  
##  9 gear            53.1   135.    -143.    -23.9    16.5    172.     231.   
## 10 carb           -25.9    45.2   -101.    -48.8   -23.8     -9.36    45.7  
##    hist 
##  1 ▂▂▂▁▇
##  2 ▅▁▂▇▅
##  3 ▂▁▇▂▃
##  4 ▁▁▁▁▇
##  5 ▇▅▂▁▅
##  6 ▂▇▂▂▂
##  7 ▂▅▂▇▂
##  8 ▃▁▇▂▂
##  9 ▂▅▅▁▇
## 10 ▂▅▇▁▅
cat("Elapsed: ", proc.time()[3] - t0, "s \n")
## Elapsed:  0.326 s
# Predict on the test observations that remain after the second update.
res <- obj$predict(X = new_X_test)

# Observed responses placed ahead of the predictions: the original training
# responses plus the first two test responses.
new_y_train <- c(y_train, y_test[c(1, 2)])

plot(c(new_y_train, res$preds), type = "l",
     main = "",
     ylab = "",
     ylim = range(c(res$upper, res$lower, y)))
lines(c(new_y_train, res$upper), col = "gray60")  # upper bound
lines(c(new_y_train, res$lower), col = "gray60")  # lower bound
lines(c(new_y_train, res$preds), col = "red")     # point predictions
lines(c(new_y_train, new_y_test), col = "blue")   # observed responses
# Dashed marker at the last observed response. Every plotted series starts
# with new_y_train, so the boundary is length(new_y_train); using
# length(y_train) would draw the marker two indices too early.
abline(v = length(new_y_train), lty = 2, col = "black")

# Fraction of the remaining test responses that fall inside [lower, upper].
mean((new_y_test >= as.numeric(res$lower)) * (new_y_test <= as.numeric(res$upper)))
## [1] 0.75