dbarts
dbarts copied to clipboard
predict$( ) : RStudio crashes sometimes when test data matrix has one row and one column
I attach a dbarts object and test data matrix to this message.
The example code below sometimes crashes when there is one row in the test data matrix, and sometimes it does not crash.
library(dbarts)
load(file = "......./debugsampler.RData" )
load(file = "....../debugtempbind.RData" )
sampler$predict(as.matrix(rep(tempbind[1,1],100),100,1))
sampler$predict(as.matrix(rep(tempbind[1,1],2),2,1))
sampler$predict(as.matrix(rep(tempbind[1,1],1),1,1))
testpredvec <- sampler$predict(x.test = as.matrix(tempbind[,]), offset.test = NULL)
RStudio: Version 1.3.1056 R version 4.1.0 (2021-05-18) Platform: x86_64-w64-mingw32/x64 (64-bit)
EDIT: I ran this code on another computer and it did not crash.
Also, the predicted value when it does not crash is ``-0.977252'', which is less than the sum of the minimum terminal node values obtained from each tree. Maybe this is a separate issue.
n.trees <- [email protected]
tempsum <- 0
for(i in 1:n.trees){
treeexample1 <- sampler$getTrees(treeNums = i,
chainNums = 1,
sampleNums = 1)
tempsum <- tempsum + min(treeexample1[treeexample1$var== -1, ]$value)
}
tempsum
I think the prediction should be 0.1020762
getPredictionsForTree <- function(tree, x) {
predictions <- rep(NA_real_, nrow(x))
getPredictionsForTreeRecursive <- function(tree, indices) {
if (tree$var[1] == -1) {
# Assigns in the calling environment by using <<-
predictions[indices] <<- tree$value[1]
return(1)
}
goesLeft <- x[indices, tree$var[1]] <= tree$value[1]
headOfLeftBranch <- tree[-1,]
n_nodes.left <- getPredictionsForTreeRecursive(
headOfLeftBranch, indices[goesLeft])
headOfRightBranch <- tree[seq.int(2 + n_nodes.left, nrow(tree)),]
n_nodes.right <- getPredictionsForTreeRecursive(
headOfRightBranch, indices[!goesLeft])
return(1 + n_nodes.left + n_nodes.right)
}
getPredictionsForTreeRecursive(tree, seq_len(nrow(x)))
return(predictions)
}
getPredictionsForTree(treeexample1,as.matrix(rep(tempbind[1,1],100),100,1) )
n.trees <- [email protected]
tempsum <- 0
for(i in 1:n.trees){
treeexample1 <- sampler$getTrees(treeNums = i,
chainNums = 1,
sampleNums = 1)
tempsum <- tempsum + getPredictionsForTree(treeexample1,as.matrix(rep(tempbind[1,1],100),100,1) )
}
tempsum
EDIT: This appears to be because the tree predictions must be re-scaled to the original outcome scale:
tempmax <- max(sampler$data@y)
tempmin <- min(sampler$data@y)
(tempsum+ 0.5)*(tempmax - tempmin) +tempmin