dbarts icon indicating copy to clipboard operation
dbarts copied to clipboard

predict$( ) : RStudio crashes sometimes when test data matrix has one row and one column

Open EoghanONeill opened this issue 1 year ago • 1 comments

I attach a dbarts object and test data matrix to this message.

dbarts_debug_example.zip

The example code below sometimes crashes when there is one row in the test data matrix, and sometimes it does not crash.

library(dbarts)

load(file = "......./debugsampler.RData" )

load(file = "....../debugtempbind.RData" )

sampler$predict(as.matrix(rep(tempbind[1,1],100),100,1))

sampler$predict(as.matrix(rep(tempbind[1,1],2),2,1))

sampler$predict(as.matrix(rep(tempbind[1,1],1),1,1))

testpredvec <- sampler$predict(x.test = as.matrix(tempbind[,]), offset.test = NULL)


RStudio: Version 1.3.1056 R version 4.1.0 (2021-05-18) Platform: x86_64-w64-mingw32/x64 (64-bit)

EDIT: I ran this code on another computer and it did not crash.

EoghanONeill avatar Oct 04 '23 05:10 EoghanONeill

Also, the predicted value when it does not crash is ``-0.977252'', which is less than the sum of the minimum terminal node values obtained from each tree. Maybe this is a separate issue.

n.trees <- [email protected]

tempsum <- 0

for(i in 1:n.trees){

  treeexample1 <- sampler$getTrees(treeNums = i,
                                   chainNums = 1,
                                   sampleNums = 1)
  
  
  tempsum <- tempsum + min(treeexample1[treeexample1$var== -1, ]$value)
  
}

tempsum

I think the prediction should be 0.1020762




getPredictionsForTree <- function(tree, x) {
  predictions <- rep(NA_real_, nrow(x))
  getPredictionsForTreeRecursive <- function(tree, indices) {
    if (tree$var[1] == -1) {
      # Assigns in the calling environment by using <<-
      predictions[indices] <<- tree$value[1]
      return(1)
    }
    goesLeft <- x[indices, tree$var[1]] <= tree$value[1]
    headOfLeftBranch <- tree[-1,]
    n_nodes.left <- getPredictionsForTreeRecursive(
      headOfLeftBranch, indices[goesLeft])
    headOfRightBranch <- tree[seq.int(2 + n_nodes.left, nrow(tree)),]
    n_nodes.right <- getPredictionsForTreeRecursive(
      headOfRightBranch, indices[!goesLeft])
    return(1 + n_nodes.left + n_nodes.right)
  }

  getPredictionsForTreeRecursive(tree, seq_len(nrow(x)))
  return(predictions)
}


getPredictionsForTree(treeexample1,as.matrix(rep(tempbind[1,1],100),100,1) )



n.trees <- [email protected]

tempsum <- 0

for(i in 1:n.trees){
  
  treeexample1 <- sampler$getTrees(treeNums = i,
                                   chainNums = 1,
                                   sampleNums = 1)
  
  
  tempsum <- tempsum + getPredictionsForTree(treeexample1,as.matrix(rep(tempbind[1,1],100),100,1) )
  
}

tempsum

EDIT: This appears to be because the tree predictions must be re-scaled to the original outcome scale:

tempmax <- max(sampler$data@y)
tempmin <- min(sampler$data@y)
(tempsum+ 0.5)*(tempmax - tempmin) +tempmin

EoghanONeill avatar Oct 04 '23 07:10 EoghanONeill