Hmisc
Hmisc copied to clipboard
Error from applying `soprobMarkovOrdm()` to a `blrm` model for binary outcome
Hi there,
I was trying to apply the Bayesian first order Markov model (from this link) to a binary longitudinal data set that I have.
However, I am getting the following error when I tried to use soprobMarkovOrdm()
get the state occupancy probabilities (SOP):
Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
contrasts can be applied only to factors with 2 or more levels
Using the uprobB()
function from the reference link above (with modifications in order to adapt it to my data), I was able to locate the cause of this error. It is caused by the application of the predict()
functions to calculate the post time 1 conditional probabilities (i.e. pp <- predict(fit, edata, type='fitted.ind', posterior.summary='all')
) as edata
has more than 2 row of data (since the outcome has 2 levels).
I was able to get the uprobB()
function to run by replacing:
pp <- predict(fit, edata, type='fitted.ind', posterior.summary='all')
with the following (plus additional modifications, see uprobB_new()
below with comment # my modification
)
pp0 <- predict(fit, edata[1, ], type = 'fitted.ind', posterior.summary = 'all')
pp1 <- predict(fit, edata[2, ], type = 'fitted.ind', posterior.summary = 'all')
Here are the code I use for this analysis along with a sub-sample of my data
library(tidyverse);library(rmsb);library(Hmisc)
# modified version of uproB()
uprobB_new <- function(fit, data, times, ylevels,
pvarname = "yprev", gap = FALSE, absorb = NULL) {
if(! length(fit$draws)) stop('fit does not have posterior draws')
nd <- nrow(fit$draws)
k <- length(ylevels)
ss <- length(times)
P <- array(NA, c(nd, ss, k),
dimnames=list(paste('draw', 1 : nd), as.character(times),
as.character(ylevels)))
# Never uncondition on initial state
data$time <- times[1]
# if(gap) data$gap <- times[1]
p <- predict(fit, data, type = 'fitted.ind', posterior.summary='all')
if(k == 2) p <- c(1 - p, p) # predict doesn't predict all levels if Y binary
P[, 1, ] <- p
rnameprev <- paste("t-1", ylevels)
rnameprevna <- paste("t-1", setdiff(ylevels, absorb))
# cp: matrix of conditional probabilities of Y conditioning on previous time Y
# Columns = k conditional probabilities conditional on a single previous state
# Rows = all possible previous states
# This is for a single posterior draw
cp <- matrix(NA, nrow=k, ncol=k,
dimnames=list(paste('t-1', ylevels), paste('t', ylevels)))
data <- as.list(data)
data[[pvarname]] <- setdiff(ylevels, absorb) # don't request estimates after absorbing state
edata <- expand.grid(data)
for(it in 2 : ss) {
edata$time <- times[it]
# if(gap) edata$gap <- times[it] - times[it - 1]
# pp <- predict(fit, edata, type='fitted.ind', posterior.summary='all')
pp0 <- predict(fit, edata[1, ], type = 'fitted.ind', posterior.summary = 'all') # my modification
pp1 <- predict(fit, edata[2, ], type = 'fitted.ind', posterior.summary = 'all') # my modification
for(idraw in 1 : nd) {
# cp <- rbind(c(1., rep(0., k - 1)), pp[idraw, ,])
pp <- c(pp0[idraw, ], pp1[idraw, ]) # my modification
cp <- cbind(1 - pp, pp) # my modification
# Compute unconditional probabilities of being in all possible states
# at current time t
P[idraw, it, ] <- t(cp) %*% P[idraw, it - 1, ]
}
}
P
}
dd <- structure(list(time = c(0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0,
4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16,
0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12,
16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16, 0, 4,
8, 12, 16, 0, 4, 8, 12, 16, 0, 4, 8, 12, 16),
y = c(0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0,
1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1,
0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,
0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
yprev = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0,
1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)),
row.names = c(NA,
-500),
class = c("tbl_df", "tbl", "data.frame"))
# fit blrm model
mm <- blrm(y ~ yprev + pol(time),
data = dd,
loo = FALSE,
iter = 1000,
seed = 2022,
# warmup = 200,
chains = 1, refresh = 50)
# calculate the SOPs using the soprobMarkovOrdm() function (will return error)
# ppred <- soprobMarkovOrdm(
# object = mm,
# data = data.frame(yprev = '0'),
# times = sort(unique(dd$time)),
# ylevels = c(0, 1),
# absorb = NULL,
# tvarname = "time",
# pvarname= "yprev",
# gap = NULL
# )
# calculate OPs using a modified version of uproB()
ppred <- uprobB_new(fit = mm,
data = data.frame(yprev=0),
times = c(sort(unique(dd$time))),
ylevels = c(0, 1),
pvarname='yprev',
absorb = NULL,
gap = NULL)
# derive summary statistic
g <- function(x) {
w <- c(mean(x)*100, HPDint(x)*100)
w <- round(w, 2)
names(w) <- c('p_predicted', 'lower_predicted', 'upper_predicted')
w
}
ppred_summ <- apply(ppred[, , 2], 2, g)
ppred_summ <- as_tibble(t(ppred_summ)) %>%
mutate(time = sort(unique(dd$time))) %>%
select(time, p_predicted, lower_predicted, upper_predicted)
# merge observed and predicted probability for comparisons
prop_summ <- dd %>%
group_by(time) %>%
summarise(N = n(),
n_observed = sum(y == 1),
p_observed = round(n_observed/N*100, 2)) %>%
left_join(ppred_summ, by = "time")
This doesn't appear to be an Hmisc
-related problem.