global test and pairwise comparisons giving pvals of all NA or all 1
Hello.
Thank you for creating ANCOM BC2. I am trying to use the method to analyze species-level read counts from shotgun sequencing data from a crossover study in which I have 3 treatment groups (repeated measure) and several other factors to include in the model. I am using the code below. The results of the primary analysis look fine. However, in the global test I get W = 0 and p-val = NA for all taxa. For pairwise comparisons all p-vals = 1 (lfc and SE seem to be calculated fine).
Any guidance on what might be going wrong is appreciated. Thank you
out = ancombc2(data = phylo, fix_formula = "Treatment + Sequence + Phase", rand_formula = "(1|Subject)", p_adj_method = "BH", pseudo_sens = TRUE, prv_cut = 1, lib_cut = 0, s0_perc = 0.05, group = "Treatment", struc_zero = TRUE, neg_lb = FALSE, alpha = 0.20, n_cl = 1, verbose = TRUE, global = TRUE, pairwise = TRUE, dunnet = TRUE, trend = FALSE, iter_control = list(tol = 0.01, max_iter = 20, verbose = TRUE), em_control = list(tol = 1e-05, max_iter = 100), lme_control = lme4::lmerControl(), mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), trend_control = list(contrast = NULL, node = NULL, solver = "ECOS", B = 100)
Could you please provide more information regarding your data?
@Maggie8888 Same thing happened to me. The problem is that global test is not working with random effects. I was able to reproduce the error using the codes below using the same structure/format of my phyloseq object:
# Metadata with 10 subjects
sampledataSIM <- data.frame(
SampleID = c("SUB_1", "SUB_2", "SUB_3", "SUB_4", "SUB_5",
"SUB_6", "SUB_7", "SUB_8", "SUB_9", "SUB_10"),
group3 = c("Orange", "Red Apples", "Red Apples",
"Banana", "Orange", "Red Apples",
"Red Apples", "Orange", "Orange", "Banana"),
lib_size = c(52671, 55804, 87918, 56231, 146509, 76254, 63534, 49799, 35272, 66063),
Participant = c(rep("A", 5), rep("B", 5)),
row.names = "SampleID"
)
taxonomySIM <- data.frame(
Kingdom = rep("Bacteria", 20),
Phylum0 = c(rep("Firmicutes", 5), rep("Bacteroidota", 5),
rep("Actinobacteriota", 5), rep("Verrucomicrobiota", 5)),
Phylum = c("Firmicutes_A", "Firmicutes_C", "Firmicutes_B", "Firmicutes", "Firmicutes_A",
"Bacteroidota", "Bacteroidota", "Bacteroidota", "Bacteroidota", "Bacteroidota",
"Actinobacteriota", "Actinobacteriota", "Actinobacteriota", "Actinobacteriota", "Actinobacteriota",
"Verrucomicrobiota", "Verrucomicrobiota", "Verrucomicrobiota", "Verrucomicrobiota", "Verrucomicrobiota"),
Class = c("Clostridia", "Bacilli", "Clostridia", "Clostridia", "Clostridia", "Bacteroidia", "Bacteroidia", "Bacteroidia",
"Bacteroidia", "Bacteroidia", "Actinomycetia", "Actinomycetia", "Actinomycetia", "Actinomycetia", "Actinomycetia",
"Verrucomicrobiae", "Verrucomicrobiae", "Verrucomicrobiae", "Verrucomicrobiae", "Verrucomicrobiae"),
Order = c("Lachnospirales", "Staphylococcales", "Clostridiales", "Clostridiales", "Clostridiales", "Bacteroidales",
"Bacteroidales", "Bacteroidales", "Bacteroidales", "Bacteroidales", "Actinomycetales", "Actinomycetales",
"Actinomycetales", "Actinomycetales", "Actinomycetales", "Verrucomicrobiales", "Verrucomicrobiales", "Verrucomicrobiales",
"Verrucomicrobiales", "Verrucomicrobiales"),
Family = c("Lachnospiraceae", "Gemellaceae", "Lachnospiraceae", "Lachnospiraceae", "Lachnospiraceae", "Bacteroidaceae",
"Prevotellaceae", "Rikenellaceae", "Porphyromonadaceae", "Bacteroidaceae", "Actinomycetaceae", "Corynebacteriaceae",
"Micrococcaceae", "Nocardiaceae", "Actinomycetaceae", "Akkermansiaceae", "Akkermansiaceae", "Akkermansiaceae",
"Akkermansiaceae", "Akkermansiaceae"),
Genus = c("Mediterraneibacter", "Gemella", "Clostridium", "Ruminococcus", "Blautia", "Bacteroides", "Prevotella", "Alistipes",
"Porphyromonas", "Parabacteroides", "Actinomyces", "Corynebacterium", "Micrococcus", "Nocardia", "Streptomyces",
"Akkermansia", "Akkermansia", "Akkermansia", "Akkermansia", "Akkermansia"),
Species = c("sp014287475", NA, NA, NA, "blautia_sp", NA, NA, NA, NA, NA, "odontolytica", NA, NA, "nocardia_sp", "strepto_sp", NA, NA, "akermansia_sp1", NA, NA),
row.names = paste0("ASV_", 1:20) )
set.seed(123)
otu_table_dataSIM <- matrix(
sample(1:500, 200, replace = TRUE),
nrow = 20,
ncol = 10,
dimnames = list(paste0("ASV_", 1:20), row.names(sampledataSIM))
)
physeqSIM <- phyloseq(
otu_table(otu_table_dataSIM, taxa_are_rows = TRUE),
tax_table(as.matrix(taxonomySIM)),
sample_data(sampledataSIM)
)
out_test_Phylum_global <- ancombc2(
data = physeqSIM,
assay_name = "counts",
tax_level = "Phylum",
fix_formula = "group3 + lib_size",
rand_formula = "(1|Participant)",
prv_cut = 0.1,
lib_cut = 100,
s0_perc = 0.05,
struc_zero = FALSE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 1,
verbose = TRUE,
group = "group3",
global = TRUE,
pairwise = TRUE,
dunnet = TRUE
)
out_test_Phylum_global$res_global
taxon W p_val q_val diff_abn passed_ss
1 Firmicutes_A 0 NA 1 FALSE TRUE
2 Firmicutes_C 0 NA 1 FALSE TRUE
3 Firmicutes_B 0 NA 1 FALSE TRUE
4 Firmicutes 0 NA 1 FALSE TRUE
5 Bacteroidota 0 NA 1 FALSE TRUE
6 Actinobacteriota 0 NA 1 FALSE TRUE
7 Verrucomicrobiota 0 NA 1 FALSE TRUE
Without random effect
out_test_Phylum_global.noRand <- ancombc2(
data = physeqSIM,
assay_name = "counts",
tax_level = "Phylum",
fix_formula = "group3 + lib_size + Participant",
prv_cut = 0.1,
lib_cut = 100,
s0_perc = 0.05,
struc_zero = FALSE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 1,
verbose = TRUE,
group = "group3",
global = TRUE,
pairwise = TRUE,
dunnet = TRUE
)
out_test_Phylum_global.noRand$res_global
taxon W p_val q_val diff_abn passed_ss
1 Firmicutes_A 2.7792759 3.086342e-01 0.797146546 FALSE TRUE
2 Firmicutes_C 38.1562819 1.875257e-03 0.009376287 TRUE FALSE
3 Firmicutes_B 198.9391955 3.431785e-05 0.000240225 TRUE FALSE
4 Firmicutes 64.7502914 5.328986e-04 0.003197392 TRUE FALSE
5 Bacteroidota 0.6975067 9.189361e-01 0.918936076 FALSE TRUE
6 Actinobacteriota 3.1051242 2.657155e-01 0.797146546 FALSE TRUE
7 Verrucomicrobiota 6.6093236 7.891593e-02 0.315663728 FALSE TRUE
Increasing the number of observations 10x and reducing the number of parameters didn't solve the problem
# Replicate metadata
n_replicates <- 10
sampledataSIM10x <- sampledataSIM %>%
rownames_to_column("SampleID") %>%
slice(rep(1:n(), each = n_replicates)) %>%
mutate(SampleID = paste0(SampleID, "_", rep(1:n_replicates, times = nrow(sampledataSIM)))) %>%
column_to_rownames("SampleID")
# Expand the OTU table to match the number of replicates
otu_table_dataSIM10x <- otu_table_dataSIM %>%
as.data.frame() %>%
replicate(n_replicates, ., simplify = FALSE) %>% # Replicate the entire data frame
bind_cols() %>% # Combine the replicated data frames into one
setNames(rep(colnames(otu_table_dataSIM), n_replicates)) %>% # Assign new column names
as.matrix()
# Adjust column names to ensure uniqueness
colnames(otu_table_dataSIM10x) <- paste0(rep(colnames(otu_table_dataSIM), n_replicates), "_", rep(1:n_replicates, each = ncol(otu_table_dataSIM)))
# Ensure the OTU table matches new dimensions
dimnames(otu_table_dataSIM10x) <- list(rownames(otu_table_dataSIM10x), rownames(sampledataSIM10x))
# Create new phyloseq object
physeqSIM10x <- phyloseq(
otu_table(otu_table_dataSIM10x, taxa_are_rows = TRUE),
tax_table(as.matrix(taxonomySIM)),
sample_data(sampledataSIM10x)
)
out_test_Phylum_global.10x <- ancombc2(
data = physeqSIM10x,
assay_name = "counts",
tax_level = "Phylum",
fix_formula = "group3",
rand_formula = "(1|Participant)",
prv_cut = 0.1,
lib_cut = 100,
s0_perc = 0.05,
struc_zero = FALSE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 1,
verbose = TRUE,
group = "group3",
global = TRUE,
pairwise = FALSE,
dunnet = FALSE
)
out_test_Phylum_global.10x$res_global
taxon W p_val q_val diff_abn passed_ss
1 Firmicutes_A 0 NA 1 FALSE TRUE
2 Firmicutes_C 0 NA 1 FALSE TRUE
3 Firmicutes_B 0 NA 1 FALSE TRUE
4 Firmicutes 0 NA 1 FALSE TRUE
5 Bacteroidota 0 NA 1 FALSE TRUE
6 Actinobacteriota 0 NA 1 FALSE TRUE
7 Verrucomicrobiota 0 NA 1 FALSE TRUE
These issues might arise because the current implementation of the global test likely assumes a simpler fixed-effect structure. Using packages like lme4 or lmerTest for random effects modeling can potentially address these limitations, as they provide robust handling of mixed-effects models, appropriate degrees of freedom approximations, and tools for conducting LRTs that align with mixed-model assumptions.