ggtree
ggtree copied to clipboard
ggtree adds unexpected branch/node
Hello,
I am struggling with the following code I'm using to annotate my tree. It seems that ggtree adds an extra branch/node to the tree that should not be there, which disrupts all the bootstrap values.
My current code and the tree display are as follows:
# Load libraries
library(ggtree)
library(ggplot2)
library(ape)
library(tidytree)
library(treeio)
library(dplyr)
library(extrafont)
loadfonts()
# Read in the Newick tree file (replace the file name with the path to your own tree file)
tree <- read.tree("18S_PutativeSeqs_iqtree.treefile")
# Define the outgroup (must match a tip label in the tree file exactly)
outgroup <- "Chromera_velia_strain_CMS22_DQ174731.1"
# Reroot the tree with "Chromera_velia" as the outgroup.
# - resolve.root = TRUE makes the new root bifurcating, which inserts one
#   additional internal node; that node is expected and is not an artefact.
# - edgelabel = TRUE tells ape that the node labels (bootstrap supports) are
#   really branch labels, so they are moved along with their branches when the
#   tree is rerooted. Without it the support values end up on the wrong nodes.
tree <- root(tree, outgroup = outgroup, resolve.root = TRUE, edgelabel = TRUE)
# Lookup table: original tip label (name) -> shortened label shown on the plot (value)
tip_renames <- c(
  "Cryptosporidium_muris_strain_Calf_genotype_IDVS-811_AF093496.1" =
    "Cryptosporidium_muris_AF093496.1",
  "Ancora_cf._sagittata_isolate_AncoraWSBS2010_KX982504.1" =
    "Ancora_cf._sagittata_KX982504.1",
  "Nematopsis_temporariae_isolate_Rd_E8_24L4_KT717659.1" =
    "Nematopsis_temporariae_KT717659.1"
)
# Report labels that are not in the tree instead of silently skipping them
labels_not_found <- setdiff(names(tip_renames), tree$tip.label)
if (length(labels_not_found) > 0) {
  warning(
    "Tip label(s) not found in the tree and therefore not renamed: ",
    paste(labels_not_found, collapse = ", "),
    call. = FALSE
  )
}
# Rename the tip labels that are present in the lookup table
needs_rename <- tree$tip.label %in% names(tip_renames)
tree$tip.label[needs_rename] <- unname(tip_renames[tree$tip.label[needs_rename]])
# Reference width for the x-axis: 1.5 times the largest x coordinate of the
# default ggtree layout. Adjust the factor to leave more or less room for tip labels.
xlim_adj <- 1.5 * max(ggtree(tree)$data$x)
# Clades to highlight, written as "node ID" = fill colour
clade_fills <- c(
  "111" = "#00BC00",
  "180" = "#B66DFF",
  "154" = "#D82632",
  "169" = "#FFCA99",
  "179" = "#999999",
  "150" = "#00AACC",
  "151" = "#00AACC",
  "142" = "#00AACC",
  "134" = "pink",
  "120" = "brown",
  "122" = "#598541"
)
# Data for geom_hilight: one row per clade
highlight_data <- data.frame(
  node_light = as.numeric(names(clade_fills)), # Node IDs
  fill_color = unname(clade_fills) # Fill colors
)
# Clade names, in the same row order as highlight_data (node IDs and colours
# are taken from that table so they are defined in one place only)
clade_names <- c(
  "Terrestrial Gregarines 2",
  "Capitellid Gregarines",
  "Cephaloidophoroidea",
  "Lecudinoidea",
  "Incertae sedis",
  "Archigregarine",
  "Archigregarine",
  "Archigregarine",
  "Core Apicomplexa",
  "Cryptosporidium",
  "Actinocephaloidea"
)
stopifnot(length(clade_names) == nrow(highlight_data))
# One geom_cladelabel layer per clade; all layers share offset, alignment and font
clade_label_layers <- Map(
  function(clade_node, clade_name, clade_color) {
    geom_cladelabel(
      node = clade_node, label = clade_name, offset = .36, align = TRUE,
      fontsize = 5, color = clade_color, family = "Times New Roman"
    )
  },
  highlight_data$node_light,
  clade_names,
  as.character(highlight_data$fill_color)
)
# Plot the tree with new labels
# ladderize = TRUE reorders the nodes so that the tree looks more "balanced"
p <- ggtree(tree, ladderize = TRUE) +
  # geom_tiplab controls how the tips are displayed (font, size, italics, alignment, ...)
  geom_tiplab(
    hjust = 0, size = 5, align = TRUE, linesize = .5, offset = 0.001,
    fontface = "italic", family = "Times New Roman"
  ) +
  geom_treescale(y = -0.95, fontsize = 3.9) +
  # geom_text2 prints the bootstrap values stored in the node labels.
  # Only numeric labels from 50 to 100 are shown; non-numeric labels (e.g. tip
  # names) become NA and are dropped by the subset.
  geom_text2(
    aes(
      label = as.numeric(label),
      subset = !is.na(as.numeric(label)) & as.numeric(label) > 49 & as.numeric(label) < 101
    ),
    vjust = -0.5, hjust = -0.2, size = 3, check_overlap = TRUE
  ) +
  #labs(title = "18S Phylogenetic tree") +
  theme(legend.text = element_text(size = 8)) + # Text size of the legend entries
  xlim(0, xlim_adj * 2) +
  #geom_text(aes(label=ifelse(!isTip, node, '')), hjust=-0.5, vjust=0.5, size=3) + # see the nodes number
  clade_label_layers +
  geom_hilight(
    data = highlight_data, aes(node = node_light, fill = fill_color),
    alpha = 0.4, align = "both"
  ) +
  # Use the colours in fill_color as they are; guide = "none" hides the legend
  # (guide = FALSE is deprecated in current ggplot2)
  scale_fill_identity(guide = "none")
# Display the tree
p
Part of the tree for better readability:
The expected bootstrap values:
Any help or insights on why the ggtree function might be causing this issue would be greatly appreciated!