add function to plot GC skew
As long reads start to produce more circular genomes it might be useful to plot the GC skew (https://en.wikipedia.org/wiki/GC_skew) for a given contig. Example code and plot below:
`library(ggplot2) library(dplyr) library(seqinr)
gcskew <- function(x) { if (!is.character(x) || length(x) > 1) stop("single string expected") tmp <- tolower(s2c(x)) nC <- sum(tmp == "c") nG <- sum(tmp == "g") if (nC + nG == 0) return(NA) return(100 * (nG - nC)/(nC + nG)) } plotgcskew<- function(fasta="EcoliK12MG1655_reference.fasta",step=10000,wsize=10000){ myseq <- read.fasta(file = fasta,as.string = TRUE) cols <- c("LINE1"="blue","LINE2"="black") starts <- seq(from = 1, to = nchar(myseq), by = step) starts <- starts[-length(starts)] n <- length(starts) result <- numeric(n) for (i in seq_len(n)) { result[i] <- gcskew(substr(myseq, starts[i], starts[i] + wsize - 1)) }
Plot the result:
position <- starts/1000 # in Kbp GCskew <- result gcskew_data<-data.frame(position,GCskew) %>% mutate(cumGC=cumsum(GCskew), normGCskew=(GCskew-max(GCskew))/(max(GCskew)-min(GCskew)), normcumGC=(cumGC-max(cumGC))/(max(cumGC)-min(cumGC))) ggplot(data = gcskew_data,aes(x = position,y = normGCskew))+geom_line(aes(col="gc skew"))+geom_line(aes(col="Cumulative GC skew",y = normcumGC))+ggtitle("GC skew plot")+xlab("Position (kbp)") }`

Probably this?
`library(ggplot2) library(dplyr) library(seqinr)
gcskew <- function(x) { if (!is.character(x) || length(x) > 1) stop("single string expected") tmp <- tolower(s2c(x)) nC <- sum(tmp == "c") nG <- sum(tmp == "g") if (nC + nG == 0) return(NA) return(100 * (nG - nC) / (nC + nG)) }
plotgcskew <- function(fasta = "chr26.fna", step = 10000, wsize = 10000) { myseq <- read.fasta(file = fasta, as.string = TRUE) cols <- c("LINE1" = "blue", "LINE2" = "black") starts <- seq(from = 1, to = nchar(myseq), by = step) starts <- starts[-length(starts)] n <- length(starts) result <- numeric(n) for (i in seq_len(n)) { result[i] <- gcskew(substr(myseq, starts[i], starts[i] + wsize - 1)) }
position <- starts / 1000 # in Kbp GCskew <- result gcskew_data <- data.frame(position, GCskew) %>% mutate(cumGC = cumsum(GCskew), normGCskew = (GCskew - max(GCskew)) / (max(GCskew) - min(GCskew)), normcumGC = (cumGC - max(cumGC)) / (max(cumGC) - min(cumGC)))
ggplot(data = gcskew_data, aes(x = position, y = normGCskew)) + geom_line(aes(col = "GC skew")) + geom_line(aes(x = position, y = normcumGC, col = "Cumulative GC skew")) + ggtitle("GC skew plot") + xlab("Position (kbp)") }
Call the function to generate the plot
plotgcskew() `
You're welcome to make a PR
I am not in the business or no need for PR.
The code original code wasn't work so I asked ChatGPT to fix it. The revised code worked and I pasted it here. Feel free to delete it if bothering you.