###### Statistical analyses pipeline using R ######
#
# This file holds three independent recipes. Each recipe is meant to be run
# on its own in a fresh interactive R session: every recipe ends by quitting
# R without saving the workspace, so if the whole file is executed at once
# nothing after the first q() call runs.
#
# dev.print() copies the contents of the current graphics device into the
# EPS file, so the plot must still be displayed when it is called.
#
# "FILENAME.csv" and "PLOTNAME.eps" are placeholders to be replaced with
# real paths before running.

###### To analyze and plot two genomic features ----
# Data were first saved as comma-separated values (.csv); the file is read
# with header = FALSE, i.e. its first line is treated as data.
# R commands are:
features <- read.csv("FILENAME.csv", header = FALSE)
column1 <- features[, 1]
column2 <- features[, 2]

# Spearman rank correlation between the two columns. print() makes the
# result visible even when the code is source()d rather than typed.
print(cor.test(column1, column2, method = "spearman"))

# Column 1 (divided by 1000) on the horizontal axis, column 2 (times 100)
# on the vertical axis.
plot(column1 / 1000, column2 * 100, xlab = "x-axis", ylab = "y-axis")
dev.print(postscript, file = "PLOTNAME.eps", width = 3, height = 3.5)

# Quit without saving the workspace (the original recipe answered "n" at
# the interactive save prompt).
q(save = "no")

###### To analyze and plot two genomic features after the removal of
###### introns or repeats ----
features <- read.csv("FILENAME.csv", header = FALSE)
column1 <- features[, 1]
column2 <- features[, 2]
# NOTE(review): presumably column 3 is the intron/repeat fraction of each
# record, in [0, 1) -- confirm against the input file.
intron <- features[, 3]

# NOTE(review): column 1 is divided by (1 - intron) whereas column 2 is
# multiplied by it; confirm this asymmetry is intended.
x <- column1 / (1 - intron)
y <- column2 * (1 - intron)

print(cor.test(y, x, method = "spearman"))

# Here y (divided by 1000) is on the horizontal axis and x (times 100) is
# on the vertical axis.
plot(y / 1000, x * 100, xlab = "x-axis", ylab = "y-axis")
dev.print(postscript, file = "PLOTNAME.eps", width = 3, height = 3.5)
q(save = "no")

##### To construct beeswarm plot (with variances) of sequence lengths ----
# Beeswarm library needs to be pre-installed: install.packages('beeswarm')
library(beeswarm)

features <- read.csv("FILENAME.csv", header = FALSE)
column1 <- features[, 1]
column2 <- features[, 2]
column3 <- features[, 3]
column4 <- features[, 4]
# Named seq_length rather than `length` so that base::length() is not
# shadowed.
seq_length <- features[, 5]

# Each of the first four columns is multiplied by column 5 and divided by
# 1000.
x <- column1 * seq_length / 1000
y <- column2 * seq_length / 1000
z <- column3 * seq_length / 1000
w <- column4 * seq_length / 1000

# Sample variance of each group.
print(var(x))
print(var(y))
print(var(z))
print(var(w))

distro <- list(x, y, z, w)
# NOTE(review): only three colours are supplied for four groups; if
# beeswarm recycles `col` per group, the fourth group reuses colour 2.
# Confirm this is intended or supply a fourth colour.
beeswarm(
  distro,
  corral = "wrap",
  col = c(2, 3, 4),
  cex = 1.7,
  pch = 16,
  xlab = "",
  ylab = "Length",
  labels = c("column1", "column2", "column3", "column4")
)
dev.print(postscript, file = "PLOTNAME.eps", width = 4.5, height = 5)
q(save = "no")