###### statistical analyses pipeline using R

######To analyze and plot two genomic features

#Data were first saved as comma-separated values (.csv)
#R commands are :

# Correlate and plot two genomic features stored in the first two columns
# of a header-less CSV file. Intended to be run in an interactive R session
# with an on-screen graphics device (the plot is copied from the screen).

# header = FALSE: the first row is data, so columns are addressed by position.
features <- read.csv("FILENAME.csv", header = FALSE)

feature_1 <- features[, 1]
feature_2 <- features[, 2]

# Spearman rank correlation between the two features. print() keeps the
# result visible when the file is run through source(), where top-level
# values are not auto-printed.
print(cor.test(feature_1, feature_2, method = "spearman"))

# The scaling (column 1 / 1000, column 2 * 100) affects the display only;
# the correlation above is computed on the unscaled values.
# NOTE(review): presumably bp -> kb and fraction -> percent; confirm units.
plot(feature_1 / 1000, feature_2 * 100, xlab = "x-axis", ylab = "y-axis")

# Copy the on-screen plot to a real EPS file. dev.copy2eps() sets
# onefile = FALSE, horizontal = FALSE and paper = "special", whereas
# dev.print(postscript, ...) used the postscript() defaults (landscape,
# full page), which is not an encapsulated file despite the .eps name.
dev.copy2eps(file = "PLOTNAME.eps", width = 3, height = 3.5)

# Quit without saving the workspace. This replaces `q()` followed by a bare
# `n`, which was the typed answer to the interactive "Save workspace image?"
# prompt rather than R code.
q(save = "no")

######To analyze and plot two genomic features after the removal of introns or repeats


# Correlate and plot two genomic features after correcting both for the
# fraction given in column 3 (introns or repeats, per the section title).
# Intended to be run in an interactive R session with an on-screen
# graphics device (the plot is copied from the screen).

# header = FALSE: the first row is data, so columns are addressed by position.
features <- read.csv("FILENAME.csv", header = FALSE)

feature_1 <- features[, 1]
feature_2 <- features[, 2]
removed_fraction <- features[, 3]

# The correction divides by (1 - fraction); a fraction of 1 yields Inf and a
# fraction above 1 flips the sign, so flag such rows instead of silently
# carrying them into the correlation and the plot.
if (any(removed_fraction >= 1, na.rm = TRUE)) {
  warning(
    "column 3 contains values >= 1; corrected values will be Inf or negative",
    call. = FALSE
  )
}

# Column 1 is scaled up and column 2 scaled down by the retained fraction.
kept_fraction <- 1 - removed_fraction
corrected_1 <- feature_1 / kept_fraction
corrected_2 <- feature_2 * kept_fraction

# Spearman rank correlation on the corrected values. print() keeps the
# result visible when the file is run through source().
print(cor.test(corrected_2, corrected_1, method = "spearman"))

# NOTE(review): here the column-2-derived value is on the x axis (/ 1000) and
# the column-1-derived value on the y axis (* 100), i.e. the scaling is
# applied to the opposite input columns compared with the uncorrected
# recipe. Presumably the input files use a different column order; confirm.
plot(corrected_2 / 1000, corrected_1 * 100, xlab = "x-axis", ylab = "y-axis")

# Copy the on-screen plot to a real EPS file. dev.copy2eps() sets
# onefile = FALSE, horizontal = FALSE and paper = "special", whereas
# dev.print(postscript, ...) used the postscript() defaults (landscape,
# full page), which is not an encapsulated file despite the .eps name.
dev.copy2eps(file = "PLOTNAME.eps", width = 3, height = 3.5)

# Quit without saving the workspace. This replaces `q()` followed by a bare
# `n`, which was the typed answer to the interactive "Save workspace image?"
# prompt rather than R code.
q(save = "no")


#####To construct beeswarm plot (with variances) of sequence lengths

#Beeswarm library needs to be pre-installed: install.packages('beeswarm')


# Beeswarm plot (plus per-group variances) of four length distributions.
# Columns 1-4 of the header-less CSV are each multiplied by column 5 and
# divided by 1000 before plotting. Intended to be run in an interactive R
# session with an on-screen graphics device.
library(beeswarm)

# header = FALSE: the first row is data, so columns are addressed by position.
features <- read.csv("FILENAME.csv", header = FALSE)

group_labels <- c("column1", "column2", "column3", "column4")

# Named seq_length rather than `length` so base::length() is not shadowed.
seq_length <- features[, 5]

# One vector per group: column value * sequence length / 1000.
# NOTE(review): presumably columns 1-4 are fractions of the sequence and the
# result is in kb; confirm against the input files.
lengths_scaled <- lapply(
  features[, 1:4],
  function(column_values) column_values * seq_length / 1000
)
names(lengths_scaled) <- group_labels

# Sample variance of each group, printed explicitly so the values are shown
# even when the file is run through source().
print(vapply(lengths_scaled, var, numeric(1)))

# Four swarms need four colours: the original col = c(2, 3, 4) was recycled,
# so the fourth swarm was drawn in the same colour as the first. A single
# pch value is recycled across all swarms (same result as c(16, 16, 16)).
beeswarm(
  lengths_scaled,
  corral = "wrap",
  col = c(2, 3, 4, 5),
  cex = 1.7,
  pch = 16,
  xlab = "",
  ylab = "Length",
  labels = group_labels
)

# Copy the on-screen plot to a real EPS file. dev.copy2eps() sets
# onefile = FALSE, horizontal = FALSE and paper = "special", whereas
# dev.print(postscript, ...) used the postscript() defaults (landscape,
# full page), which is not an encapsulated file despite the .eps name.
dev.copy2eps(file = "PLOTNAME.eps", width = 4.5, height = 5)

# Quit without saving the workspace. This replaces `q()` followed by a bare
# `n`, which was the typed answer to the interactive "Save workspace image?"
# prompt rather than R code.
q(save = "no")
# ==