setwd("/Users/acortes/Documents/Research/CommonBean.Uppsala.2014/GWAS_env") samples<-read.table("di_small_R.txt",sep="\t",header=TRUE) mds_ori<-read.table("MDS_fin_R.txt",sep="\t",header=TRUE) div<-read.table("per_marker_div.txt",sep="\t",header=TRUE) centro<-read.table("centro.txt",sep="\t",header=TRUE) chr_label<-c("Pv01","Pv02","Pv03","Pv04","Pv05","Pv06","Pv07","Pv08","Pv09","Pv10","Pv11") glm_pool<-read.table("glm_pool.txt",sep="\t",header=TRUE) centro$pos_cum_start<-NA centro$pos_cum_end<-NA centro$pos_cum_mid<-NA for(i in 1:11){ if(i==1){ centro[which(centro$chr==i),"pos_cum_start"]<-(centro[which(centro$chr==i),]$start) centro[which(centro$chr==i),"pos_cum_end"]<-(centro[which(centro$chr==i),]$end) centro[which(centro$chr==i),"pos_cum_mid"]<-(centro[which(centro$chr==i),]$mid) } if(i!=1){ centro[which(centro$chr==i),"pos_cum_start"]<-(centro[which(centro$chr==i),]$start+chr[chr$chr==i-1,"len_cum"]/1000000) centro[which(centro$chr==i),"pos_cum_end"]<-(centro[which(centro$chr==i),]$end+chr[chr$chr==i-1,"len_cum"]/1000000) centro[which(centro$chr==i),"pos_cum_mid"]<-(centro[which(centro$chr==i),]$mid+chr[chr$chr==i-1,"len_cum"]/1000000) } } #Hist for DI samples2<-read.table("di_R.txt",sep="\t",header=TRUE) samples2<-read.table("di_new.txt",sep="\t",header=TRUE) layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8)) par(mar=c(0,5,1,1)) boxplot(samples2[samples2$di>-3.5&samples2$di<4,"di"],horizontal=TRUE,outline=FALSE,ylim=c(-4,4),xaxt="n",frame=F,col="lightgray") par(mar=c(5,5,1,1)) hist(samples2[samples2$di>-3.5&samples2$di<4,"di"],breaks=8,xlim=c(-4,4),xlab=expression("Drought Index (Cortes"~~italic("et al.")~~"2013)"),las=1,main="",col="lightgray",border="white") abline(v=2,lty=2,col="darkgray") abline(v=-2,lty=2,col="darkgray") shapiro.test(samples2[samples2$di>-3.5&samples2$di<4,"di"]) #MDS analysis mds<-merge(data.frame(Taxa=samples[,"Taxa"]),mds_ori,by="Taxa",all.x=T) par(mfrow=c(1,1)) par(mar=c(5,5,2,2)+0.1) plot(PC2~PC1,mds,main="",xlab="PCoA1 (25 %)",ylab="PCoA2 (10 %)",las=1,col="white") points(PC2~PC1,mds[mds$k=="K1",],pch=1) points(PC2~PC1,mds[mds$k=="K2",],pch=2) points(PC2~PC1,mds[mds$k=="K3",],pch=3) points(PC2~PC1,mds[mds$k=="K4",],pch=4) points(PC2~PC1,mds[mds$k=="K5",],pch=16) abline(v=0,lty=2,col="darkgray") abline(h=0,lty=2,col="darkgray") legend(-0.2,-0.2,c("Mesoamerica","Guatemala","Colombia","Ecuador - North Peru","Andean"),pch=c(1,2,3,4,16),box.col="white") #Chr analysis chr<-data.frame(chr=1:11,len=tapply(div$EndChrPosition,div$Chromosome,max)) #chr<-merge(chr,data.frame(chr=names(tapply(purp$ones,purp$chr,sum)),markers=tapply(purp$ones,purp$chr,sum)),by="chr",all.x=T) chr$mid<-chr$len/2 for(i in (1:nrow(chr))){chr[i,"len_cum"]<-sum(chr[1:i,"len"]);chr[i,"mid_cum"]<-(chr[i,"len_cum"]-chr[i,"mid"])} #Significant markers glm_pool$p_log<-(-log10(glm_pool$p)) glm_pool_sig<-glm_pool[glm_pool$p_log>(-log10(0.001/24000)),] glm_pool_sig<-glm_pool_sig[!is.na(glm_pool_sig$p_log),] glm_pool_sig$pos_cum<-NA for(i in 1:11){ if(i==1){glm_pool_sig[which(glm_pool_sig$Chr==i),"pos_cum"]<-(glm_pool_sig[which(glm_pool_sig$Chr==i),]$Pos)} if(i!=1){glm_pool_sig[which(glm_pool_sig$Chr==i),"pos_cum"]<-(glm_pool_sig[which(glm_pool_sig$Chr==i),]$Pos+chr[chr$chr==i-1,"len_cum"])} } write.table(glm_pool_sig,"glm_pool_sig_ass.txt",sep="\t",col.names = TRUE,row.names = FALSE) glm_pool_sig<-read.table("glm_pool_sig_ass.txt",sep="\t",header=TRUE) glm_pool_sig_regions<-read.table("glm_pool_sig_ass_regions.txt",sep="\t",header=TRUE) #Plot physical map cex_valu<-0.8 pdf("figs/map_F_v4.pdf",width=11,height=8,paper="special") par(mfrow=c(1,1)) par(mar=c(3,5,3,5)+0.1) plot(StartChrPosition/1000000~I(Chromosome-0),div,main="",xlab="",ylab="Mb",xlim=as.roman(c(1,11)),ylim=rev(range(StartChrPosition/1000000)),yaxs = "i",pch="-",axes=F,cex=cex_valu) points(Pos/1000000~I(Chr+0.1),glm_pool_sig,col="red",pch="-",cex=cex_valu) points(Pos/1000000~I(Chr+0.2),glm_pool_sig_regions,col=Color,pch="-",cex=cex_valu) points(mid~I(chr-0.1),centro,col="gray",pch=19) for(i in 1:11){segments(i-0.1,centro[centro$chr==i,"end"],i-0.1,centro[centro$chr==i,"start"],col="gray",lty=1)} axis(side=3,at=c(1:11),labels=chr_label,col="white",cex.axis=0.8,cex.lab=0.8) axis(side=2,at=seq(0,60,by=5),las=1,cex.axis=0.8) axis(side=4,at=seq(0,60,by=5),las=1,cex.axis=0.8) text(12,29,"Mb",srt=270,xpd=TRUE,cex.lab=0.8) dev.off graphics.off() for(i in glm_pool_sig[,"pos_cum"]/1000000){ polygon(c(i-1,i+1,i+1,i-1,i-1),c(-10,-10,10,10,-10), col = rgb(235,235,235,255,max=255), border = NA) } #LD Analysis - TBD ld<-read.table("ld_1.txt",sep="\t",header=TRUE) ld$Dist_bp<-as.numeric(ld$Dist_bp) #ld<-ld[ld$R.2!=1,] ld<-ld[ld$Dist_bp<500,] ld<-ld[!is.na(ld$Dist_bp),] par(mfrow=c(1,1)) par(mar=c(5,5,2,2)+0.1) plot(R.2~Dist_bp,ld,pch=19,cex=0.5) f<-function(x,a,b){(a*exp(-b*x))} fit<-nls(R.2~f(Dist_bp,a,b),ld,start=c(a=1,b=0),na.action=na.exclude) co<-coef(fit) x<-seq(1,chr[chr$chr==1,"len"],round(max(ld$Dist_bp)/50)) curve(f(x,a=co[1],b=co[1]),col=i,add=TRUE,lty=j,lwd=2,from=min(x),to=max(x)) ############################ ############################ #Windows window_size=1000000 step=200000 for(i in 1:11){ purp_chr<-div[div$Chromosome==i,] ini<-1 end<-window_size+1 while((end+step)<=chr[chr$chr==i,"len"]){ purp_temp<-purp_chr[purp_chr$StartChrPosition>=ini&purp_chr$StartChrPositionglm_pool_sig[i,"Pos"],]} if(i!=1){tot<-rbind(tot,sum[sum$lg==glm_pool_sig[i,"Chr"]&(sum$window-500000)glm_pool_sig[i,"Pos"],])} } write.table(tot,"statistics_per_window_ass.txt",sep="\t",col.names = TRUE,row.names = FALSE) sum_ass<-read.table("statistics_per_window_ass.txt",sep="\t",header=TRUE) sum_ass$id<-paste(sum_ass$lg,sum_ass$window,sep="_") sum$id<-paste(sum$lg,sum$window,sep="_") sum$ass<-FALSE sum[which((sum$id %in% sum_ass$id)),"ass"]<-TRUE write.table(sum,"statistics_per_window_F.txt",sep="\t",col.names = TRUE,row.names = FALSE) sum<-read.table("statistics_per_window_F.txt",sep="\t",header=TRUE) anova(lm(density~ass,sum)) anova(lm(pi~ass,sum)) anova(lm(theta~ass,sum)) anova(lm(tajima~ass,sum)) par(mfrow=c(4,1)) par(mgp=c(4,1,0)) cex=1.2 par(mar=c(1,6,2,2)+0.1) means<-tapply(sum[,"density"],sum$ass,mean,na.rm=T) se<-3*tapply(sum[,"density"],sum$ass,function(x)sd(x,na.rm=T)/sqrt(length(x))) mp <- barplot(t(means),xpd = FALSE,ylim=c(20,90),ylab="SNP Density",xlab="Associated",cex.axis=cex,cex.names=cex,cex.lab=cex,las=1,col="white",axisnames=FALSE) arrows(mp, t(means) - t(se), mp, t(means) + t(se), mp, angle = 90, length = 0.2, code=3, col="black", lty=1, lwd=1) legend("topleft","A",bty="n",cex=cex) par(mar=c(1,6,1,2)+0.1) means<-tapply(sum[,"pi"],sum$ass,mean,na.rm=T) se<-3*tapply(sum[,"pi"],sum$ass,function(x)sd(x,na.rm=T)/sqrt(length(x))) mp <- barplot(t(means),xpd = FALSE,ylim=c(0.3,0.35),ylab=expression(pi),xlab="Associated",cex.axis=cex,cex.names=cex,cex.lab=cex,las=1,col="white",axisnames=FALSE) arrows(mp, t(means) - t(se), mp, t(means) + t(se), mp, angle = 90, length = 0.2, code=3, col="black", lty=1, lwd=1) legend("topleft","B",bty="n",cex=cex) par(mar=c(1,6,1,2)+0.1) means<-tapply(sum[,"theta"],sum$ass,mean,na.rm=T) se<-3*tapply(sum[,"theta"],sum$ass,function(x)sd(x,na.rm=T)/sqrt(length(x))) mp <- barplot(t(means),xpd = FALSE,ylim=c(0.2,0.204),ylab=expression(theta),xlab="Associated",cex.axis=cex,cex.names=cex,cex.lab=cex,las=1,col="white",axisnames=FALSE) arrows(mp, t(means) - t(se), mp, t(means) + t(se), mp, angle = 90, length = 0.2, code=3, col="black", lty=1, lwd=1) legend("topleft","C",bty="n",cex=cex) par(mar=c(4,6,1,2)+0.1) means<-tapply(sum[,"tajima"],sum$ass,mean,na.rm=T) se<-1.5*tapply(sum[,"tajima"],sum$ass,function(x)sd(x,na.rm=T)/sqrt(length(x))) mp <- barplot(t(means),xpd = FALSE,ylim=c(0.6,0.8),ylab="Tajima's D",xlab="Associated",cex.axis=cex,cex.names=cex,cex.lab=cex,las=1,col=c("white"),names.arg=c("No Associated","Associated")) arrows(mp, t(means) - t(se), mp, t(means) + t(se), mp, angle = 90, length = 0.2, code=3, col="black", lty=1, lwd=1) legend("topleft","D",bty="n",cex=cex) ############################ ############################ #AA glm_pool_pc<-read.table("glm_pool_pc.txt",sep="\t",header=TRUE) glm_k_pc<-read.table("glm_k_pc.txt",sep="\t",header=TRUE) glm_pool<-read.table("glm_pool.txt",sep="\t",header=TRUE) glm_k<-read.table("glm_k.txt",sep="\t",header=TRUE) glm_pc<-read.table("glm_pc.txt",sep="\t",header=TRUE) mlm_pool_pc<-read.table("mlm_pool_pc.txt",sep="\t",header=TRUE) mlm_k_pc<-read.table("mlm_k_pc.txt",sep="\t",header=TRUE) mlm_pool<-read.table("mlm_pool.txt",sep="\t",header=TRUE) mlm_k<-read.table("mlm_k.txt",sep="\t",header=TRUE) mlm_pc<-read.table("mlm_pc.txt",sep="\t",header=TRUE) #Manhattan mods<-list(glm_pool_pc,glm_k_pc,glm_pool,glm_k,glm_pc) mods<-list(mlm_pool_pc,mlm_k_pc,mlm_pool,mlm_k,mlm_pc) traits<-c("di") #Legends + 0.5 & axis by 2 in GLMs for(j in 1:length(traits)){ pdf(paste("figs/",j,"_",traits[j],"_mlm.pdf",sep=""),width=12.4,height=11,paper="special") #postscript(paste("MHs/figs/",j,"_",traits[j],"_mlm.eps",sep=""),width=8.4,height=11,horizontal=FALSE,paper = "special") m<-rbind(c(1,1,1,1,1,1,2),c(3,3,3,3,3,3,4),c(5,5,5,5,5,5,6),c(7,7,7,7,7,7,8), c(9,9,9,9,9,9,10)) layout(m) #par(mar=c(4,5,1,1)+0.1) par(mar=c(2,2,1,0.5)+0.1) par(cex=0.7) par(xpd=FALSE) cex_p=0.6 #Make manhattan sex counted=1 for(k in 1:5){ marker_out<-mods[[k]] marker_out<-marker_out[marker_out$Trait==traits[j],] marker_out$chr<-marker_out$Chr marker_out$pos<-marker_out$Pos marker_out$sex_chi_log<-(-log10(marker_out$p)) max_sex_chi_log<-max(marker_out$sex_chi_log,na.rm=T)+1 marker_out$pos_cum<-NA for(i in 1:11){ if(i==1){marker_out[which(marker_out$chr==i),"pos_cum"]<-(marker_out[which(marker_out$chr==i),]$pos)} if(i!=1){marker_out[which(marker_out$chr==i),"pos_cum"]<-(marker_out[which(marker_out$chr==i),]$pos+chr[chr$chr==i-1,"len_cum"])} } plot(sex_chi_log~I(pos_cum/1000000),marker_out,main="",col="white",ylab=expression(-log[10]~~(p-value)),xlab="Mb",las=1,axes=FALSE,type="l",ylim=c(0,max_sex_chi_log)) axis(side=2,at=seq(0,max_sex_chi_log,by=1),las=1) axis(side=1,at=seq(0,520,by=20),las=1,labels=T) if((max_sex_chi_log-1)>=(-log10(0.001/24000))){abline(h=(-log10(0.001/24000)),lty=2,col="darkgray")} #Color manhattan for(i in 1:11){ if(i%%2==0){ points(sex_chi_log~I(pos_cum/1000000),marker_out[marker_out$chr==i,],col="darkgray",lwd=2,pch=20,cex=cex_p) text(I(chr[chr$chr==i,]$mid_cum/1000000),max_sex_chi_log,labels=chr_label[i],col="darkgray",font=2) } if(i%%2!=0){ points(sex_chi_log~I(pos_cum/1000000),marker_out[marker_out$chr==i,],col="black",lwd=2,pch=20,cex=cex_p) text(I(chr[chr$chr==i,]$mid_cum/1000000),max_sex_chi_log,labels=chr_label[i],col="black",font=2) } abline(v=chr[i,"len_cum"]/1000000,lty=2, col="darkgray") } legend(-20,max_sex_chi_log,toupper(letters[counted]),bty="n",cex=1);counted<-counted+1 observed<-sort(marker_out$p) lobs<-(-(log10(observed))) expected<-c(1:length(observed)) lexp<-(-(log10(expected/(length(expected)+1)))) plot(lexp,lobs,pch=20,ylab=expression(-log[10]~~(p-value)),xlab=expression(Exp.-log[10]~~(p-value)),main="",ylim=c(0,max_sex_chi_log),las=1,cex=cex_p) lines(c(0,max_sex_chi_log),c(0,max_sex_chi_log)) if(max_sex_chi_log>=-log10(0.001/24000)){abline(h=-log10(0.001/24000),lty=2,col="darkgray")} legend(-0.5,max_sex_chi_log+0,toupper(letters[counted]),bty="n",cex=1);counted<-counted+1 } dev.off graphics.off() } #Only one - Make manhattan - For Main Fig pdf(paste("figs/glm_pool_F.pdf",sep=""),width=12.4,height=5,paper="special") par(mfrow=c(1,1)) par(mar=c(5,5,2,2)+0.1) par(cex=0.7) par(xpd=FALSE) cex_p=0.6 marker_out<-glm_pool marker_out$chr<-marker_out$Chr marker_out$pos<-marker_out$Pos marker_out$sex_chi_log<-(-log10(marker_out$p)) max_sex_chi_log<-max(marker_out$sex_chi_log,na.rm=T)+1 marker_out$pos_cum<-NA for(i in 1:11){ if(i==1){marker_out[which(marker_out$chr==i),"pos_cum"]<-(marker_out[which(marker_out$chr==i),]$pos)} if(i!=1){marker_out[which(marker_out$chr==i),"pos_cum"]<-(marker_out[which(marker_out$chr==i),]$pos+chr[chr$chr==i-1,"len_cum"])} } plot(sex_chi_log~I(pos_cum/1000000),marker_out,main="",col="white",ylab=expression(-log[10]~~(p-value)),xlab="Mb",las=1,axes=FALSE,type="l",ylim=c(0,max_sex_chi_log)) sided=0.5 for(i in glm_pool_sig[,"pos_cum"]/1000000){ polygon(c(i-sided,i+sided,i+sided,i-sided,i-sided),c(-20,-20,20,20,-20), col = rgb(235,235,235,255,max=255), border = NA) } axis(side=2,at=seq(0,max_sex_chi_log,by=1),las=1) axis(side=1,at=seq(0,520,by=20),las=1,labels=T) if((max_sex_chi_log-1)>=(-log10(0.001/24000))){abline(h=(-log10(0.001/24000)),lty=2,col="darkgray")} #Color manhattan for(i in 1:11){ if(i%%2==0){ points(sex_chi_log~I(pos_cum/1000000),marker_out[marker_out$chr==i,],col="darkgray",lwd=2,pch=20,cex=cex_p) text(I(chr[chr$chr==i,]$mid_cum/1000000),max_sex_chi_log,labels=chr_label[i],col="darkgray",font=2) } if(i%%2!=0){ points(sex_chi_log~I(pos_cum/1000000),marker_out[marker_out$chr==i,],col="black",lwd=2,pch=20,cex=cex_p) text(I(chr[chr$chr==i,]$mid_cum/1000000),max_sex_chi_log,labels=chr_label[i],col="black",font=2) } abline(v=chr[i,"len_cum"]/1000000,lty=1, col="lightgray",lwd=0.8) } dev.off graphics.off()