rm(list=ls()) library(bbmle) # get data data0 <- read.table("quantile_table.txt", sep="", stringsAsFactors=F, header=T) # limit to outlier type of interest - pick one data0 <- data0[data0$study=="RNA",] # outliers from RNAseq study #data0 <- data0[data0$study=="454" & data0$SW==0, ] # UK outliers from 454 study #data0 <- data0[data0$study=="RAD" & data0$type=="OUT", ] # outliers from RAD study #data0 <- data0[data0$study=="EXP", ] # loci showing gene expression differences # look at shared or non-shared outliers only - pick one data1 <- data0[(data0$SP + data0$SW + data0$UK)==1, ] # non-shared only #data1 <- data0[(data0$SP + data0$SW + data0$UK)==2, ] # shared (between 2 locations) only ######### # model 1: null model with expectation 0.2 for probability of being an outlier in all 3 original sites minusLL <- -sum(dbinom(as.numeric(data1$S),1,0.2,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(data1$ANG),1,0.2,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(data1$T),1,0.2,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(data1$B),1,0.2,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(data1$OCK),1,0.2,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(data1$W),1,0.2,log=T),na.rm=T) 2*minusLL ######### # model 2: model with probability to be an outlier always the same # function to calculate -LL one_p <- function(s1,s2,s3,s4,s5,s6,p){ minusLL <- -sum(dbinom(as.numeric(s1),1,p,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s2),1,p,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s3),1,p,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s4),1,p,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s5),1,p,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s6),1,p,log=T),na.rm=T) return(minusLL) } theta.init <- list(p=0.2) # starting value mle.one_p <- mle2(one_p,theta.init,method="L-BFGS-B", lower=c(p=0.00001),upper=c(p=0.99999), data=list(s1=data1$S,s2=data1$ANG,s3=data1$T,s4=data1$B,s5=data1$OCK,s6=data1$W)) summary(mle.one_p) AIC(mle.one_p) ######### # model 4: one probability (po) for loci previously shown to be outliers in the focal site # another probability (pn) for loci at any other site two_p <- function(r1,r2,r3,s1,s2,s3,s4,s5,s6,po,pn){ minusLL <- -sum(dbinom(as.numeric(s1[r1==1]),1,po,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s2[r2==1]),1,po,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s3[r3==1]),1,po,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s1[r1==0]),1,pn,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s2[r2==0]),1,pn,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s3[r3==0]),1,pn,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s4),1,pn,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s5),1,pn,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s6),1,pn,log=T),na.rm=T) return(minusLL) } theta.init <- list(po=0.2,pn=0.2) mle.two_p <- mle2(two_p,theta.init,method="L-BFGS-B",lower=c(po=0.00001,pn=0.00001),upper=c(po=0.99999,pn=0.99999), data=list(r1=data1$SP,r2=data1$SW,r3=data1$UK, s1=data1$S,s2=data1$ANG,s3=data1$T,s4=data1$B,s5=data1$OCK,s6=data1$W)) summary(mle.two_p) AIC(mle.two_p) ######### # model 3: like 4, but pn fixed at 0.2 mle.two_p <- mle2(two_p,theta.init,fixed=c(pn=0.2),method="L-BFGS-B",lower=c(po=0.00001),upper=c(po=0.99999), data=list(r1=data1$SP,r2=data1$SW,r3=data1$UK, s1=data1$S,s2=data1$ANG,s3=data1$T,s4=data1$B,s5=data1$OCK,s6=data1$W)) summary(mle.two_p) AIC(mle.two_p) ######### # model 6: one probability (po) for loci previously shown to be outliers in focal site or same country # another probability (pn) for different country two_p <- function(r1,r2,r3,s1,s2,s3,s4,s5,s6,psc,pdc){ minusLL <- -sum(dbinom(as.numeric(s1[r1==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s2[r2==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s3[r3==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s1[r1==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s2[r2==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s3[r3==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s4[r1==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s5[r2==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s6[r3==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s4[r1==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s5[r2==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s6[r3==0]),1,pdc,log=T),na.rm=T) return(minusLL) } theta.init <- list(psc=0.2,pdc=0.2) mle.two_p <- mle2(two_p,theta.init,method="L-BFGS-B",lower=c(psc=0.00001,pdc=0.00001),upper=c(psc=0.99999,pdc=0.99999), data=list(r1=data1$SP,r2=data1$SW,r3=data1$UK, s1=data1$S,s2=data1$ANG,s3=data1$T,s4=data1$B,s5=data1$OCK,s6=data1$W)) summary(mle.two_p) AIC(mle.two_p) ######### # model 5: same as 6, but pdc fixed at 0.2 mle.two_p <- mle2(two_p,theta.init,fixed=c(pdc=0.2),method="L-BFGS-B",lower=c(psc=0.00001),upper=c(psc=0.99999), data=list(r1=data1$SP,r2=data1$SW,r3=data1$UK, s1=data1$S,s2=data1$ANG,s3=data1$T,s4=data1$B,s5=data1$OCK,s6=data1$W)) summary(mle.two_p) AIC(mle.two_p) ######### # model 8: one probability for loci previously shown to be outliers in the focal site # another probability for loci at another other site in same country # another probability for different country three_p <- function(r1,r2,r3,s1,s2,s3,s4,s5,s6,po,psc,pdc){ minusLL <- -sum(dbinom(as.numeric(s1[r1==1]),1,po,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s2[r2==1]),1,po,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s3[r3==1]),1,po,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s1[r1==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s2[r2==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s3[r3==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s4[r1==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s5[r2==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s6[r3==1]),1,psc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s4[r1==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s5[r2==0]),1,pdc,log=T),na.rm=T) minusLL <- minusLL-sum(dbinom(as.numeric(s6[r3==0]),1,pdc,log=T),na.rm=T) return(minusLL) } theta.init <- list(po=0.2,psc=0.2,pdc=0.2) mle.three_p <- mle2(three_p,theta.init,method="L-BFGS-B",lower=c(po=0.00001,psc=0.00001,pdc=0.00001), upper=c(po=0.99999,psc=0.99999,pdc=0.99999), data=list(r1=data1$SP,r2=data1$SW,r3=data1$UK, s1=data1$S,s2=data1$ANG,s3=data1$T,s4=data1$B,s5=data1$OCK,s6=data1$W)) summary(mle.three_p) AIC(mle.three_p) # model 7: same as 8, but pdc fixed theta.init <- list(po=0.2,psc=0.2) mle.three_p <- mle2(three_p,theta.init,fixed=c(pdc=0.2),method="L-BFGS-B",lower=c(po=0.00001,psc=0.00001), upper=c(po=0.99999,psc=0.99999), data=list(r1=data1$SP,r2=data1$SW,r3=data1$UK, s1=data1$S,s2=data1$ANG,s3=data1$T,s4=data1$B,s5=data1$OCK,s6=data1$W)) summary(mle.three_p) AIC(mle.three_p)