setwd("/Users/matteo/Desktop/") library(DESeq2) library(dplyr) library(gplots) library(ggplot2) annotation<-read.table("gene_info.txt",header=TRUE) #Hmel2.5 gene annotation in chromosomal coordinates #Normalize gene counts with DESeq2 AdultstoMP<-read.table("Brains_combined_genecounts.txt",header=TRUE, row.names = 1) #read gene counts table (conditionAdult <- factor(c(rep("H.melpomene", 12), rep("H.cydno", 11),rep("F1 Hybrid", 4),rep("H.melpomene", 5), rep("H.cydno", 5),rep("F1 Hybrid", 12)))) (coldataAdult <- data.frame(row.names=colnames(AdultstoMP), conditionAdult)) ddsAdult<- DESeqDataSetFromMatrix(countData=AdultstoMP, colData=coldataAdult, design=~conditionAdult) transformedAdult<- rlog(ddsAdult) #rlog transformation = normalize counts for library size + log transform dds <- estimateSizeFactors(ddsAdult) normalized<-as.data.frame(counts(dds, normalized=TRUE)) AdultstoMP_rows <- as.data.frame(t(normalized)) AdultstoMP_rows$group<-(c(rep("MP", 12), rep("CP", 11),rep("F1", 4),rep("MP", 5), rep("CP", 5),rep("F1", 12))) #Loop one-way ANOVA for every gene + Tukey test datalist = list() for (i in 2:ncol(AdultstoMP_rows)) { formula <- as.formula(paste(colnames(AdultstoMP_rows)[i], " ~ group", sep="")) model <- aov(formula, data = AdultstoMP_rows) test<-(TukeyHSD(model, conf.level = 0.95) ) gene<- colnames(AdultstoMP_rows)[i] result<-as.data.frame(t(test$group[,4])) result$gene <- gene datalist[[i]] <- result } #Put data together results<-do.call(rbind.data.frame, datalist) results<-results[,c(4,2,1,3)] colnames(results)[1] <- "gene_id" #Add mean expression values mean<-cbind(AdultstoMP,rowMeans(normalized[,c(1:12,28:32)]), na.rm=TRUE) mean<-cbind(mean,rowMeans(normalized[,c(13:23,33:37)]), na.rm=TRUE) mean<-cbind(mean,rowMeans(normalized[,c(24:27,38:49)]), na.rm=TRUE) mean<-mean[,c(50,52,54)] mean$gene_id <- rownames(mean) results<-inner_join(results,mean,by="gene_id") colnames(results)[5] <- "mean_MP" colnames(results)[6] <- "mean_CP" colnames(results)[7] <- "mean_F1" #Categorize transgressive and intermediate intermediate_or_transgressive<-results[results$`F1-CP`<0.05 & results$`MP-F1`<0.05,] intermediate_or_transgressive<-intermediate_or_transgressive[(!is.na(intermediate_or_transgressive$gene)),] transgressive1<-intermediate_or_transgressive[intermediate_or_transgressive$`mean_F1`< intermediate_or_transgressive$`mean_MP` & intermediate_or_transgressive$`mean_F1`< intermediate_or_transgressive$`mean_CP`,] transgressive1<-transgressive1[(!is.na(transgressive1$gene)),] transgressive2<-intermediate_or_transgressive[intermediate_or_transgressive$`mean_F1`> intermediate_or_transgressive$`mean_MP` & intermediate_or_transgressive$`mean_F1`> intermediate_or_transgressive$`mean_CP`,] transgressive2<-transgressive2[(!is.na(transgressive2$gene)),] intermediate1<-intermediate_or_transgressive[intermediate_or_transgressive$`mean_F1`> intermediate_or_transgressive$`mean_MP` & intermediate_or_transgressive$`mean_F1`< intermediate_or_transgressive$`mean_CP`,] intermediate1<-intermediate1[(!is.na(intermediate1$gene)),] intermediate2<-intermediate_or_transgressive[intermediate_or_transgressive$`mean_F1`< intermediate_or_transgressive$`mean_MP` & intermediate_or_transgressive$`mean_F1`> intermediate_or_transgressive$`mean_CP`,] intermediate2<-intermediate2[(!is.na(intermediate2$gene)),] transgressive1$status<-"transgressive" transgressive2$status<-"transgressive" intermediate1$status<-"intermediate" intermediate2$status<-"intermediate" transgressive1<-transgressive2[,c(1:4,8)] transgressive2<-transgressive2[,c(1:4,8)] intermediate1<-intermediate1[,c(1:4,8)] intermediate2<-intermediate2[,c(1:4,8)] #Add "species-like' categories melpomene_like<-results[results$`F1-CP`<0.05 & results$`MP-F1`>0.05,] melpomene_like<-melpomene_like[(!is.na(melpomene_like$gene)),] cydno_like<-results[results$`F1-CP`>0.05 & results$`MP-F1`<0.05,] # cydno_like<-cydno_like[(!is.na(cydno_like$gene)),] no_difference<-results[results$`F1-CP`>0.05 & results$`MP-F1`>0.05,] no_difference<-no_difference[(!is.na(no_difference$gene)),] melpomene_like$status<-"melpomene_like" cydno_like$status<-"cydno_like" no_difference$status<-"no_difference" colnames(melpomene_like)[1] <- "gene_id" colnames(cydno_like)[1] <- "gene_id" colnames(no_difference)[1] <- "gene_id" melpomene_like<-melpomene_like[,c(1:4,8)] cydno_like<-cydno_like[,c(1:4,8)] no_difference<-no_difference[,c(1:4,8)] #Make table with all info new2<-rbind(melpomene_like,cydno_like,intermediate1,intermediate2,transgressive1,transgressive2,no_difference) #write.table(new2, file="Allgenes_allclassifications.txt", row.names = FALSE, quote=FALSE)