setwd("/users/dianarennison/desktop/PhD Data/Microbiome/2019_analysis/Pathogen") library(dplyr) a <- read.csv("90000_rarefied_biom_table_no_singleton_OTUs_family_genus.csv") a[1:10,1:10] a2 <- as.data.frame(t(a)) #write.csv(a2,"temp_fam.csv") #removed annoying header line a2 <- read.csv("temp_fam.csv", header=TRUE) a2[1:10, 11040: 11048] dim(a2) b <- a2[-c(47:48),] taxa <- a2[c(47:48),-11048] #write.csv(b,"temp_path.csv") #removed annoying header line b <- read.csv("temp_path.csv", header=TRUE) str(b[,1:10]) b[1:10,1:10] b1 <- b[,-c(1, 11048)] b1$SUM <- rowSums(b1,na.rm=TRUE) dim(b1) b1[1:10,11040:11047] c <- b1[,1:11046]/b1[, 11047] c[1:10,1:10] dim(c) OUT_ID <- b[,1] d <- cbind(OUT_ID,c) d[1:10,1:10] e <- rbind(d,taxa) e[,1:10] e2 <- as.data.frame(t(e)) dim(e2) write.csv(e,"normalized_OTUs_no_singletons_family_genus.csv") #this is the file we will use to look at pathogenic status. added metadata here otus <- read.csv("normalized_OTUs_no_singletons_family_genus.csv") otus[1:10,1:10] dim(otus) otus2 <- as.data.frame(t(otus)) otus2[1:10,1:10] names(otus2) <-NULL otus3 <- otus2[-1,] dim(otus3) otus3[1:10,1:10] head(otus3) library(dplyr) otus3 <- tibble::rownames_to_column(otus3, "VALUE") ids <- c("OTU_ID","Enos3","LQL1","LQL2","Enos2", "LabL1", "LabB2", "Enos1", "PriB3", "PaxB5", "PrB5", "F13", "PaxL5", "PriL4", "Enos5", "PriB2", "F12", "PriL", "LQB3", "LQB1", "LQL5", "PaxL4", "LQB4", "Oys5", "PriL1", "PriB4", "PaxL2", "LabL2", "PaxL3", "PaxB1","LQL4", "LabB1", "PriL3", "F11", "LQL3", "PaxB3", "PaxL1", "LQB2", "Oys2", "PriB1", "LabL3", "LabB3", "Oys4", "Oys3", "Oys1", "PaxB2", "PaxB4", "Family", "Genus") names(otus3) <- ids #analysis for genus p <- read.csv("pathogenic.csv") #list of pathogenic taxa head(p) path_Genus <- unique(p$Genus) pathogenic_otus <- otus3[(otus3$Genus %in% path_Genus),] dim(otus3) dim(pathogenic_otus) head(pathogenic_otus) final <- rbind(otus3[1:3,],pathogenic_otus) head(final) dim(final) final2 <- final[,-c(48:49)] head(final2) final3 <- as.data.frame(t(final2)) final3[1:10,1:10] dim(final3) final3 <- tibble::rownames_to_column(final3, "VALUE") colnames(final3) <- as.character(unlist(final3[1,])) #write.csv(final3,"pathogen_counts_genus.csv") data <- read.csv("pathogen_counts_genus.csv") data[1:10,1:10] dim(data) library(reshape) datalong <- melt(data, id.vars = c("OTU_ID","Water","Species","Lake")) head(datalong) dim(datalong) #finally we do the anlaysis for genus! output1 <- datalong %>% group_by(OTU_ID) %>% summarise(all_sum = sum(value)) output1 <- as.data.frame(output1) output1$Water <- datalong$Water[match(output1$OTU_ID, datalong$OTU_ID)] #add meta data output1$Species <- datalong$Species[match(output1$OTU_ID, datalong$OTU_ID)] #add meta data output1$Lake <- datalong$Lake[match(output1$OTU_ID, datalong$OTU_ID)] #add meta data output1 $combined <- paste(output1 $Species, output1 $Lake, sep = "_") head(output1) output1a <- na.omit(output1) output2 <- output1a %>% group_by(combined) %>% summarise(lake_mean = mean(all_sum)) output2 <- as.data.frame(output2) output2$Water <- output1a$Water[match(output2$combined, output1a$combined)] #add meta data a <- lm(output2$lake_mean ~ output2$Water) summary(a) Call: lm(formula = output2$lake_mean ~ output2$Water) Residuals: Min 1Q Median 3Q Max -0.13472 -0.06009 0.01483 0.04938 0.10789 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 0.20500 0.03322 6.172 0.000831 *** output2$WaterSalt 0.15642 0.09395 1.665 0.146959 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.08788 on 6 degrees of freedom Multiple R-squared: 0.316, Adjusted R-squared: 0.202 F-statistic: 2.772 on 1 and 6 DF, p-value: 0.147 difference dif <- read.csv("genus_path_difference.csv") t.test(dif$Difference, mu=0) > t.test(dif$Difference, mu=0) One Sample t-test data: dif$Difference t = 4.7094, df = 6, p-value = 0.003294 alternative hypothesis: true mean is not equal to 0 95 percent confidence interval: 0.0751503 0.2376994 sample estimates: mean of x 0.1564248