# Required libraries -------------------------- library(readxl) library(dplyr) library(tidyr) library(randomForest) library(factoextra) library(emmeans) library(car) library(ggplot2) library(Hmisc) library(NBZIMM) # if you ran into problem installing NBZIMM, try the following library(remotes) install_github("nyiuab/NBZIMM", force=T, build_vignettes=F) # if you still can't install NBZIMM, run the function below # NBZIMM:lme.zig lme.zig <- function (fixed, random, data, correlation, zi_fixed = ~1, zi_random = NULL, #zi.random = FALSE, niter = 30, epsilon = 1e-05, verbose = TRUE, ...) { if (!requireNamespace("nlme")) install.packages("nlme") if (!requireNamespace("MASS")) install.packages("MASS") library(nlme) library(MASS) start.time <- Sys.time() if (missing(data)) stop("'data' should be specified") family <- gaussian() m <- mcall <- Call <- match.call() # data0 <- environment(fixed) # tf <- two.fm(formula = fixed, data = data0) # fixed <- tf$fmc # xz <- xz0 <- tf$Z # offsetz <- tf$offsetz # if (colnames(xz)[1]=="(Intercept)") xz <- xz[, -1, drop = FALSE] nm <- names(m)[-1L] keep <- is.element(nm, c("weights", "data", "subset", "na.action")) for (i in nm[!keep]) m[[i]] <- NULL allvars <- if (is.list(random)) allvars <- c(all.vars(fixed), names(random), unlist(lapply(random, function(x) all.vars(formula(x))))) else c(all.vars(fixed), all.vars(random)) Terms <- if (missing(data)) terms(fixed) else terms(fixed, data = data) off <- attr(Terms, "offset") if (length(off <- attr(Terms, "offset"))) allvars <- c(allvars, as.character(attr(Terms, "variables"))[off + 1]) if (!missing(correlation) && !is.null(attr(correlation, "formula"))) allvars <- c(allvars, all.vars(attr(correlation, "formula"))) Call$fixed <- eval(fixed) Call$random <- eval(random) m$formula <- as.formula(paste("~", paste(allvars, collapse = "+"))) environment(m$formula) <- environment(fixed) m$drop.unused.levels <- TRUE m[[1L]] <- quote(stats::model.frame) mf <- eval.parent(m) off <- model.offset(mf) if (is.null(off)) off <- 0 wts <- model.weights(mf) if (is.null(wts)) wts <- rep(1, nrow(mf)) mf$wts <- wts fit0 <- suppressWarnings( glm(formula = fixed, family = family, data = mf, weights = wts) ) w <- fit0$prior.weights eta <- fit0$linear.predictors zz <- eta + fit0$residuals - off wz <- fit0$weights fam <- family nm <- names(mcall)[-1L] keep <- is.element(nm, c("fixed", "random", "data", "subset", "na.action", "control")) for (i in nm[!keep]) mcall[[i]] <- NULL fixed[[2L]] <- quote(zz) mcall[["fixed"]] <- fixed mcall[[1L]] <- quote(nlme::lme) mcall$random <- random mcall$method <- "ML" if (!missing(correlation)) mcall$correlation <- correlation mcall$weights <- quote(nlme::varFixed(~invwt)) mf$zz <- zz mf$invwt <- 1/(wz + 1e-04) mcall$data <- mf y <- fit0$y if (all(y != 0)) stop("invalid response: no zero") zp <- ifelse(y!=0, 0, 0.5) fm <- zp ~ . fm[[3]] <- zi_fixed[[2]] zero.eta <- fit.zig <- NA for (i in seq_len(niter)) { fit <- eval(mcall) etaold <- eta eta <- fitted(fit) + off if (i > 1 & sum((eta - etaold)^2) < epsilon * sum(eta^2)) break mu <- fam$linkinv(eta) mu.eta.val <- fam$mu.eta(eta) mu.eta.val <- ifelse(mu.eta.val == 0, 1e-04, mu.eta.val) varmu <- fam$variance(mu) varmu <- ifelse(varmu == 0, 1e-04, varmu) mf$zz <- eta + (y - mu)/mu.eta.val - off wz <- w * mu.eta.val^2/varmu wz <- ifelse(wz == 0, 1e-04, wz) mf$invwt <- 1/wz mcall$data <- mf if (is.null(zi_random)){ fit.zig <- suppressWarnings(glm(fm, family=binomial, data=data)) zero.eta <- fit.zig$linear.predictors } else{ fit.zig <- suppressWarnings(glmmPQL(fixed=fm, random=zi_random, family=binomial, data=data, verbose=FALSE)) zero.eta <- fitted(fit.zig) if (!is.null(fit.zig$offset)) zero.eta <- zero.eta + fit.zig$offset } # if (!zi.random){ # if (ncol(xz) > 0) # fit.zig <- suppressWarnings(glm(zp ~ ., offset=offsetz, family=binomial, data=data.frame(xz))) # else fit.zig <- suppressWarnings(glm(zp ~ 1, offset=offsetz, family=binomial)) # zero.eta <- fit.zig$linear.predictors # } # else{ # if (ncol(xz) > 0){ # z <- xz # fit.zig <- suppressWarnings(glmmPQL(fixed=zp ~ z + offset(offsetz), random=random, family=binomial, verbose = FALSE)) # } # else fit.zig <- suppressWarnings(glmmPQL(fixed=zp ~ 1 + offset(offsetz), random=random, family=binomial, verbose = FALSE)) # zero.eta <- fitted(fit.zig) + offsetz # } sigma <- sigma(fit) den <- dnorm(y, mu, sigma) zp <- 1/(1 + exp(-zero.eta) * den ) zp <- ifelse(zp > 0.95, 0.95, zp) zp <- ifelse(y != 0, 0, zp) wz <- (1 - zp) * wz mf$invwt <- 1/wz mcall$data <- mf } attributes(fit$logLik) <- NULL fit$call <- Call fit$iter <- i fit$logLik <- as.numeric(NA) fit$zero.indicator <- zp fit$zero.prob <- exp(zero.eta)/(1 + exp(zero.eta)) fit$fit.zero <- fit.zig # fit$xz <- xz0 # fit$offsetz <- offsetz oldClass(fit) <- c("zigmm", oldClass(fit)) stop.time <- Sys.time() minutes <- round(difftime(stop.time, start.time, units = "min"), 3) if (verbose) { cat("Computational iterations:", fit$iter, "\n") cat("Computational time:", minutes, "minutes \n") } fit } #---------------------------------------------- # Data preparation ---------------------------- data <- read.csv("VOC.soybean.ablated.csv") data<-data[,c(1:4,8,9)] colnames(data)<-c("trt","rep","day","trial","amount","compound") data$day<-as.factor(data$day) data$rep<-as.factor(data$rep) data$trial<-as.factor(data$trial) data$compound<-as.factor(data$compound) levels(data$day)[levels(data$day)=="1"] <- "First day" levels(data$day)[levels(data$day)=="2"] <- "Second day" levels(data$day)[levels(data$day)=="3"] <- "Third day" View(data) # number of distinct library ID compoundlevel<-levels(data$compound ) # unify the name of the compounds unifyname <- function(x){ library(dplyr) mutate( x, new_name = case_when( compound == ".alpha.-Farnesene" ~ "E,E-Alpha-Farnesene", compound == "(+)-g- Cadinene" ~ "Gamma-Cadinene", compound == "(E)-a-Farnesene; ( (E,E)-a-?)" ~ "E,E-Alpha-Farnesene", compound == "(E)-a-Farnesene;   ( (E,E)-a-?)" ~ "E,E-Alpha-Farnesene", compound == "(E)-beta-Farnesene" ~ "Beta-Farnesene", compound == "(Z)-3-hexenyl butanoate" ~ "Cis-3-Hexenyl butyrate", compound == "(Z,Z)-.alpha.-Farnesene" ~ "Z,Z-Alpha-Farnesene", compound == "(Z)-3-hexenyl butanoate" ~ "Cis-3-Hexenyl butyrate", compound == "((Z)-a-Farnesene; ( (Z,E)-a?)" ~ "Z,E-Alpha-Farnesene", compound == "1,3,6-Octatriene, 3,7-dimethyl-, (Z)-" ~ "Cis-Beta-Ocimene", compound == "1,6,10-Dodecatriene, 7,11-dimethyl-3-methylene-" ~ "Beta-Farnesene", compound == "1-Octen-3-ol" ~ "1-Octen-3-ol", compound == "11.51 Nonanal" ~ "Nonanal", compound == "11.75 Phenyl ethyl alcohol" ~ "Phenethyl alcohol", compound == "11.86 Menthatriene<1,3,8-para->" ~ "Para-Mentha-1,3,8-triene", compound == "15.09 Hexenyl butanoate<3Z->" ~ "Cis-3-Hexenyl butyrate", compound == "15.35 Methyl salicylate" ~ "Methyl salicylate", compound == "19.30 Ethyl acetophenone" ~ "Para-Ethylacetophenone", compound == "19.76 Indole" ~ "Indole", compound == "1H-Pyrrole-2,5-dione, 3-ethyl-4-methyl-" ~ "Ethylmethylmaleimide", compound == "2(5H)-Furanone, 5-ethyl-" ~ "5-Ethyl-2-5H-furanone", compound == "20.19 Tridecane" ~ "Tridecane", compound == "2-Hexenal" ~ "2-Hexenal", compound == "2-Hexenal, (E)-" ~ "Trans-2-Hexenal", compound == "2-Phenyl ethanol" ~ "Phenethyl alcohol", compound == "21.72 Anthranilate" ~ "Methyl anthranilate", compound == "21.90 Indanol<5->" ~ "5-Indanol", compound == "24.95 Caryophyllene" ~ "Z-Caryophyllene", compound == "25.36 Caryophyllene(E-)" ~ "Beta-Caryophyllene", compound == "25.99 Bergamotene" ~ "Trans-Alpha-Bergamotene", compound == "26.82 Humulene" ~ "Alpha-Caryophyllene", compound == "29.03 Farnesene<(E,E)-alpha->" ~ "E,E-Alpha-Farnesene", compound == "3-Hexen-1-ol, (E)-" ~ "Trans-3-Hexenol", compound == "3- Carene" ~ "3-Carene", compound == "3-Hexen-1-ol" ~ "Cis-3-Hexenol", compound == "4-Hexen-1-ol, acetate" ~ "4-Hexen-1-ol, acetate", compound == "3-Hexen-1-ol, acetate, (Z)-" ~ "3-Hexenyl acetate", compound == "3-Methyl-4-methylene-bicyclo(3.2.1)oct-2-ene" ~ "3-Methyl-4-methylene-bicyclo-3.2.1-oct-2-ene", compound == "3.15 Hexenal<3Z->" ~ "Cis-3-Hexenal", compound == "3.99 Hexenal<2E->" ~ "Trans-2-Hexenal", compound == "4-Oxohex-2-enal" ~ "E-4-Oxo-2-Hexenal", compound == "4.06 Hexenol<3Z->" ~ "Cis-3-Hexenol", compound == "5.63 Cumene" ~ "Cumene", compound == "7.84 Hexenyl acetate<3E->" ~ "Trans-3-Hexenyl Acetate", compound == "7.93 Hexenyl acetate<3Z->" ~ "Cis-3-Hexenyl Acetate", compound == "8.10 Carene" ~ "3-Carene", compound == "8.57 Trimethyl benzene<1,2,4->" ~ "1,2,4-Trimethylbenzene", compound == "8.96 Ocimene<(Z)-beta->" ~ "Cis-Beta-Ocimene", compound == "9.23 Salicylaldehyde" ~ "Salicylaldehyde", compound == "9.42 Ocimene<(E)-beta->" ~ "Trans-Beta-Ocimene", compound == "a-Humulene (a-Caryophyllene)" ~ "Alpha-Caryophyllene", compound == "allo-Ocimene?" ~ "Allo-Ocimene", compound == "b-Caryophyllene" ~ "Beta-Caryophyllene", compound == "Benzaldehyde, 2-hydroxy-" ~ "Salicylaldehyde", compound == "Benzene, 1-ethyl-3-methyl-" ~ "Meta-Ethyltoluene", compound == "Bicyclo[3.1.1]hept-2-ene, 2,6-dimethyl-6-(4-methyl-3-pentenyl)-" ~ "Trans-Alpha-Bergamotene", compound == "Caryophyllene" ~ "Caryophyllene", compound == "cis- Jasmone" ~ "Cis-Jasmone", compound == "cis-.beta.-Farnesene" ~ "Cis-Beta-Farnesene", compound == "cis-b-Ocimene ?" ~ "Cis-Beta-Ocimene", compound == "cis-Jasmone" ~ "Cis-Jasmone", compound == "d- Cadinene (+g-Cadinene?)" ~ "Delta-Cadinene", compound == "g- Muurolene" ~ "Gamma-Muurolene", compound == "Germacrene D" ~ "Germacrene D", compound == "Humulene" ~ "Alpha-Caryophyllene", compound == "Indole" ~ "Indole", compound == "Methyl antranilate (Methyl 2-aminobenzoate)" ~ "Methyl Antranilate ", compound == "Methyl salicylate" ~ "Methyl Salicylate", compound == "Methyl salicylate (Methyl 2-hydroxybenzoate)" ~ "Methyl Salicylate", compound == "Phenylacetonitrile" ~ "Phenylacetonitrile", compound == "Sesquirosefuran" ~ "Sesquirosefuran", compound == "Tridecane" ~ "Tridecane", compound == "a-Humulene (a-Caryophyllene)" ~ "Alpha-Caryophyllene", compound == "allo-Ocimene?" ~ "Allo-Ocimene", compound == "b-Caryophyllene" ~ "Beta-Caryophyllene", compound == "cis-.beta.-Farnesene" ~ "Cis-Beta-Farnesene", compound == "cis-b-Ocimene ?" ~ "Cis-Beta-Ocimene", compound == "d- Cadinene (+g-Cadinene?)" ~ "Delta-Cadinene", compound == "g- Muurolene" ~ "Gamma-Muurolene", compound == "trans-a- Bergamotene" ~ "Trans-Alpha-Bergamotene", compound == "trans-a-Bergamotene" ~ "Trans-Alpha-Bergamotene" )) } data_newname <- unifyname(data) data_newname # omit old library ID data2 <- data_newname[,c(1:5,7)] colnames(data2)[6] <- 'compound' data2=data2[complete.cases(data2), ]# removing all the contaminant VOC # total number of VOC data2$compound<-as.factor(data2$compound) nlevels(data2$compound) #the number of compounds is 50 # the list of voc voc.list<-levels(data2$compound )# obtain the full list of compound # combining rows with the same compounds voc.sum <- function(i){ i = mutate(i, x = paste(trt, day, trial, rep, compound, compound, sep = "_")) #combine the attributes that indicate each observation into one column data3<-i[,c(5,7)] #select only the amount and combined attributes data.sum<-aggregate(amount ~ x, data=data3, FUN=sum) #sum the amount of rows that has the same attributes i<-separate(data.sum, x, c("trt","day","trial","rep","compound"), sep = "_", remove = TRUE,convert = FALSE) } data2 <- voc.sum(data2) data2_trialremoved<-data2[,-3] # create a frame with full factor combinations newframe<-expand.grid(compound = voc.list, trt = c("Control", "Ablated","Normal"), day = c("First day","Second day","Third day"), rep = c(1:8)) data2_fulltable<-merge(newframe, data2_trialremoved, all = TRUE) data2_fulltable[is.na(data2_fulltable)] <- 0 # expand rows of compound into distinct columns data_column<-spread(data2_fulltable, compound, amount, convert = FALSE) View(data_column) #---------------------------------------------- # Random forest ------------------------------- # making predictor (combing facter treatment and day) data_column2 <- dplyr::mutate(data_column, predictor = paste(trt, day, sep = " ")) data_column2<-data_column2[,-c(1:3)] data_column2<-dplyr::select(data_column2, "predictor", everything()) # make the column name syntactically valid data_RF <- data_column2 colnames(data_RF) <- gsub("-", "_", colnames(data_RF)) colnames(data_RF) <- gsub(" ", "0", colnames(data_RF)) colnames(data_RF) <- make.names(colnames(data_RF), unique = TRUE,allow_ = TRUE) data_RF$predictor<-as.factor(data_RF$predictor) data_RF[is.na(data_RF)] <- 0 #View(data_RF) mtry <- sqrt(50) # for mtry (see below) it is recommended to use the sqrt of the number of variables rf <- randomForest(predictor ~ ., data = data_RF, mtry=mtry, importance=T, do.trace=1000, ntree=10000) # variable importance plot (result may change slightly for each random forest run) vip.ggplot <- function(x){ vip<-varImpPlot(x, n.var=20, type=1, #type of importance measure (1=mean decrease in accuracy, 2=mean decrease in node impurity) pch=19, #adding points col=1, #plotting color cex=.5) #A numerical value giving the amount by which plotting text and symbols should be magnified relative to the default vip <- as.data.frame(vip) colnames(vip) vip$varnames <- rownames(vip) # row names to column rownames(vip) <- NULL vip$varnames <- as.factor(vip$varnames) vip$MeanDecreaseAccuracy <- as.numeric(vip$MeanDecreaseAccuracy) vip<-vip %>% dplyr::arrange(desc(MeanDecreaseAccuracy)) vip <- vip[c(1:20),] vip.gg<-ggplot(data=vip, aes(x=MeanDecreaseAccuracy, y=reorder(varnames, MeanDecreaseAccuracy))) + geom_point() + labs(title = "VIT of soybean", x = "MDA", y = "")+ theme_minimal()+ theme(plot.title=element_text(size = 6, hjust = 0.5), axis.title.y = element_blank(), axis.title.x = element_text(size = 6, hjust = 0.5), axis.text.x = element_text(size = 5, hjust = 0.5), axis.text.y = element_text(size = 5), panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1)) vip.gg } vip.plot <- vip.ggplot(rf) vip.plot #---------------------------------------------- # PCA ----------------------------------------- # preparation load("list.soybean.ablated.MDAlargerthan2.RData") #this is a list of important VOC (MDA <2) selected according to random forest analysis list.pca <- as.vector(list_VOC_soybean_ablated_MDAlargerthan2) # View(list_VOC_MDAlargerthan2) data_PCA<-data_RF[,c("predictor", list.pca)]# select only columns of important VOC (MDA <2) colnames(data_PCA) <- gsub("0", " ", colnames(data_PCA)) colnames(data_PCA) <- gsub("_", "-", colnames(data_PCA)) colnames(data_PCA) <- gsub("X", "", colnames(data_PCA)) colnames(data_PCA) <- gsub("\\.", ",", colnames(data_PCA)) colnames(data_PCA) #View(data_PCA) # PCA pca<-prcomp(data_PCA[,-1], center = TRUE, scale. = TRUE) attributes(pca) pca$center pca$scale # loading plot loadingplot <- fviz_pca_var(pca, col.var = "contrib", gradient.cols = c("#adcbe3","#2a4d69"), repel = TRUE, geom =c("arrow", "text"), labelsize = 2, arrowsize = 0.3, select.var = list(contrib=10))+ theme(plot.title = element_blank(), axis.title=element_text(size=7), axis.title.x = element_text(hjust = 0.5), axis.text.x = element_text(size = 6, hjust = 0.5), axis.text.y = element_text(size = 6, hjust = 0.5), legend.title = element_blank(), legend.text = element_text(size = 6), legend.key.size = unit(8,"points"), legend.position = c(.99, .99), legend.justification = c("right", "top"), legend.box.just = "right", panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1)) loadingplot # centoidplot centroidplot<-fviz_pca_ind(pca, habillage = data_PCA$predictor, # color by groups palette = c("#d9a739","#d9a739","#d9a739","#7e9953","#7e9953","#7e9953", "#bd0808","#bd0808", "#bd0808"), addEllipses = TRUE, ellipse.alpha =0.05, ellipse.levels=.95, alpha.ind = 0.5, geom.ind=c("point"), pointsize =1.5)+ theme(plot.title = element_blank(), axis.title=element_text(size=7), axis.title.x = element_text(hjust = 0.5), #position the title to the center axis.text.x = element_text(size = 6, hjust = 0.5), axis.text.y = element_text(size = 6, hjust = 0.5), legend.title = element_blank(), legend.text = element_text(size = 5), legend.key.size = unit(5,"points"), # change the size of the legend legend.position = "bottom", panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1))+ scale_shape_manual(values=c(0,15,12,1,19,13,2,17,14)) centroidplot #---------------------------------------------- # Total VOC ------------------------------------ data.tot<-aggregate(amount ~ trt + day +rep, data=data2_fulltable, FUN=sum) #sum the amount of rows that has the same attributes #View(data.tot) #adding the biomass data data.bm <- read.csv("VOC.soybean.ablated.biomass.csv") #View(data.bm) data.bm <- data.bm[,c(2:4)] colnames(data.bm) <-c("trt","rep","biomass_soybean") #View(data.bm) data.tot.bm <- merge(data.tot,data.bm, all = TRUE) data.tot.bm=data.tot.bm[complete.cases(data.tot.bm), ] data.tot.bm$amount_per_hr <- data.tot.bm$amount/10 data.tot.bm$amount_per_hr_per_g <- data.tot.bm$amount_per_hr/data.tot.bm$biomass_soybean data.tot.bm$rep <- as.factor(data.tot.bm$rep) levels(data.tot.bm$trt)[levels(data.tot.bm$trt)=="Normal"] <- "Intact" data.tot.bm$trt <- factor(data.tot.bm$trt, levels = c("Control","Ablated","Intact")) data.tot.bm$day <- factor(data.tot.bm$day, levels = c("First day","Second day", "Third day")) levels(data.tot.bm$day)[levels(data.tot.bm$day)=="First day"] <- "day1" levels(data.tot.bm$day)[levels(data.tot.bm$day)=="Second day"] <- "day2" levels(data.tot.bm$day)[levels(data.tot.bm$day)=="Third day"] <- "day3" #View(data.tot.bm) # testing model assumptions qqnorm(data.tot.bm$amount_per_hr)#check normality qqline(data.tot.bm$amount_per_hr)#check normality library(rcompanion)#check variance plotNormalHistogram(data.tot.bm$amount_per_hr)#check variance # the data for amount_per_hr violate model assumptions # transformation trdata.tot.bm1 <- data.tot.bm library(bestNormalize) #normalize the data using this package BNobject_soybean_gox_totalVOCtotalVOC_amount_per_hr <- bestNormalize(trdata.tot.bm1$amount_per_hr) BNobject_soybean_gox_totalVOCtotalVOC_amount_per_hr orderNorm_obj_soybean_gox_totalVOCtotalVOC_amount_per_hr <- sqrt_x(trdata.tot.bm1$amount_per_hr) trdata.tot.bm1$amount_per_hr <- predict(orderNorm_obj_soybean_gox_totalVOCtotalVOC_amount_per_hr) # test the assumptions again qqnorm(trdata.tot.bm1$amount_per_hr)#check normality qqline(trdata.tot.bm1$amount_per_hr)#check normality library(rcompanion)#check variance plotNormalHistogram(trdata.tot.bm1$amount_per_hr)#check variance # separate the data for individual models trdata.tot.bm1_day1 <- subset(trdata.tot.bm1, day == "day1") trdata.tot.bm1_day2 <- subset(trdata.tot.bm1, day == "day2") trdata.tot.bm1_day3 <- subset(trdata.tot.bm1, day == "day3") # day1 lm.day1 = lm(amount_per_hr ~ trt, data=trdata.tot.bm1_day1)#ANOVA ANOVA.day1<-Anova(lm.day1, type="II")#ANOVA ANOVA.day1 ls.day1 = emmeans(lm.day1,~trt)#multiple comparison Mulcomp.day1<-CLD(ls.day1, by=NULL, alpha=.05, Letters=letters, reverse = TRUE)#multiple comparison Mulcomp.day1$.group <- gsub(" ", "", Mulcomp.day1$.group)#removing space in grouping info Mulcomp.day1$trt <- factor(Mulcomp.day1$trt, levels = c("Control","Ablated","Intact")) Mulcomp.day1 <- arrange(Mulcomp.day1, trt) Mulcomp.day1 # day2 lm.day2 = lm(amount_per_hr ~ trt, data=trdata.tot.bm1_day2)#ANOVA ANOVA.day2<-Anova(lm.day2, type="II")#ANOVA ANOVA.day2 ls.day2 = emmeans(lm.day2,~trt)#multiple comparison Mulcomp.day2<-CLD(ls.day2, by=NULL, alpha=.05, Letters=letters, reverse = TRUE)#multiple comparison Mulcomp.day2$.group <- gsub(" ", "", Mulcomp.day2$.group)#removing space in grouping info Mulcomp.day2$trt <- factor(Mulcomp.day2$trt, levels = c("Control","Ablated","Intact")) Mulcomp.day2 <- arrange(Mulcomp.day2, trt) Mulcomp.day2 #day3 lm.day3 = lm(amount_per_hr ~ trt, data=trdata.tot.bm1_day3)#ANOVA ANOVA.day3<-Anova(lm.day3, type="II")#ANOVA ANOVA.day3 ls.day3 = emmeans(lm.day3,~trt)#multiple comparison Mulcomp.day3<-CLD(ls.day3, by=NULL, alpha=.05, Letters=letters, reverse = TRUE)#multiple comparison Mulcomp.day3$.group <- gsub(" ", "", Mulcomp.day3$.group)#removing space in grouping info Mulcomp.day3$trt <- factor(Mulcomp.day3$trt, levels = c("Control","Ablated","Intact")) Mulcomp.day3 <- arrange(Mulcomp.day3, trt) Mulcomp.day3 #preparing for bar plot of amount_per_hr library(dplyr) rank<-group_by(data.tot.bm, trt, day) %>% dplyr::summarise(count = n(), mean = mean(amount_per_hr, na.rm = TRUE), sd = sd(amount_per_hr, na.rm = TRUE), se = sd/sqrt(n())) rank rank <- as.data.frame(rank) rank$trt <- factor(rank$trt, levels = c("Control","Ablated","Intact")) rank$day <- factor(rank$day, levels = c("day1","day2","day3")) rank <- arrange(rank, day, trt) ymax <- max(rank$mean) ymax ymax2 <- as.numeric(ymax + subset(rank, mean==ymax, select=se)) ymax2 ylim <- 1.1*ymax2 # defining the limit of y value #new grouping base on comparison gp <- c(Mulcomp.day1$.group,Mulcomp.day2$.group,Mulcomp.day3$.group) #making the bar plot of amount_per_hr sb.total <- function(rank.data, raw.data, grouping){ rank_plot<-ggplot(data=rank.data, aes(x=day, y=mean, fill =trt)) + #assign the dataframe geom_bar(stat="identity", colour="black", position=position_dodge())+theme_minimal() + #making the barchart side by side geom_errorbar(aes(ymin=mean-se, ymax=mean+se), color= "black", width=.2,position=position_dodge(.9)) + # adding error bar geom_text(aes(label=grouping, y=mean+se), vjust=-0.3, color="black", position = position_dodge(0.9), size=2.5) + #adding and positioning the labels labs( title = "Soybean (total VOC)",y = expression("ng "*hour^-1*plant^-1)) + #changing the titles and xy label theme(title=element_text(size = 6), axis.title.x = element_blank(), #position the title to the center axis.text.x = element_text(size = 5, hjust = 0.5), axis.text.y = element_text(size = 5, hjust = 0.5), legend.title = element_blank(), legend.text = element_text(size = 6), legend.key.size = unit(8,"points"), # change the size of the legend legend.position = c(.99, .99), legend.justification = c("right", "top"), legend.box.just = "right", panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1))+ scale_fill_manual(values=c("#F5F5F5","#A6808C","#CCB7AE"))+ scale_y_continuous(expand = c(0, 0), limits = c(0, ylim+350)) rank_plot rank_plot_jitter<-rank_plot + geom_jitter(data = raw.data, aes(x=day, y=amount_per_hr, fill=trt), position=position_jitterdodge(jitter.width = 0, dodge.width = 0.9), show.legend = FALSE, size = 1, alpha=0.2)+ #make it transparent scale_color_manual(values=c("black")) #add color rank_plot_jitter } sb.total.voc.plot <- sb.total(rank.data = rank, raw.data = data.tot.bm, grouping = gp) sb.total.voc.plot #---------------------------------------------- # Individual VOC ------------------------------ data3 <- data2_fulltable data3$amount <-data3$amount/10 # per hour emission levels(data3$day)[levels(data3$day)=="First day"] <- "day1" levels(data3$day)[levels(data3$day)=="Second day"] <- "day2" levels(data3$day)[levels(data3$day)=="Third day"] <- "day3" # Cis-3-Hexenol data_soybean_ablated_Cis3Hexenol <- subset(data3, compound == "Cis-3-Hexenol") # fitting zero-inflated Gaussian model lmezig_soybean_ablated_Cis3Hexenol = lme.zig(fixed = amount ~ trt + day, random = ~ 1 | rep, data = data_soybean_ablated_Cis3Hexenol, zi_fixed = ~ trt + day) summary(lmezig_soybean_ablated_Cis3Hexenol) # box plots bp_soybean_ablated_Cis3Hexenol <- ggplot(data=data_soybean_ablated_Cis3Hexenol, aes(x=day, y=amount, fill =trt)) + geom_boxplot(outlier.shape = NA) + stat_summary(fun.data = "mean_cl_boot", colour="#c04000", geom="point", shape=18, size=2, show.legend = FALSE, position =position_jitterdodge(jitter.width = 0, dodge.width = 0.75)) + labs( title ="Cis-3-Hexenol",y = expression("ng "*hour^-1*plant^-1), fill = "Water treatment") + geom_jitter(aes(x=day, y=amount, fill=trt), position=position_jitterdodge(jitter.width = 0, dodge.width = 0.75), show.legend = FALSE, size = 1, alpha=0.3)+ scale_color_manual(values=c("black")) + theme_minimal()+ theme(title=element_text(size = 6), axis.title=element_text(size=6), axis.title.x = element_blank(), axis.text.x = element_text(size = 5, hjust = 0.5), axis.text.y = element_text(size = 5, hjust = 0.5), legend.title = element_blank(), legend.text = element_text(size = 6), legend.key.size = unit(8,"points"), legend.position = c(.99, .99), legend.justification = c("right", "top"), legend.box.just = "right", panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1))+ scale_fill_manual(values=c("#F5F5F5","#A6808C","#CCB7AE")) bp_soybean_ablated_Cis3Hexenol # Cis-Jasmone data_soybean_ablated_Cis_Jasmone <- subset(data3, compound == "Cis-Jasmone") # fitting zero-inflated Gaussian model lmezig_soybean_ablated_Cis_Jasmone = lme.zig(fixed = amount ~ trt + day, random = ~ 1 | rep, data = data_soybean_ablated_Cis_Jasmone, zi_fixed = ~ trt + day) summary(lmezig_soybean_ablated_Cis_Jasmone) # box plots bp_soybean_ablated_Cis_Jasmone <- ggplot(data=data_soybean_ablated_Cis_Jasmone, aes(x=day, y=amount, fill =trt)) + geom_boxplot(outlier.shape = NA) + stat_summary(fun.data = "mean_cl_boot", colour="#c04000", geom="point", shape=18, size=2, show.legend = FALSE, position =position_jitterdodge(jitter.width = 0, dodge.width = 0.75)) + labs( title ="Cis-Jasmone",y = expression("ng "*hour^-1*plant^-1), fill = "Water treatment") + geom_jitter(aes(x=day, y=amount, fill=trt), position=position_jitterdodge(jitter.width = 0, dodge.width = 0.75), show.legend = FALSE, size = 1, alpha=0.3)+ scale_color_manual(values=c("black")) + theme_minimal()+ theme(title=element_text(size = 6), axis.title=element_text(size=6), axis.title.x = element_blank(), axis.text.x = element_text(size = 5, hjust = 0.5), axis.text.y = element_text(size = 5, hjust = 0.5), legend.title = element_blank(), legend.text = element_text(size = 6), legend.key.size = unit(8,"points"), legend.position = c(.99, .99), legend.justification = c("right", "top"), legend.box.just = "right", panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1))+ scale_fill_manual(values=c("#F5F5F5","#A6808C","#CCB7AE")) bp_soybean_ablated_Cis_Jasmone # Cis-3-Hexenyl Acetate data_soybean_ablated_Cis3HexenylAcetate <- subset(data3, compound == "Cis-3-Hexenyl Acetate") # fitting zero-inflated Gaussian model lmezig_soybean_ablated_Cis3HexenylAcetate = lme.zig(fixed = amount ~ trt + day, random = ~ 1 | rep, data = data_soybean_ablated_Cis3HexenylAcetate, zi_fixed = ~ trt + day) summary(lmezig_soybean_ablated_Cis3HexenylAcetate) # box plots bp_soybean_ablated_Cis3HexenylAcetate <- ggplot(data=data_soybean_ablated_Cis3HexenylAcetate, aes(x=day, y=amount, fill =trt)) + geom_boxplot(outlier.shape = NA) + stat_summary(fun.data = "mean_cl_boot", colour="#c04000", geom="point", shape=18, size=2, show.legend = FALSE, position =position_jitterdodge(jitter.width = 0, dodge.width = 0.75)) + labs( title ="Cis-3-Hexenyl Acetate",y = expression("ng "*hour^-1*plant^-1), fill = "Water treatment") + geom_jitter(aes(x=day, y=amount, fill=trt), position=position_jitterdodge(jitter.width = 0, dodge.width = 0.75), show.legend = FALSE, size = 1, alpha=0.3)+ scale_color_manual(values=c("black")) + theme_minimal()+ theme(title=element_text(size = 6), axis.title=element_text(size=6), axis.title.x = element_blank(), axis.text.x = element_text(size = 5, hjust = 0.5), axis.text.y = element_text(size = 5, hjust = 0.5), legend.title = element_blank(), legend.text = element_text(size = 6), legend.key.size = unit(8,"points"), legend.position = c(.99, .99), legend.justification = c("right", "top"), legend.box.just = "right", panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1))+ scale_fill_manual(values=c("#F5F5F5","#A6808C","#CCB7AE")) bp_soybean_ablated_Cis3HexenylAcetate # E,E-Alpha-Farnesene data_soybean_ablated_EEAlphaFarnesene <- subset(data3, compound == "E,E-Alpha-Farnesene") # fitting zero-inflated Gaussian model lmezig_soybean_ablated_EEAlphaFarnesene = lme.zig(fixed = amount ~ trt + day, random = ~ 1 | rep, data = data_soybean_ablated_EEAlphaFarnesene, zi_fixed = ~ trt + day) summary(lmezig_soybean_ablated_EEAlphaFarnesene) # box plots bp_soybean_ablated_EEAlphaFarnesene <- ggplot(data=data_soybean_ablated_EEAlphaFarnesene, aes(x=day, y=amount, fill =trt)) + geom_boxplot(outlier.shape = NA) + stat_summary(fun.data = "mean_cl_boot", colour="#c04000", geom="point", shape=18, size=2, show.legend = FALSE, position =position_jitterdodge(jitter.width = 0, dodge.width = 0.75)) + labs( title ="Alpha-Farnesene",y = expression("ng "*hour^-1*plant^-1), fill = "Water treatment") + geom_jitter(aes(x=day, y=amount, fill=trt), position=position_jitterdodge(jitter.width = 0, dodge.width = 0.75), show.legend = FALSE, size = 1, alpha=0.3)+ scale_color_manual(values=c("black")) + theme_minimal()+ theme(title=element_text(size = 6), axis.title=element_text(size=6), axis.title.x = element_blank(), axis.text.x = element_text(size = 5, hjust = 0.5), axis.text.y = element_text(size = 5, hjust = 0.5), legend.title = element_blank(), legend.text = element_text(size = 6), legend.key.size = unit(8,"points"), legend.position = c(.99, .99), legend.justification = c("right", "top"), legend.box.just = "right", panel.background= element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=1))+ scale_fill_manual(values=c("#F5F5F5","#A6808C","#CCB7AE")) bp_soybean_ablated_EEAlphaFarnesene #----------------------------------------------