### Bennett and Stone (2019) Environmental Variables Associated with Nothophaeocryptopus gaeumannii Population Structure and Swiss Needle Cast Severity in Western Oregon and Washington, Ecology and Evolution. ### ### R Script library(poppr) library(ape) library(ggplot2) library(RColorBrewer) library(dplyr) # To set pop. parameters and hierarchy: PC<-read.genalex("Bennett_Nothophaeocryptopus_SSR_PopGen_Data_2014-2016.csv", ploidy=1, geo= FALSE, region= FALSE, genclone=TRUE, sep=",") strata(PC)<-data.frame(pop(PC)) #(all strata sep. by "_")# PC<-splitStrata(PC,~Lineage/State/Block/Site/Tree, sep="_") ##Can set any strata level as pop here## PCS<-setPop(PC,~Site) PCL<-setPop(PC,~Lineage) PCT<-setPop(PC,~Tree) PCB<-setPop(PC,~Block) PCSTAT<-setPop(PC,~State) PCLCC<-clonecorrect(PCL,strata=~Lineage) #DIVERSITY poppr(PCL) poppr(PCS) PCSdivci<-diversity_ci(PCS,rarefy=TRUE,raw=TRUE) PCLdivci<-diversity_ci(PCL,rarefy=TRUE,raw=TRUE) #To make UPGMA dendrogram: #UPGMA Tree Fan With Lineages Colored cols<-c("blue","red") set.seed(999) PCLCC_tree <- aboot(PCLCC, tree = "upgma", distance = nei.dist, sample = 10000, showtree = F, cutoff = 70) plot.phylo(ladderize(PCLCC_tree), type = "fan", cex = 0.6, font = 2, adj = 0,label.offset = 0.25, tip.color = cols[pop(PCLCC)]) nodelabels(PCLCC_tree$node.label, adj = c(1, -0.1), frame = "n", cex = 1.5,font = 3, xpd = TRUE) legend('topleft', legend = c("Lineage 1", "Lineage 2"), fill = c("blue","red"), border = FALSE, bty = "n", cex = 1.5) #Use GGtree package to create figure library(ggtree) library(reshape2) library(plyr) library(phylobase) library(ggrepel) par(mar=c(0,0,0,0)) ggtree(PCLCC_tree, layout="circular") + geom_tiplab2(size=2,aes(color=pop(PCLCC)),data=c,show.legend=FALSE)+ geom_tippoint(aes(color=pop(PCLCC)),data=c,size=0,show.legend=TRUE)+ scale_color_manual(values=c("blue","red"),name="Lineage",labels=c("1","2"))+ geom_label_repel(aes(label=label),color="black",data=subset_labels,na.rm=TRUE,parse=TRUE,size=3.5,nudge_x=-2)+ guides(colour=guide_legend(override.aes=list(size=7,shape=16)))+ theme(legend.position="bottom",legend.text=element_text(size=14),legend.title=element_text(size=14),text=element_text(face="bold")) #Calculate number of MLGs shared between OR and WA SHAREDORWA<-mlg.crosspop(PCSTAT,df=TRUE) #Calculate pairwise genetic differentiation between lineages library(mmod) pairwise_Gst_Hedrick(PCL) #Calculate private alleles in lineages, states, etc. LinPriv<-private_alleles(PCL) StatPriv<-private_alleles(PCSTAT) #Info Table for missing data info_table(PCS,type="missing", percent=TRUE) #Non-Metric Multidimensional Scaling (NMDS) ANALYSIS library(vegan) library(poppr) library(ape) library(ggvegan) library(ggplot2) library(ggrepel) library(RColorBrewer) MYPNW<-popsub(PCS,sublist=c("T0-1","T0-2","T0-3","T25-2","T5-5","T5-3","T15-1","T15-2","T25-3","F0-2","F5-1","F15-3","F15-1","F25-2","WDNR49","WDNR66","WDNR68","WDNR70","WDNR71","WDNR32","WDNR63","WDNR64","DNRS25-1","DNRL25-2","G0-1","N0-1","CB25-2","G0-2","N5-2","N15-3","N25-5","CB0-1","CB15-2")) #REMOVE "CB5-2"- ITS AN OUTLIER! #REMOVE "WDNRQ"- NO DISEASE SEVERITY DATA AVAILABLE! #To perform NMDS using package vegan #Distance matrix- for genetic distance between sites MYPNWCC<-clonecorrect(MYPNW) MYPNWSCC<-setPop(MYPNWCC,~Site) MYPNWpop<-genind2genpop(MYPNWSCC) MYPNWSgendist<-as.dist(dist.genpop(MYPNWpop,method=4)) #For this to work- the Row/Column labels of the genetic dist matrix MUST BE IN THE SAME ORDER AS THE ENVIRONMENTAL MATRIX!!!## ordering <- sort(attr(MYPNWSgendist, "Labels")) MYPNWSgendist.mat <- as.matrix(MYPNWSgendist)[ordering,ordering] MYPNWSgendist <- as.dist(MYPNWSgendist.mat) #NMDS with genetic distance matrix (sites) set.seed(999) MYPNWMDS<-metaMDS(MYPNWSgendist,wascores=FALSE,autotransform=FALSE,try=100,display="sites") #View results as scatterplot ordiplot(MYPNWMDS) #Add text labels for sites orditorp(MYPNWMDS,display="site") #Add joint plot overlay with vectors for environmental variables clim<-read.csv("Bennett_SNC_Severity_and_Environmental_Data_OR_WA_2014-2016.csv") #Ordination scores ALClim<-clim[c(1:3,5:34),c(1:6,8:12)] scl<-3 MYPNWMDScores<-as.data.frame(scores(MYPNWMDS)) fits<-envfit(MYPNWMDS,ALClim,perm=999,na.rm=TRUE) ordiplot(MYPNWMDS,type="n") ordipointlabel(MYPNWMDS, display = "sites", font = 2,col = "black",cex=0.75) plot(fits, col = "red",cex=0.75) #See stressplot and goodness-of-fit statistics stressplot(MYPNWMDS,cex.lab=1.5,cex.axis=1.5,cex=1) goodness(MYPNWMDS) #ggplot biplot library(ggplot2) ##OR and WA sites fits.vecs <- as.data.frame(scores(fits, display = "vectors")) fits.vecs <- cbind(fits.vecs, Factor = rownames(fits.vecs)) Sites<-rownames(MYPNWMDScores) #Colors for locations (F, T, etc.) #ordiArrowMul(fits) = 0.2793775 Location<-as.factor(clim[c(1:3,5:34),]$Location) ggplot(MYPNWMDScores) + geom_point(mapping = aes(x = NMDS1, y = NMDS2, colour = Location),size=3) + scale_color_manual(values =brewer.pal(8,"Dark2"))+ coord_fixed(1) + ## need aspect ratio of 1! geom_segment(data = fits.vecs,aes(x = 0, xend = 0.2793775*NMDS1, y = 0, yend = 0.2793775*NMDS2),arrow = arrow(length = unit(0.25, "cm")), colour = "black") + geom_text_repel(data = fits.vecs, aes(x = 0.2793775*NMDS1, y = 0.2793775*NMDS2,label = rownames(fits.vecs)),size = 5)+ theme_bw()+ theme(text=element_text(size=16),panel.grid.minor=element_blank(),panel.grid.major=element_blank()) ###Statistical Analyses with SNC severity data: setwd("~/Documents/SNC") library(cowplot) library(dplyr) library(ggplot2) library(GGally) library(nlme) library(MASS) library(psych) library(Hmisc) ###SITES### #Read in dataset with Sites, Colonization Index (CI), and AFR (Average Foliage Retention) averaged for each site. Elevation, Dew Point Temp, Summer Temp, Distance From Coast can all be variables in model PSFR<-read.csv("Bennett_SNC_Severity_and_Environmental_Data_OR_WA_2014-2016.csv", header=T) PSFR<-na.omit(PSFR) attach(PSFR) #CI~AFR CIFR<-lm(AFR~CI) summary(CIFR) ggplot(CIFR, aes(x=CI, y=AFR)) + geom_point()+ geom_smooth(method=lm,colour="black")+ theme_bw()+ theme(axis.text=element_text(size=12),axis.title=element_text(size=12,face="bold"))+ annotate(geom="text", x=10, y=25, label="R^2 == 0.330~~~P < 0.001",parse=TRUE, color="black",size=5)+ xlab("Colonization Index")+ylab("Average Foliage Retention (%)") #AFR~PL2 PL2FR<-lm(AFR~PL2) summary(PL2FR) PL2FRPlot<-ggplot(PL2FR, aes(x=PL2, y=AFR)) + geom_point()+ geom_smooth(method=lm,colour="black")+ theme_bw()+ theme(axis.text=element_text(size=12),axis.title=element_text(size=16,face="bold"),plot.margin=margin(t = 0.2, r = 0.2, b = 0.2, l = 0.4, unit = "cm"))+ annotate(geom="text", x=0.8, y=32.5, label="R^2 == 0.234~~~P == 0.002",parse=TRUE, color="black",size=5)+ xlab("Relative Proportion of Lineage 2")+ylab("Average Foliage Retention (%)") #AFR~DIST DISTFR<-lm(AFR~DIST) summary(DISTFR) DISTFRPlot<-ggplot(DISTFR, aes(x=DIST, y=AFR)) + geom_point()+ geom_smooth(method=lm,colour="black")+ theme_bw()+ theme(axis.text=element_text(size=12),axis.title=element_text(size=16,face="bold"),plot.margin=margin(t = 0.2, r = 0.2, b = 0.2, l = 0.4, unit = "cm"))+ annotate(geom="text", x=45, y=35, label="R^2 == 0.525~~~P < 0.001",parse=TRUE,color="black",size=5)+ xlab("Distance inland (km)")+ylab("Average Foliage Retention (%)") #CI~DIST CIDIST<-lm(CI~DIST) summary(CIDIST) CIDISTPlot<-ggplot(CIDIST, aes(x=DIST, y=CI)) + geom_point()+ geom_smooth(method=lm,colour="black")+ theme_bw()+ theme(axis.text=element_text(size=12),axis.title=element_text(size=16,face="bold"),plot.margin=margin(t = 0.2, r = 0.2, b = 0.2, l = 0.4, unit = "cm"))+ annotate(geom="text", x=12, y=5, label= "R^2 == 0.143~~~P < 0.05", parse=TRUE, color="black",size=5)+ xlab("Distance inland (km)")+ylab("Colonization Index") #PL2~DIST DISTL2<-lm(PL2~DIST) summary(DISTL2) DISTL2Plot<-ggplot(DISTL2, aes(x=DIST, y=PL2)) + geom_point()+ geom_smooth(method=lm,colour="black")+ theme_bw()+ theme(axis.text=element_text(size=12),axis.title=element_text(size=16,face="bold"),plot.margin=margin(t = 0.2, r = 0.2, b = 0.2, l = 0.4, unit = "cm"))+ annotate(geom="text", x=12, y=-0.1, label = "R^2 == 0.457~~~P < 0.001", parse = TRUE,color="black",size=5)+ xlab("Distance inland (km)")+ylab("Relative Proportion of Lineage 2") #CI~PL2 PL2ci<-lm(CI~PL2) summary(PL2ci) ggplot(PL2ci, aes(x=PL2, y=CI)) + geom_point()+ geom_smooth(method=lm,colour="black")+ theme_bw()+ theme(axis.text=element_text(size=12),axis.title=element_text(size=12,face="bold"))+ annotate(geom="text", x=0.75, y=40, label= "R^2 == -0.030~~~P == 0.857",parse=TRUE, color="black",size=4)+ xlab("Relative proportion of Lineage 2")+ylab("Colonization Index") #PLOT 4 PANELS (2 COLUMNS) plot_grid(DISTL2Plot,DISTFRPlot,CIDISTPlot,PL2FRPlot,nrow=2,ncol=2,labels="AUTO",label_size=20) #View pairwise correlations between all variables cor(dplyr::select(PSFR,CI,AFR,PL2,MADT,TmSummer,TmWinter,ELEV,DIST,PPT)) varmatrix<-as.matrix(dplyr::select(PSFR,CI,AFR,PL2,MADT,TmSummer,TmWinter,ELEV,DIST,Long,Lat,PPT)) print(rcorr(varmatrix,type="pearson"),digits=5) #FOLIAGE RETENTION - DIST #Linear mixed effects model with random variable for plot (site) library(car) Site=PSFR$Site FRDIST=lme(AFR~PL2+DIST,data=PSFR,random=~1|Site) summary(FRDIST) # Here we use predict to predict estimated mean foliage retention # based on our final model, with distance from coast held constant at its mean value preddatFRDIST = data.frame(PL2=PSFR$PL2,DIST= mean(PSFR$DIST),AFR=PSFR$AFR,Site=PSFR$Site) preddatFRDIST$pred = predict(FRDIST, newdata=preddatFRDIST,level=0) preddatFRDIST DesignmatFRDIST <- model.matrix(formula(FRDIST)[-2], preddatFRDIST) predvarFRDIST <- diag(DesignmatFRDIST %*% vcov(FRDIST) %*% t(DesignmatFRDIST)) preddatFRDIST$SE <- sqrt(predvarFRDIST) #Confidence intervals cmult=1.96 DISTupperPL2 = preddatFRDIST$pred+cmult*(preddatFRDIST$SE) DISTlowerPL2 = preddatFRDIST$pred-cmult*(preddatFRDIST$SE) # Let's put these into our original dataset for plotting PSFR = mutate(PSFR, predicted = preddatFRDIST$pred,lower= DISTlowerPL2,upper= DISTupperPL2) #Construct scatterplot with 95% confidence interval for the predicted values # Plot the estimated line, # which is the relationship between AFR and PL2 # while holding DIST at mean(dist) # This shows the slope of the line for the relationship between AFR and PL2 # after accounting for DIST # The slope of the line matches the estimated slope from the model ggplot(PSFR, aes(x = PL2, y = AFR) ) + # Establish x and y axes geom_point() + # Add points from original dataset geom_line(aes(y = preddatFRDIST$pred) ) + # Add a line from the predicted values # Add an approximate confidence interval "ribbon" geom_ribbon(aes(ymin = DISTlowerPL2 , ymax = DISTupperPL2, alpha = .25)) + ylab("Average Foliage Retention (%)") + # y label xlab("Relative Proportion of Lineage 2") + # x label theme_bw()+ # change theme to black and white theme(legend.position="none",axis.text=element_text(size=12),axis.title=element_text(size=12,face="bold")) #ggtitle("Foliage Retention as a Function of Proportion PL2- Trend Line and 95% C.I. w/ DIST Held at its Mean ")