### R-Script 'Honeybees possess a structurally diverse and functionally redundant set of queen pheromones' ### Proceedings of the Royal Society (2019) ### by Sarah A. Princen, Ricardo Caliari Oliveira, Ulrich R. Ernst, Jocelyn G. Millar, Jelle S. van Zweden & Tom Wenseleers ### GC/MS analysis of cuticular profiles of honeybee queens and workers (Fig. 1) # load required packages library(afex) library(MASS) library(car) library(tidyr) library(dplyr) library(broom) library(emmeans) library(export) library(pheatmap) library(doBy) library(multcomp) library(ggplot2) library(ggthemes) library(scales) library(gtools) library(matrixStats) # we read in the raw peak areas and convert everything to relative peak areas dfabs=read.csv("Princen_etal_datafig1.csv",check.names=F,stringsAsFactors=FALSE) # raw peak areas RT=as.numeric(dfabs[1,][-1]) # RTs RI=as.numeric(dfabs[2,][-1]) # RIs COMPOUND_CLASS=dfabs[4,][-1] # compound class COMPOUND_CLUSTER=dfabs[5,][-1] # compound cluster (worker, virgin queen or egg-laying queen specific) treatment=dfabs[6,][-1] # which compounds were included in which treatments dfabs=dfabs[-(1:6),] dfabs[,2:ncol(dfabs)]=as.numeric(as.matrix(dfabs[,2:ncol(dfabs)])) dfabs$CLASS=factor(dfabs$CLASS,levels=c("WORKER","VIRGIN_QUEEN","QUEEN"),labels=c("W","VQ","Q")) rownames(dfabs)=make.unique(as.character(dfabs[,1])) dfabsfull=dfabs # dataframe with factor CLASS + abundances dffactors=dfabs[,1,drop=FALSE] # dataframe with factor CLASS only dfabs=dfabs[,-1] # dataframe with abundances only dfabst=t(dfabs) # same but transposed dfrel = sweep(dfabs,1,rowSums(dfabs,na.rm=TRUE),FUN="/") # relative peak areas (compound abundance / total) dfrel[is.na(dfrel)] = 0 dfrelfull = data.frame(dffactors, dfrel, check.names=FALSE) # relative peak areas + factors dflog10relt = t(log10(dfrel+1E-6)) # log10 transformed relative peak areas (transposed) # we also calculate row z scores on log2 transformed relative peak areas for heatmap : dflog10relt_centered = sweep(dflog10relt,1,rowMeans(dflog10relt),FUN="-") dflog10relt_zscores = sweep(dflog10relt_centered,1,rowSds(dflog10relt_centered),FUN="/") # Cohen's ds & average rel peak areas & SDs per class (cf. Table S1) avgs=summaryBy(.~CLASS, data=data.frame(dfrelfull), FUN=mean) ns=summaryBy(.~CLASS, data=data.frame(dfrelfull), FUN=length) dfs=ns dfs[2:ncol(dfs)]=dfs[2:ncol(dfs)]-1 sds=summaryBy(.~CLASS, data=data.frame(dfrelfull), FUN=sd) sdpooled=sqrt( colSums(sds[,2:ncol(sds)]^2*dfs[2:ncol(dfs)])/colSums(dfs[2:ncol(dfs)]) ) cohensd = data.frame(compound=colnames(dfrel), RT=round(RT,2), RI=round(RI,0), COMPOUND_CLASS=as.character(COMPOUND_CLASS), COMPOUND_CLUSTER=as.character(COMPOUND_CLUSTER), Cohensd_QvsW=as.numeric((avgs[avgs$CLASS=="Q",2:ncol(avgs)]-avgs[avgs$CLASS=="W",2:ncol(avgs)])/sdpooled), Cohensd_QvsVQ=as.numeric((avgs[avgs$CLASS=="Q",2:ncol(avgs)]-avgs[avgs$CLASS=="VQ",2:ncol(avgs)])/sdpooled), Cohensd_VQvsW=as.numeric((avgs[avgs$CLASS=="VQ",2:ncol(avgs)]-avgs[avgs$CLASS=="W",2:ncol(avgs)])/sdpooled), avgrelpeaka_Q=as.numeric(avgs[avgs$CLASS=="Q",2:ncol(avgs)]), SDrelpeaka_Q=as.numeric(sds[sds$CLASS=="Q",2:ncol(sds)]), avgrelpeaka_VQ=as.numeric(avgs[avgs$CLASS=="VQ",2:ncol(avgs)]), SDrelpeaka_VQ=as.numeric(sds[sds$CLASS=="VQ",2:ncol(sds)]), avgrelpeaka_W=as.numeric(avgs[avgs$CLASS=="W",2:ncol(avgs)]), SDrelpeaka_W=as.numeric(sds[sds$CLASS=="W",2:ncol(sds)]) ) # Heatmap of row z scores calculated on log2 transformed relative peak areas (Fig. 1) #### dflog10relt_zscores2=dflog10relt_zscores # we clip heatmap scale between -2 and 2 dflog10relt_zscores2[dflog10relt_zscores2>2] = 2 dflog10relt_zscores2[dflog10relt_zscores2<(-2)] = -2 ph=pheatmap(dflog10relt_zscores2, # display_numbers=avgabst, # scale="row", cutree_rows=3, cutree_cols=3, clustering_method="average", clustering_distance_rows="correlation", clustering_distance_cols="correlation") comporder_heatmap=rownames(dflog10relt_zscores2)[ph$tree_row$order] graph2png(file="Fig1_heatmap.png",width=12,height=21,dpi=600) # in Fig. 1 in MS Q & VQ groups were manually switched around graph2ppt(file="Fig1_heatmap.pptx",width=12,height=21) # Statistical comparison of log10 transformed relative peak areas among queens, virgin queens & workers using robust linear models (Table S1) #### # before we start analyzing the data let's first convert the data with relative # amounts from wide to long format: data=gather(dfrelfull, compound, relquant, 2:ncol(dfrelfull)) data$CLASS=relevel(data$CLASS,ref="W") # specify desired reference level data$compound = factor(data$compound) data$log10relquant = log10(data$relquant+1E-7) # log10 transformed relative amounts datagr=group_by(data, compound) # data object grouped by compound # we now use the dplyr, lsmeans and broom packages to do robust linear model tests per compound by CLASS # on log10 transformed relative peak areas & carry out posthoc tests with FDR p value correction # predicted means and 95% conf lims preds=data.frame(datagr %>% do( data.frame(summary(lsmeans(rlm(log10relquant ~ CLASS, data = ., maxit=500), ~ CLASS ))) )) preds # ANOVA tables per compound, here we use white.adjust=T to take into account slight inequality of variances anova=data.frame(datagr %>% do(tidy(Anova(rlm(log10relquant ~ CLASS, data = ., maxit=500),type="III",white.adjust=T)))) anova # summary coefficient tables per compound coefs=data.frame(datagr %>% do(tidy(rlm(log10relquant ~ CLASS, data = ., maxit=500),type="III",white.adjust=T))) coefs # posthoc tests of pairwise differences between Qs, VQs & Ws, with FDR correction of obtained p values pvals=data.frame(datagr %>% do( data.frame(summary(contrast(lsmeans(rlm(log10relquant ~ CLASS, data = ., maxit=500), ~ CLASS ), method="pairwise",adjust="none"))) )) # or trt.vs.ctrl pvals$padj=p.adjust(pvals$p.value,method="fdr") # we use FDR p value correction sum(pvals$padj<0.05) # 295 sign in total pvals$folddiff=sign(pvals$estimate)*10^abs(pvals$estimate) sum(pvals$padj<0.05&abs(pvals$folddiff)>2) # 254 sign & >2 fold difference pvals$padj[abs(pvals$folddiff)<2]=1 # to leave out asterisks when absolute fold diff < 2 dfsign=as.data.frame(do.call(cbind, by(pvals$padj, pvals$contrast, function (x) as.vector(stars.pval(x))))) dfsign=cbind(compound=pvals[pvals$contrast==levels(pvals$contrast)[[1]],]$compound, dfsign) dfsign = dfsign[match(comporder_heatmap, dfsign$compound),] dfsign=dfsign[,c(1,3,2,4)] # re-order columns # Cohen's d & avg rel peak areas + SDs in order heatmap cohensd = cohensd[match(comporder_heatmap, cohensd$compound),] cohensd = cbind(cohensd, p_QvsW=dfsign[,2], p_QvsVQ=dfsign[,3], p_VQvsW=dfsign[,4]) write.csv(cohensd,"TableS1.csv") # Column plots of relative abundance of compounds used in bioassays (Fig. S1) #### selcomps=c("9-octadecenoic acid, hexadecyl ester","9-hexadecenoic acid, tetradecyl ester", "tetradecanoic acid, tetradecyl ester","9-octadecenoic acid, tetradecyl ester", "tetradecanoic acid, hexadecyl ester","9-hexadecenoic acid, hexadecyl ester", "hexadecanoic acid, hexadecyl ester", "15-nonatriacontene","15-pentatriacontene","15-tritriacontene","15-heptatriacontene") predssel=preds[(preds$compound %in% selcomps),] predssel$compound=factor(predssel$compound,levels=selcomps) # specify order here predssel$CLASS=factor(predssel$CLASS,levels=c("W","Q","VQ")) # specify order here predssel$y=100*((10^predssel$lsmean)-1E-7) predssel$ymin=100*((10^predssel$asymp.LCL)-1E-7) predssel$ymax=100*((10^predssel$asymp.UCL)-1E-7) ggplot(predssel, aes(x=CLASS, y=y+1E-2, fill=CLASS, group=CLASS)) + facet_wrap(~compound, ncol=3) + # scales="free_y", geom_bar(stat="identity", width=0.5, size=5) + geom_linerange(aes(ymin=ymin+1E-2, ymax=ymax+1E-2), size=0.25) + # theme_grey(base_size=13) + theme_few(base_size=18) + labs(y = "Relative amount present (%)", x="CLASS") + scale_fill_manual(values=c("blue3","red3","green4")) + xlab("") + theme(legend.position="none") + coord_cartesian(ylim=c(1E-20,6)) + scale_y_continuous(breaks = c(0.1, 1, 2.5, 5, 7.5, 10), trans="sqrt") + theme(axis.text.x=element_text(angle=30,hjust=1,color="black")) + theme(axis.text.y=element_text(color="black")) + theme(text = element_text(colour = "black")) + theme(panel.border=element_rect(fill=NA, size = 0.25, colour="black")) + theme(axis.ticks = element_line(colour = "black", size = 0.25)) graph2ppt(file="FigS1.pptx", width=12, height=10, vector.graphic=TRUE) # asterisks for significance were added manually