##################### # Analysing model # performance # from several # prediction models ##################### rm(list=ls()) # Preparations library(tidyverse) library(ggpubr) library(gtable) library(grid) library(gridExtra) options(scipen=999) #Reading in data: #workdir <- "C:/Users/trosim/Documents/PhD/Subproject_3/R_Subpro_3/Final_submission/Dryad/" #setwd(workdir) setwd("~/PhD/Subproject_3/Final_submission/Dryad") models <- read.table(file = "Evaluation_values_labels.csv", sep = ";", header=T, dec=",") # Checking structure head(models) tail(models) str(models) summary(models) #Colnames colnames(models) <- c("Type", "Predset", "AUC", "TSS", "Evar", "Dvar", "Label") Predset2 <- models$Predset Predset2 <- as.character(Predset2) Predset2[Predset2 == "Neutral"] <- "Neutral ." Predset2 <- as.factor(Predset2) models <- cbind(models,Predset2) # Summary AUC-values # creating a new column with model performance rank within each ecosystem type auc_models <- models %>% group_by(Type)%>% mutate(auc_rank = order(order(AUC, decreasing=TRUE))) # Create a summary table with key properties of the models auc_model_sums <- auc_models %>% group_by(Predset)%>% summarise(n = n(), min_auc = round(min(AUC),digits=3), max_auc = round(max(AUC), digits=3), mean_auc = round(mean(AUC),digits=3), mean_Evar = round(mean(Evar), digits=1), mean_Dvar = round(mean(Dvar),digits=1), mean_auc_rank = round(mean(auc_rank), digits=1), rank_sum = sum(auc_rank)) # renaming variables auc_model_sums <- auc_model_sums [c("Predset", "n", "mean_auc", "max_auc", "min_auc", "rank_sum", "mean_Evar", "mean_Dvar")] # sorting models in the summary table by model performance measured in AUC-values auc_model_sums <- arrange(auc_model_sums, rank_sum) print(auc_model_sums) hist(models$AUC) hist(models$TSS) plot(models) plot(models$Predset, models$AUC) # Same procedure for model performance measured in TSS-values: # Summary TSS-values tss_models <- models %>% group_by(Type)%>% mutate(tss_rank = order(order(TSS, decreasing=TRUE))) # Create a summary table with key properties of the models tss_model_sums <- tss_models %>% group_by(Predset)%>% summarise(n = n(), min_tss = round(min(TSS),digits=3), max_tss = round(max(TSS), digits=3), mean_tss = round(mean(TSS),digits=3), mean_Evar = round(mean(Evar), digits=1), mean_Dvar = round(mean(Dvar),digits=1), mean_tss_rank = round(mean(tss_rank), digits=1), rank_sum = sum(tss_rank)) # renaming variables tss_model_sums <- tss_model_sums [c("Predset", "n", "mean_tss", "max_tss", "min_tss", "rank_sum", "mean_Evar", "mean_Dvar")] # sorting models in the summary table by model performance measured in TSS-values tss_model_sums <- arrange(tss_model_sums, rank_sum) # Summary for each ecosystem type type_sums <- models %>% group_by(Type)%>% summarise(n = n(), max_auc = max(AUC), max_tss = max(TSS)) type_sums <- type_sums %>% mutate(AUC_rank = order(order(max_auc, decreasing=TRUE)))%>% mutate(TSS_rank = order(order(max_tss, decreasing=TRUE))) type_sums <- arrange(type_sums, AUC_rank) mfrow= c(1,1) ##################### # Data visualisation # Creating publication ready plots # comparing variables and models ##################### # Colour settings # Colours for plotting: Colour palette cbPalette <- c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a", "#ffff99", "#b15928") #Two colours: # F = "#D55E00", M = "#0072B2" #Three colours values = c("#0072B2", "#E69F00", "#009E73") values = c("#0072B2", "#E69F00", "red") # Creating box plot with summaries of model performance ##################### # Comparing models # based on performance ##################### # Comparing models, grouping by predictor set, performance = AUC # reordering models by mean performance models$Predset <- factor(models$Predset, levels = c("Basic + CLG", "All*", "Basic + LA", "Basic", "CLG", "Basic + Neutral", "Clim", "LA", "Neutral"),ordered = TRUE) levels(models$Predset) means <- aggregate (AUC ~ Predset, models, mean) means$AUC <- round(means$AUC, digits=3) p1 <- ggplot(models)+ geom_boxplot(outlier.shape = NA, mapping=aes(x= Predset, y = AUC))+ geom_jitter(mapping=aes(x= Predset, y = AUC, colour= Label), position=position_jitter(width = 0.15, height = 0))+ scale_color_manual(values=cbPalette)+ ylim(0.5,1)+ theme_classic ()+ ggpubr::rotate_x_text()+ #geom_text(data = means, aes(x=Predset, label = "Mean", y = 0.98), size = 3)+ geom_text(data = means, aes(x=Predset, label = AUC, y = 0.99), size = 3)+ #geom_text(data = evar, aes(x=Predset, label = DVs, y = 0.945 ), size=3)+ labs(colour = "Ecosystem\ type")+ labs(title = " (a)")+ labs(x = "Predictor set")+ theme(axis.title.x = element_text(vjust=-20))+ theme(plot.margin = unit(c(3,3,3,3), "lines"))+ theme(legend.text=element_text(size=8)) p1 # reordering models by mean performance #levels(models2$Predset) models$Type <- factor(models$Type, levels = c("T22", "T34", "T32", "T1", "T27", "V2", "T14", "V3", "V1"),ordered = TRUE) levels(models$Type) means <- aggregate (AUC ~ Type, models, mean) means$AUC <- round(means$AUC, digits=3) # Comparing models, grouping by ecosystem type, performance = AUC p2 <- ggplot(models)+ geom_boxplot(outlier.shape = NA, mapping=aes(x= Type, y = AUC))+ geom_jitter(mapping=aes(x= Type, y = AUC, colour= Predset2), position=position_jitter(0.2))+ scale_color_manual(values=cbPalette)+ ylim(0.5,1)+ theme_classic ()+ ggpubr::rotate_x_text()+ #geom_text(data = means, aes(x=Predset, label = "Mean", y = 0.98), size = 3)+ geom_text(data = means, aes(x=Type, label = AUC, y = 0.99), size = 3)+ #geom_text(data = evar, aes(x=Predset, label = DVs, y = 0.945 ), size=3)+ labs(colour = "Predictor\ set")+ labs(title = " (b)")+ labs(x = "Ecosystem type")+ theme(axis.title.x = element_text(vjust=-2))+ theme(plot.margin = unit(c(3,3,6,3), "lines"))+ theme(legend.text=element_text(size=8)) p2 p1_2 <- p1 + theme(plot.margin = unit(c(0,3,4,0), "lines")) p2_2 <- p2 + theme(plot.margin = unit(c(0,3,4,0), "lines")) #p2_2 #p1 #p2 #ggarrange(p1,p2, # labels = c("(a)", "(b)"), # ncol = 1, nrow = 2) # Get the gtables gA <- ggplotGrob(p1_2) gB <- ggplotGrob(p2_2) gA # Set the widths and the heights gA$widths <- gB$widths gA$heights <- gB$heights # Arrange the two charts. # The legend boxes are centered grid.newpage() grid.arrange(gA, gB, nrow = 2) # Plot publication quality -------------------------------------------------------------------- ## Setting up plot for pdf pdf('Figure_3_box_plots.pdf', width = 8.3, height = 11.7) ## Device with dimensions of A4 paper grid.arrange(gA, gB, nrow = 2, vp=viewport(width=0.9, height=0.7)) dev.off()