library("phytools") library("adephylo") library("MCMCglmm") ##Telling the script were to find some resources, and renaming some stuff setwd("~/Documents/Canopies/Canopy_Crane_Specimens/") locations <- c("Panama","Malaysia") folders <- c("Panama2012","Malaysia") names(folders) <- locations results <- data.frame() row_counter <- 1 host_levels <- c("Host_species","Host_genus","Host_family") level_names <- c("Species","Genus","Family") names(level_names) <- host_levels ##For each place, grab the community data, tree, and host plant classification ##Basically everything that follows is inside this big ugly for-loop ##Even a library load. Not exactly handsome coding, Daniel! for(cur_loc in locations){ print(paste('Running analysis on',cur_loc,'...',sep=' ')) #cur_specimens <- read.delim(paste(folders[cur_loc],"/gene_trees/concatenated/",cur_loc,"_specimens_genetic_IDS.txt",sep=""),stringsAsFactors=FALSE) cur_specimens <- read.delim(paste(folders[cur_loc],"/gene_trees/concatenated/s1_v2.csv",sep=""),stringsAsFactors=FALSE,sep=',') ## for this to work, you'd need to have the main community summary files with the same name and in the same directory tree cur_species_tree <- read.tree(paste(folders[cur_loc],"/gene_trees/concatenated/treePL_estimate/",cur_loc,"_diaspidid_species.dated.tre",sep="")) cur_tree_genus_to_family <- read.delim(paste(folders[cur_loc],"/phylo_signal/",cur_loc,"_tree_genus_to_family.txt",sep=""),stringsAsFactors=FALSE) ############## ##This function just removes leading or trailing spaces, dots, quotes and pipes from any species name trim.names <- function (x) gsub("^\\s+|\\s+$|\\.|\\'|\\(|\\)|‘|’", "", x) ##use that function to clean up host names, then remove any host that is Undet or a bamboo, or sampled in 2010, pop the host ##genus name off, and get the host classification info cur_specimens$Host_species <- trim.names(cur_specimens$Host_species) if(length(grep("Undet gen|bamboo",cur_specimens$Host_species))>0){ cur_specimens <- cur_specimens[-grep("Undet gen|bamboo",cur_specimens$Host_species),] } getGenus <- function(species){ return(strsplit(species, " ")[[1]][1]) } cur_specimens$Host_genus <- sapply(cur_specimens$Host_species,getGenus) cur_specimens$Host_family <- cur_tree_genus_to_family$Accepted_family[ match(cur_specimens$Host_genus,cur_tree_genus_to_family$Name_submitted)] if(length(grep("2010",cur_specimens$Year))>0){ cur_specimens <- cur_specimens[-grep("2010",cur_specimens$Year),] } ######## ##OK, now we get a list of all of the trees sampled at each place ##that is, including all of the trees that didn't have any diaspidids ##we also clean up these names and get them classified cur_trees_sampled <- read.delim(paste(folders[cur_loc],"/",folders[cur_loc],"_trees_sampled.txt", sep=""),stringsAsFactors=FALSE) cur_trees_sampled$Host_species <- trim.names(cur_trees_sampled$Species) cur_trees_sampled$Host_genus <- sapply(cur_trees_sampled$Host_species,getGenus) cur_trees_sampled$Host_family <- cur_tree_genus_to_family$Accepted_family[ match(cur_trees_sampled$Host_genus,cur_tree_genus_to_family$Name_submitted)] cur_trees_sampled <- cur_trees_sampled[!is.na(cur_trees_sampled$Host_family),] # all_genera <- unique(c(cur_trees_sampled$Host_genus,cur_specimens$Host_genus)) # write.table(all_genera,file="~/Desktop/genera.txt",quote=FALSE,sep="\n", # row.names=FALSE,col.names = FALSE) ##At this point, for each place we have a diaspidid phylogeny, and four dataframes: ## 1. cur_specimens - this is the main community data ## 2. cur_tree_genus_to_family - classification of each tree genus ## 3. cur_trees_sampled - comprensive cat. of sampled trees with classification ## 4. an empty results thing, with a row counter initialized at 1 ####################################################################################### library("reshape") for(cur_level in host_levels){ print(paste('Running analysis on',cur_level,'...',sep=' ')) results[row_counter,"Location"] <- cur_loc results[row_counter,"Host_level"] <- cur_level ##get bipartite quantitative trophic network as matrix host_table <- as.data.frame.matrix(table(cur_specimens[,cur_level],cur_specimens$Species)) ##Make a qualitative (use non-use) version host_table_binary <- host_table host_table_binary[host_table_binary>1] <- 1 ##of each scale, get diet breadth as host taxon richness scale_specs <- colSums(host_table_binary) ##unroll matrix into three column interaction data frame ##ditch any tree without diaspidids ##this has abundances of each species on each tree melted <- data.frame(table(cur_specimens$Host_specimen,cur_specimens$Species)) melted <- melted[melted$Freq>0,] colnames(melted) <- c("Host_specimen","Species","Abundance") melted$Scale_spec <- scale_specs[as.character(melted$Species)] #plot(jitter(melted$Abundance) ~ jitter(melted$Scale_spec),main=paste(cur_loc,cur_level,sep=": ")) ##FIRST MODEL: scale abundance as a function of diet breadth (Host S) diasp_abundance_model <- glm(Abundance ~ Scale_spec, data = melted, family="poisson") ############################# ##Calculate simpsons index of hosts for each diasp species melted[,cur_level] <- cur_specimens[match(melted$Host_specimen,cur_specimens$Host_specimen),cur_level] Simpsons_D_array <- c() for(cur_diasp_sp in unique(melted$Species)){ cur_sp_data <- melted[melted$Species==cur_diasp_sp,] if(length(cur_sp_data$Host_specimen)>1){ cur_hosts <- unique(cur_sp_data[,cur_level]) Simpsons_D <- 0 num_specimens <- length(cur_sp_data$Host_specimen) for(cur_host in cur_hosts){ Simpsons_D <- Simpsons_D + (sum(cur_sp_data==cur_host)/num_specimens)^2 } Simpsons_D_array <- c(Simpsons_D_array,1/Simpsons_D) names(Simpsons_D_array)[length(Simpsons_D_array)] <- cur_diasp_sp } } ############################# ##relationship between host range and % host individuals colonized trees_sampled_table <- table(cur_trees_sampled[,cur_level]) min_host_abundance <- 3 ##get individual counts for each tree taxon sampled tree_spec_table <- table(cur_trees_sampled[,cur_level]) abundant_hosts <- names(tree_spec_table)[tree_spec_table>=min_host_abundance] abundant_host_data <- melted[melted[,cur_level] %in% abundant_hosts,] ##get on-host occurence for each genetic species abundant_host_data <- as.data.frame.matrix(table(abundant_host_data$Species, abundant_host_data[,cur_level])) abundant_host_data$Species <- row.names(abundant_host_data) abundant_hosts_melted <- melt(abundant_host_data,id.vars="Species") abundant_hosts_melted <- abundant_hosts_melted[abundant_hosts_melted$value>0,] abundant_hosts_melted$Scale_spec <- scale_specs[as.character(abundant_hosts_melted$Species)] abundant_hosts_melted$Num_sampled <- trees_sampled_table[as.character(abundant_hosts_melted$variable)] abundant_hosts_melted$Empty_tree <- abundant_hosts_melted$Num_sampled - abundant_hosts_melted$value ##Now for each species, we get the porpotion of its hosts that it has colonized abundant_hosts_melted$Prop_colonized <- as.vector(abundant_hosts_melted$value / abundant_hosts_melted$Num_sampled) #plot(abundant_hosts_melted$Prop_colonized ~ jitter(abundant_hosts_melted$Scale_spec), # main=paste(cur_loc,cur_level,sep=": ")) ##SECOND MODEL: Proportion of hosts colonized as function of diet breadth (host S) diasp_num_tree_model <- glm(cbind(value,Empty_tree) ~ Scale_spec, data = abundant_hosts_melted, family="binomial") ####################################### ##Calculate values for randomized data ##Significance testing is through permutation tests. rand_abundance_slopes <- c() rand_Ds <- c() rand_tree_slopes <- c() num_reps <- 1000 for(cur_rep in 1:num_reps){ #print(cur_rep) cur_rand_data <- melted cur_rand_data$Abundance <- cur_rand_data$Abundance[order(runif(length(cur_rand_data$Abundance)))] cur_rand_data[,cur_level] <- cur_rand_data[order(runif(length(cur_rand_data[,cur_level]))),cur_level] #calculate abundance vs scale spec slope m_rand <- glm(Abundance ~ Scale_spec, data = cur_rand_data, family="poisson") rand_abundance_slopes <- c(rand_abundance_slopes, m_rand$coefficients["Scale_spec"]) #calculate Simpson's D rand_Simpsons_D_array <- c() for(cur_diasp_sp in unique(cur_rand_data$Species)){ rand_cur_sp_data <- cur_rand_data[cur_rand_data$Species==cur_diasp_sp,] if(length(rand_cur_sp_data$Host_specimen)>1){ cur_hosts <- unique(rand_cur_sp_data[,cur_level]) rand_Simpsons_D <- 0 num_specimens <- length(rand_cur_sp_data$Host_specimen) for(cur_host in cur_hosts){ rand_Simpsons_D <- rand_Simpsons_D + (sum(rand_cur_sp_data==cur_host)/num_specimens)^2 } rand_Simpsons_D_array <- c(rand_Simpsons_D_array,1/rand_Simpsons_D) } } rand_Ds <- c(rand_Ds, mean(rand_Simpsons_D_array)) #Calculate % trees model rand_abundant_hosts_melted <- abundant_hosts_melted rand_abundant_hosts_melted$Scale_spec <- rand_abundant_hosts_melted$Scale_spec[ order(runif(length(rand_abundant_hosts_melted$Scale_spec)))] rand_diasp_num_tree_model <- glm(cbind(value,Num_sampled) ~ Scale_spec, data = rand_abundant_hosts_melted, family="binomial") rand_tree_slopes <- c(rand_tree_slopes, rand_diasp_num_tree_model$coefficients["Scale_spec"]) } Abundance_z_score <- (diasp_abundance_model$coefficients["Scale_spec"] - mean(rand_abundance_slopes) )/ sd(rand_abundance_slopes) Abundance_p_val <- 2*pnorm(-abs(Abundance_z_score)) results[row_counter,"Abundance_slope"] <- diasp_abundance_model$coefficients["Scale_spec"] results[row_counter,"Abundance_rand_mean"] <- mean(rand_abundance_slopes) results[row_counter,"Abundance_Z_score"] <- Abundance_z_score results[row_counter,"Abundance_P_val"] <- Abundance_p_val D_z_score <- (mean(Simpsons_D_array) - mean(rand_Ds) ) / sd(rand_Ds) D_p_val <- 2*pnorm(-abs(D_z_score)) results[row_counter,"Simpsons_D"] <- mean(Simpsons_D_array) results[row_counter,"Simpsons_D_rand_mean"] <- mean(rand_Ds) results[row_counter,"Simpsons_D_Z_score"] <- D_z_score results[row_counter,"Simpsons_D_P_val"] <- D_p_val Num_trees_z_score <- (diasp_num_tree_model$coefficients["Scale_spec"] - mean(rand_tree_slopes) )/ sd(rand_tree_slopes) Num_trees_p_val <- 2*pnorm(-abs(Num_trees_z_score)) results[row_counter,"Num_trees_slope"] <- diasp_num_tree_model$coefficients["Scale_spec"] results[row_counter,"Num_trees_rand_mean"] <- mean(rand_tree_slopes) results[row_counter,"Num_trees_Z_score"] <- Num_trees_z_score results[row_counter,"Num_trees_P_val"] <- Num_trees_p_val #### row_counter <- row_counter + 1 } } output_filename <- "Panama_and_Malaysia_Canopy_Analysis/abundance_tradeoffs/abundance_tradeoffs_results_morph.txt" write.table(results, file=output_filename, sep="\t",quote=FALSE,row.names=FALSE)