setwd("~/Dropbox/Body size extinction/Phanerozoic extinction size bias/Data and Analyses") ############################################# #### #### #### READ IN DATA #### #### #### ############################################# #read in occurrences and sizes genera.1 <- read.csv(file="~/Dropbox/Body size extinction/Phanerozoic extinction size bias/Data and Analyses/pbdb.cleaned.rev.csv") #read in occurrence dataset size.data <- read.csv(file="~/Dropbox/Body size extinction/Phanerozoic extinction size bias/Data and Analyses/genus.sizes.rev.ranges.csv") #subset size data to just genus and size myvars <- c("genus", "logvol") sd1 <- size.data[,myvars] #delimit the time intervals of interest start.age <- c(485.4,252.17,66) #start time, in millions of years ago, of the study interval end.age <- c(252.17,66,1) #end time, in millions of years ago, of the study interval min.class.size <- 200 #number of genera in a class to include in analysis, based on studied interval nd <- list() for (i in 1:length(start.age)) { genera <- subset(genera.1, max_ma<=start.age[i] & min_ma>=end.age[i] & family!="") x <- table(unlist(genera$genus, genera$family, genera$logvol), genera$max_ma) #table genus occurrences by interval x <- x[, match(colnames(x), rev(colnames(x)))] # reverse the column order so time is going forward x[x > 0] <- 1 # set counts > 1 equal to 1; presence-absence matrix ch <- apply(x, 1, paste, collapse="") # need a column called 'ch' with the sighting history as a string zeoros and ones. sightingData <- data.frame(genus=rownames(x), ch, nSight=rowSums(x), stringsAsFactors=FALSE) sightingData <- sightingData[sightingData$nSight > 0,] # remove any taxon with no occurrences myvars <- c("genus", "class", "phylum", "family", "logvol") #create a list of genera by class class.list <- unique(genera[myvars]) #create a list of genera by class sightingData <- merge(sightingData, class.list, by="genus") #bring class assignments back into sightingData # Make sure to install and load the "dplyr" package before running this code! library(plyr) family.median <- ddply(sightingData, c("family"), function(df)c(median(df$logvol), mean(df$logvol), length(df$logvol))) #find median size by class names(family.median) <- c("family", "median", "mean", "n") #name output dataset family.median <- family.median[family.median$n>1,] #restrict data to classes with enough genera class.count <- ddply(sightingData, c("class"), function(df)c(length(df$logvol))) #find median size by class names(class.count) <- c("class", "N") class.count <- subset(class.count, N>=min.class.size) sightingData <- merge(sightingData, family.median, by="family") #merge median size information back into sightingData sightingData <- merge(sightingData, class.count, by="class") sightingData$small <- 0 #create binary variable for larger or smaller than median sightingData$small[sightingData$logvol>sightingData$median] <- 1 #assign new value to genera bigger than median myvars <- c("ch", "small", "class", "phylum", "family", "mean", "logvol") nd4 <- sightingData[myvars] #subset occs data frame to relevant columns nd4$logvol.rel <- nd4$logvol - nd4$mean nd[[i]] <- nd4 print (i) } save(nd, file="family_data.Rdata")