setwd("/Users/user-swillis/Desktop/mykiss/pittag/Bonneville_prev_years"); #import the data for interrogation & MMR sites int_sites=read.csv("PTAGIS Interrogation and MMR Sites Reduced.csv",header=T) names(int_sites) int_sites<-int_sites[,c("Interrogation.Site.Code","Interrogation.Site.Name","Interrogation.Site.Type","River.km","Facility.Name","HUC8","HUC8.Name","Latitude","Longitude")] head(int_sites) #import the tag histories pit_pings=read.csv("pits_2013_2018_Complete Tag History.csv",header=T) names(pit_pings) pit_pings<-pit_pings[,c("Tag.Code","Event.Type.Name","Event.Site.Code.Value","Event.Site.Name","Event.Date.MMDDYYYY")] head(pit_pings) pits<-unique(pit_pings$Tag.Code) #check that all ping sites are listed in interrogation sites file #length(levels(pit_pings$Event.Site.Name)) #length(levels(pit_pings$Event.Site.Code.Value)) #sort(unique(paste(pit_pings$Event.Site.Code.Value,pit_pings$Event.Site.Name))) #sum(!levels(pit_pings$Event.Site.Code.Value) %in% levels(int_sites$Interrogation.Site.Code)) #levels(pit_pings$Event.Site.Code.Value)[!levels(pit_pings$Event.Site.Code.Value) %in% levels(int_sites$Interrogation.Site.Code)] pit_pings<-droplevels(pit_pings[!pit_pings$Event.Site.Code.Value=="ORPHAN",]) sum(!levels(pit_pings$Event.Site.Code.Value) %in% levels(int_sites$Interrogation.Site.Code)) library(parsedate) #create column with ordinal day of the year in which event occurred pit_pings$Event.Date.Ordinal.Tmp<-as.numeric(strftime(as.Date(parse_date(pit_pings$Event.Date.MMDDYYYY)), format = "%j")) #create column with year in which event occurred pit_pings$Event.Date.Year<-as.numeric(strftime(as.Date(parse_date(pit_pings$Event.Date.MMDDYYYY)), format = "%Y")) #create column with number of days from Jan 1 of first year in dataset; used for ordering events series that span multiple years #will later be converted to relative ordinal based on year of Bonneville passage baseyear<-min(pit_pings$Event.Date.Year) pit_pings$Event.Date.Ordinal<-((pit_pings$Event.Date.Year-baseyear)*365)+pit_pings$Event.Date.Ordinal.Tmp combo_data<-data.frame(matrix(nrow = 0, ncol = 24)) for(k in 1:length(pits)) { tag<-pits[k] df<-pit_pings[pit_pings$Tag.Code==tag,] #get first date at Bonneville; proxy for freshwater entrance df_marked<-df[df$Event.Site.Name %in% c("BO1 - Bonneville Bradford Is. Ladder", "BO2 - Bonneville Cascades Is. Ladder", "BO3 - Bonneville WA Shore Ladder/AFF", "BO4 - Bonneville WA Ladder Slots", "BONAFF - BON - Adult Fish Facility"),] marking<-head(df_marked[order(df_marked$Event.Date.Ordinal),],n=1) #pings after marking, but no more than 11 months (330 days) after that mark to avoid kelts df_reads<-df[df$Event.Date.Ordinal>=marking$Event.Date.Ordinal&df$Event.Date.Ordinal<(marking$Event.Date.Ordinal+330),] pinged_sites=levels(droplevels(df_reads$Event.Site.Code.Value)) #river river distance for each site pinged sites_dist<-data.frame(matrix(nrow = 0, ncol = 2)) colnames(sites_dist)<-c("code","km") for(i in 1:length(pinged_sites)) { code=pinged_sites[i] RKM=int_sites[int_sites$Interrogation.Site.Code==code,]$River.km if (length(RKM)>0) { sites_dist[i,1]<-code sites_dist[i,2]<-RKM } } #keep most upstream site by river distance upsite<-sites_dist[order(sites_dist$km,decreasing = TRUE),1][1] #get date of first ping at most upstream site upsitereads<-df_reads[df_reads$Event.Site.Code.Value==upsite,] firstupsiteread<-head(upsitereads[order(upsitereads$Event.Date.Ordinal,decreasing = FALSE),c("Tag.Code","Event.Site.Code.Value","Event.Date.MMDDYYYY","Event.Date.Ordinal.Tmp","Event.Date.Year","Event.Date.Ordinal")],1) #extract info about that site from interrogation/MMR sites file upsitemeta<-int_sites[int_sites$Interrogation.Site.Code==upsite,] df2<-cbind.data.frame(tag,marking,firstupsiteread,upsitemeta) combo_data<-rbind.data.frame(combo_data,df2) out<-c("Tagged individual ",k," of ",length(pits)) message(out) } head(combo_data) colnames(combo_data)<-c("TAG.code", "MRK.Tag.Code", "MRK.Event.Type.Name", "MRK.Event.Site.Code.Value", "MRK.Event.Site.Name", "MRK.Event.Date.MMDDYYYY", "MRK.Event.Date.Ordinal.Tmp", "MRK.Event.Date.Year", "MRK.Event.Date.Ordinal", "OBS.Tag.Code", "OBS.Event.Site.Code.Value", "OBS.Event.Date.MMDDYYYY", "OBS.Event.Date.Ordinal.Tmp", "OBS.Event.Date.Year", "OBS.Event.Date.Ordinal", "INT.Interrogation.Site.Code", "INT.Interrogation.Site.Name", "INT.Interrogation.Site.Type", "INT.River.km", "INT.Facility.Name", "INT.HUC8", "INT.HUC8.Name", "INT.Latitude", "INT.Longitude") combo_data$ordinal.marked<-combo_data$MRK.Event.Date.Ordinal-(combo_data$MRK.Event.Date.Year-baseyear)*365 sum(combo_data$MRK.Event.Date.Ordinal.Tmp==combo_data$ordinal.marked) combo_data$ordinal.arrived<-combo_data$OBS.Event.Date.Ordinal-(combo_data$MRK.Event.Date.Year-baseyear)*365 sum(combo_data$MRK.Event.Date.Ordinal.Tmp==combo_data$ordinal.arrived) head(combo_data[combo_data$MRK.Event.Date.Ordinal.Tmp==combo_data$ordinal.arrived,]) write.table(combo_data,file="Omy_pit_pings_Bonneville_2013_2018_marked_arrived-no-kelt.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) head(combo_data) names(combo_data) unique_upstream_sites2<-unique(combo_data[,c("INT.Interrogation.Site.Code", "INT.Interrogation.Site.Name", "INT.Interrogation.Site.Type", "INT.River.km", "INT.Facility.Name", "INT.HUC8", "INT.HUC8.Name", "INT.Latitude", "INT.Longitude")]) nrow(unique_upstream_sites2) write.table(unique_upstream_sites2,file="Omy_pit_pings_Bonneville_2013_2018_marked_arrived_sites_unfiltered2.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) #arrival sites (arrays) with filtering status and updated HUC upstream_sites_classed=read.delim("Omy_pit_pings_Bonneville_2013_2018_marked_arrived_sites_classed6.txt") head(upstream_sites_classed) filter<-as.character(upstream_sites_classed[upstream_sites_classed$class=="filter","INT.Interrogation.Site.Code"]) combo_data<-read.delim("Omy_pit_pings_Bonneville_2013_2018_marked_arrived-no-kelt.txt") nrow(combo_data) #associate new subdivided HUC8s to each fish record new_HUC_num<-NULL new_HUC_name<-NULL BPAsubbasin<-NULL for (r in 1:nrow(combo_data)) { code<-droplevels(combo_data$INT.Interrogation.Site.Code[r]) code<-factor(code,levels=levels(upstream_sites_classed$INT.Interrogation.Site.Code)) new_HUC_num<-c(new_HUC_num,as.character(upstream_sites_classed[upstream_sites_classed$INT.Interrogation.Site.Code==code,"INT.HUC8.SCW"])) new_HUC_name<-c(new_HUC_name,as.character(upstream_sites_classed[upstream_sites_classed$INT.Interrogation.Site.Code==code,"INT.HUC8.Name.SCW"])) BPAsubbasin<-c(BPAsubbasin,as.character(upstream_sites_classed[upstream_sites_classed$INT.Interrogation.Site.Code==code,"BPAsubbasin"])) } combo_data$INT.HUC8.SCW<-new_HUC_num combo_data$INT.HUC8.Name.SCW<-new_HUC_name combo_data$BPAsubbasin<-BPAsubbasin #filter out individuals whose last ping is at a main Columbia dam or near a river mouth (to eliminate dip-ins, etc.) made_its<-droplevels(combo_data[!combo_data$INT.Interrogation.Site.Code %in% filter,]) nrow(made_its) write.table(made_its,file="Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_11.13.19.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) ##arrange phenotype file of fork length and ocean ages made_its_pits_IDs_genos<-read.delim("Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_filteredID.txt",header=TRUE,sep="\t") nrow(made_its_pits_IDs_genos) names(made_its_pits_IDs_genos) concat_ages<-read.delim("bonn2013_2018_pit_age_fork_ocean.txt",header=TRUE,sep="\t") head(concat_ages) sum(made_its_pits_IDs_genos$Individual.Name %in% concat_ages$Individual) sum(made_its_pits_IDs_genos$PIT.Tag.Number %in% concat_ages$PITTagNumber) fivecrows_ages<-read.delim("STHD_2013_2018_ocean.txt",header=TRUE,sep="\t") head(fivecrows_ages) sum(made_its_pits_IDs_genos$PIT.Tag.Number %in% fivecrows_ages$PitTag_long) names() keep_ages<-fivecrows_ages[fivecrows_ages$PitTag_long %in% made_its_pits_IDs_genos$PIT.Tag.Number,c("PitTag_long","Ocean","Length")] nrow(keep_ages) keep_ages[keep_ages$Length<40,"Length"]<-NA rownames(keep_ages)<-keep_ages$PitTag_long rownames(made_its_pits_IDs_genos)<-made_its_pits_IDs_genos$PIT.Tag.Number made_its_pits_IDs_genos_age_length<-merge(made_its_pits_IDs_genos,keep_ages,by="row.names") nrow(made_its_pits_IDs_genos_age_length) #write.table(made_its_pits_IDs_genos_age_length,"Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_filteredID_age_length.txt",col.names=TRUE,row.names=FALSE,quote=FALSE,sep="\t") names(made_its_pits_IDs_genos_age_length) unique(made_its_pits_IDs_genos_age_length$INT.HUC8.Name.SCW) unique(made_its_pits_IDs_genos_age_length$BPAsubbasin) coastal<-c("Hood","Wind") intermediate<-"Klickitat" interior<-as.character(unique(made_its_pits_IDs_genos_age_length$BPAsubbasin)[!unique(made_its_pits_IDs_genos_age_length$BPAsubbasin) %in% c(coastal,intermediate)]) r=1 lineages<-NULL for (r in 1:nrow(made_its_pits_IDs_genos_age_length)) { if(made_its_pits_IDs_genos_age_length$BPAsubbasin[r] %in% interior) { lineages<-c(lineages,"interior") } else if (made_its_pits_IDs_genos_age_length$BPAsubbasin[r] %in% coastal) { lineages<-c(lineages,"coastal") } else if (made_its_pits_IDs_genos_age_length$BPAsubbasin[r] %in% intermediate) { lineages<-c(lineages,"intermediate") } } table(lineages) length(lineages) made_its_pits_IDs_genos_age_length$lineage<-lineages write.table(made_its_pits_IDs_genos_age_length,"Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_filteredID_age_length_lineage.txt",col.names=TRUE,row.names=FALSE,quote=FALSE,sep="\t") madeit_data<-read.delim("Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_filteredID_age_length_lineage.txt",header=TRUE) #calculate Bonneville passage and Tributary arrival days relative to last or median per HUC, sub-basin (BPA) and year ##HUCxYEAR-wise mark/arrival day new_made_its<-data.frame(matrix(nrow = 0, ncol = (ncol(madeit_data)+4))) #h=1 #y=1 HUC8s<-unique(madeit_data$INT.HUC8.Name.SCW) years<-sort(unique(madeit_data$OBS.Event.Date.Year)) for(h in 1:length(HUC8s)) { for (y in 1:length(years)) { df_sub<-madeit_data[madeit_data$INT.HUC8.Name.SCW==as.character(HUC8s[h])&madeit_data$MRK.Event.Date.Year==(years[y]),] if (nrow(df_sub)>0) { last.passage.day.by.year<-max(df_sub$ordinal.marked)+1 median.passage.day.by.year<-median(df_sub$ordinal.marked) df_sub$early_passage_HUC8_year<-last.passage.day.by.year-df_sub$ordinal.marked df_sub$mid_passage_HUC8_year<-df_sub$ordinal.marked-median.passage.day.by.year last.arrival.day.by.year<-max(df_sub$ordinal.arrived)+1 median.arrival.day.by.year<-median(df_sub$ordinal.arrived) df_sub$early_arrival_HUC8_year<-last.arrival.day.by.year-df_sub$ordinal.arrived df_sub$mid_arrival_HUC8_year<-df_sub$ordinal.arrived-median.arrival.day.by.year #ncol(df_sub) new_made_its<-rbind.data.frame(new_made_its,df_sub) } } } head(new_made_its) #write.table(new_made_its,file="test.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) madeit_data<-new_made_its ##HUC-wise mark/arrival day new_made_its<-data.frame(matrix(nrow = 0, ncol = (ncol(madeit_data)+4))) #h=1 HUC8s<-unique(madeit_data$INT.HUC8.Name.SCW) for(h in 1:length(HUC8s)) { df_sub<-madeit_data[madeit_data$INT.HUC8.Name.SCW==as.character(HUC8s[h]),] if (nrow(df_sub)>0) { last.passage.day<-max(df_sub$ordinal.marked)+1 median.passage.day<-median(df_sub$ordinal.marked) df_sub$early_passage_HUC8<-last.passage.day-df_sub$ordinal.marked df_sub$mid_passage_HUC8<-df_sub$ordinal.marked-median.passage.day last.arrival.day<-max(df_sub$ordinal.arrived)+1 median.arrival.day<-median(df_sub$ordinal.arrived) df_sub$early_arrival_HUC8<-last.arrival.day-df_sub$ordinal.arrived df_sub$mid_arrival_HUC8<-df_sub$ordinal.arrived-median.arrival.day #ncol(df_sub) new_made_its<-rbind.data.frame(new_made_its,df_sub) } } head(new_made_its) write.table(new_made_its,file="test.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) madeit_data<-new_made_its ##BPAxYEAR-wise mark/arrival day new_made_its<-data.frame(matrix(nrow = 0, ncol = (ncol(madeit_data)+4))) h=2 y=1 BPAs<-unique(madeit_data$BPAsubbasin) years<-sort(unique(madeit_data$OBS.Event.Date.Year)) for(h in 1:length(BPAs)) { for (y in 1:length(years)) { df_sub<-madeit_data[madeit_data$BPAsubbasin==as.character(BPAs[h])&madeit_data$MRK.Event.Date.Year==(years[y]),] if (nrow(df_sub)>0) { last.passage.day.by.year<-max(df_sub$ordinal.marked)+1 median.passage.day.by.year<-median(df_sub$ordinal.marked) df_sub$early_passage_BPA_year<-last.passage.day.by.year-df_sub$ordinal.marked df_sub$mid_passage_BPA_year<-df_sub$ordinal.marked-median.passage.day.by.year last.arrival.day.by.year<-max(df_sub$ordinal.arrived)+1 median.arrival.day.by.year<-median(df_sub$ordinal.arrived) df_sub$early_arrival_BPA_year<-last.arrival.day.by.year-df_sub$ordinal.arrived df_sub$mid_arrival_BPA_year<-df_sub$ordinal.arrived-median.arrival.day.by.year #ncol(df_sub) new_made_its<-rbind.data.frame(new_made_its,df_sub) } } } head(new_made_its) write.table(new_made_its,file="test.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) madeit_data<-new_made_its ##BPA-wise mark/arrival day new_made_its<-data.frame(matrix(nrow = 0, ncol = (ncol(madeit_data)+4))) #h=2 BPAs<-unique(madeit_data$BPAsubbasin) for(h in 1:length(BPAs)) { df_sub<-madeit_data[madeit_data$BPAsubbasin==as.character(BPAs[h]),] if (nrow(df_sub)>0) { last.passage.day<-max(df_sub$ordinal.marked)+1 median.passage.day<-median(df_sub$ordinal.marked) df_sub$early_passage_BPA<-last.passage.day-df_sub$ordinal.marked df_sub$mid_passage_BPA<-df_sub$ordinal.marked-median.passage.day last.arrival.day<-max(df_sub$ordinal.arrived)+1 median.arrival.day<-median(df_sub$ordinal.arrived) df_sub$early_arrival_BPA<-last.arrival.day-df_sub$ordinal.arrived df_sub$mid_arrival_BPA<-df_sub$ordinal.arrived-median.arrival.day #ncol(df_sub) new_made_its<-rbind.data.frame(new_made_its,df_sub) } } head(new_made_its) names(new_made_its) new_made_its$lag.time<-new_made_its$ordinal.arrived-new_made_its$ordinal.marked #Bonneville is at River Km 234 new_made_its$eff.speed<-new_made_its$INT.River.km-234/new_made_its$lag.time write.table(new_made_its,file="Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_filteredID_age_length_lineage_HUC-BPA-YEAR-LAG-SPEED.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) ###comparing scale ages with PBT ages, calculate total age with river age made_its<-read.delim("Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_filteredID_age_length_lineage_HUC-BPA-YEAR-LAG-SPEED.txt",header=TRUE) river_ages<-read.delim("STHD_2013_2018_ocean_river.txt",header=TRUE) nrow(river_ages) names(river_ages) have_ages<-river_ages[!is.na(river_ages$Total),] nrow(have_ages) sum(river_ages$PitTag_long %in% made_its$PIT.Tag.Number) made_its_ages<-river_ages[river_ages$PitTag_long %in% made_its$PIT.Tag.Number,] nrow(made_its_ages) rownames(made_its_ages)<-made_its_ages$PitTag_long rownames(made_its)<-made_its$PIT.Tag.Number made_its_ages<-merge(made_its,made_its_ages,by="row.names") names(made_its_ages) made_its_ages<-made_its_ages[,!colnames(made_its_ages) %in% c("Row.names","Row.names.y")] head(made_its_ages) sum(have_ages$PitTag_long %in% made_its$PIT.Tag.Number) made_its_ages<-have_ages[have_ages$PitTag_long %in% made_its$PIT.Tag.Number,] rownames(made_its_ages)<-made_its_ages$PitTag_long made_its_ages2<-made_its[made_its$PIT.Tag.Number %in% have_ages$PitTag_long,] rownames(made_its_ages2)<-made_its_ages2$PIT.Tag.Number made_its_ages<-merge(made_its_ages,made_its_ages2,by="row.names") made_its_ages<-made_its_ages[,!colnames(made_its_ages) %in% c("Row.names","Row.names.x","Row.names.y")] head(made_its_ages) write.table(made_its_ages,file="Omy_pit_pings_Bonneville_2013_2018_marked_arrived_MADEITS_filteredID_length_lineage_HUC-BPA-YEAR-LAG-SPEED_ocean_total.txt",append=FALSE,quote=FALSE,sep="\t",col.names=T,row.names=F) #PBT assignments and ages PBT_ass_ages<-read.delim("OmyBonn_2009-2019_11-1-19Assignments_PBT_only_age_only.txt",header=TRUE) names(PBT_ass_ages) nrow(PBT_ass_ages) PBT_ass_ages$Total.age<-as.numeric(PBT_ass_ages$Date..Year.only)-as.numeric(PBT_ass_ages$SpawnYearYYYY) rownames(PBT_ass_ages)<-PBT_ass_ages$Individual.Name rownames(made_its_ages)<-made_its_ages$Individual.Name all_ages<-merge(PBT_ass_ages,made_its_ages,by="row.names") names(all_ages) nrow(all_ages) head(all_ages$Total.age) head(all_ages$Total) CORR<-cor.test(all_ages$Total.age,all_ages$Total,method="pearson") text<-bquote(R^2 == .(round(CORR$estimate, 2))) jitter=0.3 ggplot(data = all_ages, aes(x = Total.age, y = Total)) + geom_jitter(width = jitter, height = jitter,size=2)+labs(x= "PBT Age", y = "Scale Age")+ annotate("text", x = 2, y = 4.25, label = text,size=6) + scale_x_discrete(limits=c(2,3,4))+scale_y_discrete(limits=c(2,3,4))+theme_classic()+ labs(x= "Parentage Age (yrs)", y = "Scale Age (yrs)") + theme(axis.text=element_text(size=16), axis.title=element_text(size=16,face="bold"), axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)), axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0)))