#set the path to your working directory, where all .csv files must be placed setwd("~/SWITCHdrive/3_Projects/Anschubfinanzierung/Data analysis/Data for dryad") #collect all data, add some variables, and save them as dataframe "all.data" files <- data.frame( run = c("Run140514","Run140708","Run140722","Run140814","Run140902","Run140619","Run150624","Run150817","Run150909"), type = c("std.1", "std.2", "std.3", "std.4", "aq.fert", "aq.no.n", "aq.no.me", "aq.only.1","aq.only.2"), use.for.growth = c(F,F,F,T,T,T,F,F,F), use.for.evap = c(T,T,T,F,T,T,F,F,F)) all.data <- data.frame() # Factors par2watt <- 1/4.94 # PPFD to Watts per m2. Use conversion factor as given by Doucha and Livansky (2013) n.cont <- 0.08493 # N content of microalgae biomass (w/w) (Doucha and Livansky, 2006). p.cont <- 0.00899 # P content of microalgae biomass (w/w) (Doucha and Livansky, 2006). energy.in.biomass <- 23 # Energy content per g biomass in kJ kWh2kJ <- 3600 # kWh to kJ pe.assumed <- 0.05 # Energy in PAR converted into energy in Biomass par(mfrow = c(3,3), mar = c(4,4,1,1)) for(i in 1:nrow(files)){ run <- files$run[i] type <- files$type[i] cat(paste("\nProcesssing now:",run)) raw.data <- read.csv(paste(run,".csv", sep = ""), sep = ",", dec = ".", header = T) raw.data$time.string <- as.POSIXct(raw.data$time.string) if("dryweight.not.washed" %in% colnames(raw.data)){ names(raw.data)[names(raw.data)=="dryweight.not.washed"] <- "dryweight" } #Remove data that are before or after runs (e.g. if inoculum size was so small that dryweight measurements were not possible for a few days) if(run == "Run140708"){ cat(paste("\n\tData before or after run are truncated.")) index <- which(raw.data$time.string == "2014-07-12 08:30:00") raw.data <- raw.data[index:nrow(raw.data),] } #Now, the beginning of raw.data is the beginning of the experiment plot(raw.data$time.string, raw.data$TURBIDITY, ylim = c(0,35), xlab="", ylab="Turbidity (g solids/l)", type = "l", main = paste(run, type, sep = ","), lwd = 2) #Create new variable "days", which is time as days since midnight before the start of the experiment first.time <- as.POSIXct(raw.data$date[1]) raw.data$days <- as.numeric(difftime(raw.data$time.string, first.time, units = "days")) cat(paste("\n\tDuration of run",files$run[i],"is",round(raw.data$days[nrow(raw.data)], digits = 1),"days.")) #Create new variable "water" which is the cumulative amount of water added since the beginning of the experiment raw.data$water <- raw.data$SUM.OF.WATER-raw.data$SUM.OF.WATER[1] #Create new variable "delta.water" which is the amount of water added per 10-min time step raw.data$delta.water <- c(0,diff(raw.data$water, lag = 1)) #remove water data where the water meter was broken or where readings were erroneous. Replace water data in Run140902 with manually acquired data if(run == "Run140514"){ start <- which(raw.data$time.string == "2014-05-21 13:40:00") end <- which(raw.data$time.string == "2014-05-28 08:00:00") raw.data$water[start:end] <- NA index <- which(raw.data$time.string == "2014-05-28 06:00:00") raw.data$dryweight[index] <- NA raw.data <- raw.data[c(1:1980,1990:nrow(raw.data)),] } if(run == "Run140619"){ raw.data <- raw.data[1:(nrow(raw.data)-1),] } if(run == "Run140814"){ raw.data$water <- NA raw.data <- raw.data[1:(nrow(raw.data)-1),] #remove erroneous turbidity measurement index <- which(raw.data$time.string == "2014-08-26 07:40:00") raw.data$TURBIDITY[index] <- NA } if(run == "Run140722"){ #at July 29 the water meter stopped working index <- which(raw.data$time.string == "2014-07-29 00:00:00") raw.data$water[index:nrow(raw.data)] <- NA #dry weight is too low index <- which(raw.data$time.string == "2014-08-02 09:00:00") raw.data$dryweight[index] <- NA } if(run == "Run140902"){ #two times the numbers on the replacement water meter were not correctly read raw.data$water.m3[which(raw.data$time.string == "2014-10-04 09:30:00")] <- 56.1188 raw.data$water.m3[which(raw.data$time.string == "2014-10-05 11:00:00")] <- 56.1759 #water meter is broken. Replace values with those that have been collected with the replacement meter. raw.data$water <- NA raw.data$water <- (raw.data$water.m3 - raw.data$water.m3[1])*1000 #after October 4th, dryweight reached a plateau. index <- which(raw.data$time.string == "2014-10-04 08:30:00") raw.data$dryweight[(index+1):nrow(raw.data)] <- NA } if(run == "Run150909"){ #remove erroneous turbidity measurement index <- which(raw.data$time.string == "2015-09-17 17:40:00") raw.data$TURBIDITY[index] <- NA } lines(raw.data$time.string, raw.data$TURBIDITY, col = "red", lwd = 2) points(raw.data$time.string, raw.data$dryweight, col = "red") # calculate for every day (midnight to midnight) some variables. Store them at the end of the day at 00:00 # amount of water evaporated raw.data$evap.24h <- NA # PAR absorbed raw.data$par.abs <- raw.data$PAR.2 - raw.data$PAR.1 raw.data$par.W.m2 <- raw.data$par.abs * par2watt raw.data$par.24h <- NA raw.data$par.kWh.m2.24h <- NA # average temperature raw.data$temp.24h <- NA midnight <- which(raw.data$days%%1 == 0) if(length(midnight) >= 2){ for(j in 2:length(midnight)){ now <- midnight[j] before <- midnight[j-1] # only use 24-hr intervals if(raw.data$days[now] - raw.data$days[before] - 1 == 0){ raw.data$evap.24h[now] <- raw.data$water[now] - raw.data$water[before] # PAR is given in 10-min averages raw.data$par.24h[now] <- sum(raw.data$par.abs[(before+1):now]) * 600 raw.data$par.kWh.m2.24h[now] <- sum(raw.data$par.W.m2[(before+1):now] / 6) / 1000 raw.data$temp.24h[now] <- mean(raw.data$TEMPERATURE[(before+1):now]) } } } # calculate daily increase in biomass (not exactly 24h, depends on sampling times) # and calculate par absorbed/water evaporated/average temperature in the same intervals. Only accept intervals < 1.5 days. raw.data$n.conc.inflow <- NA raw.data$n.conc.in.pbr <- NA raw.data$dw.inc.pred <- NA raw.data$dw.pred <- NA raw.data$delta.dw <- NA raw.data$par.at.dw <- NA raw.data$par.kWh.m2.at.dw <- NA raw.data$evap.at.dw <- NA raw.data$temp.at.dw <- NA raw.data$day.temp.at.dw <- NA raw.data$pe <- NA raw.data$bioenergy.acquired <- NA raw.data$time.dw <- NA indices <- which(!is.na(raw.data$dryweight)) for(j in 2:length(indices)){ now <- indices[j] before <- indices[j-1] if(raw.data$days[now] - raw.data$days[before] < 1.5){ raw.data$delta.dw[now] <- raw.data$dryweight[now] - raw.data$dryweight[before] raw.data$time.dw[now] <- raw.data$days[now] - raw.data$days[before] raw.data$bioenergy.acquired[now] <- raw.data$delta.dw[now] * energy.in.biomass raw.data$evap.at.dw[now] <- raw.data$water[now] - raw.data$water[before] raw.data$par.at.dw[now] <- sum(raw.data$par.abs[(before+1):now]) * 600 raw.data$par.kWh.m2.at.dw[now] <- sum(raw.data$par.W.m2[(before+1):now] / 6) / 1000 raw.data$temp.at.dw[now] <- mean(raw.data$TEMPERATURE[(before+1):now]) temp <- raw.data[(before+1):now,] raw.data$day.temp.at.dw[now] <- mean(temp$TEMPERATURE[which(temp$daynight == 1)]) raw.data$pe[now] <- (raw.data$delta.dw[now] * energy.in.biomass) / (raw.data$par.kWh.m2.at.dw[now] * 18 / 200 * kWh2kJ) # remove erroneous data if(!is.na(raw.data$par.at.dw[now])){ if(raw.data$par.at.dw[now] < 1){ raw.data$par.at.dw[now] <- NA raw.data$par.kWh.m2.at.dw[now] <- NA } } } } if(run == "Run140619"){ # Calculate how much nitrogen is in the water that is added to the reactor (g/l) indices <- c(which(raw.data$water.barrel.refilled == "yes"), nrow(raw.data)) for(j in 1:(length(indices)-1)){ raw.data$nh4.n[indices[j]] <- ifelse(is.na(raw.data$nh4.n[indices[j]]),0,raw.data$nh4.n[indices[j]]) raw.data$n.conc.inflow[indices[j]:(indices[j+1]-1)] <- (raw.data$no3.n[indices[j]] + raw.data$nh4.n[indices[j]]) } raw.data$n.conc.inflow[nrow(raw.data)] <- (raw.data$no3.n[nrow(raw.data)] + raw.data$nh4.n[nrow(raw.data)]) # g/l instead of mg/l raw.data$n.conc.inflow <- raw.data$n.conc.inflow / 1000 } if(run %in% c("Run150624","Run150817","Run150909")){ # Measurement with Hach-Lange are given in NO3-N (or the like), measurements with the IC are give as NO3 (or the like) indices <- which(!is.na(raw.data$nr.ic)) if(indices[length(indices)] < nrow(raw.data)){ indices <- c(indices, nrow(raw.data)) } for(j in 1:(length(indices)-1)){ index <- indices[j] n.inflow <- raw.data$ic.no3[index]/62*14 + raw.data$ic.nh4[index]/18*14 # If measurements have also been made with Hach-Lange, then take the average of both if(!is.na(raw.data$hach.no3[index])){ n.hach <- raw.data$hach.no3[index] + raw.data$hach.no2[index] + raw.data$hach.nh4[index] n.inflow <- (n.inflow + n.hach) / 2 } raw.data$n.conc.inflow[index:(indices[j+1]-1)] <- n.inflow } # g/l instead of mg/l raw.data$n.conc.inflow <- raw.data$n.conc.inflow / 1000 } # Consider only those runs, where no fertilizer was added if(run %in% c("Run140619","Run150624","Run150817","Run150909")){ # When filling the reactor, the concentration in the reactor is the same as in the water raw.data$n.conc.in.pbr[1] <- raw.data$n.conc.inflow[1] # Given PAR, how much biomass can grow in a 10-min period? (g/l) raw.data$dw.from.par <- raw.data$par.W.m2 / 1000 / 6 * kWh2kJ / 23 / 200 # How much nitrogen (g/l) is required for this? raw.data$n.req.from.par <- raw.data$dw.from.par * n.cont # How much phosphorous (g/l) is required for this? raw.data$p.req.from.par <- raw.data$dw.from.par * p.cont # Check for every step whether growth is limited by light or by nitrogen and take the lower value (g/l) for(j in 1:nrow(raw.data)){ # How much biomass actually grows in a 10-min period (g/l)? PAR tells you what is possible, but it may be limited by N. raw.data$dw.inc.pred[j] <- ifelse(raw.data$n.req.from.par[j] < raw.data$n.conc.in.pbr[j], raw.data$dw.from.par[j], raw.data$n.conc.in.pbr[j] / n.cont) # How much N does this require (g/l)? n.used <- raw.data$dw.inc.pred[j] * n.cont # How much N has been delivered from the external source (g/l)? n.gained <- raw.data$delta.water[j] * raw.data$n.conc.inflow[j] / 200 # How much N (g/l) is left in the water after growth? n.left <- raw.data$n.conc.in.pbr[j] - n.used + n.gained if(j < nrow(raw.data)){ raw.data$n.conc.in.pbr[j+1] <- ifelse(n.left >= 0, n.left, 0) } } # Calculate growth and take into account that algae were added in the beginning raw.data$dw.pred <- cumsum(raw.data$dw.inc.pred) + 1.12 } all.data <- rbind(all.data,data.frame( time = raw.data$time.string, # UTC days = raw.data$days, # days since midnight before start ph = raw.data$pH, # average pH (every 10 min) temp = raw.data$TEMPERATURE, # average temperature in °C (every 10 min) dw = raw.data$dryweight, # dry weight (every morning) par = raw.data$par.abs, # average PAR absorbed per m2 and s (every 10 min) par.w.m2 = raw.data$par.abs * par2watt, # same as above but W/m2 water = raw.data$water, # water added since start par.24h = raw.data$par.24h, # PAR absorbed per day (midnight to midnight) (in umol photons) par.kWh.m2.24h = raw.data$par.kWh.m2.24h, # PAR absorbed per day (midnight to midnight) (in kWh per m2) evap.24h = raw.data$evap.24h, # water evaporated per day (midnight to midnight) temp.24h = raw.data$temp.24h, # average culture temp (midnight to midnight) delta.dw = raw.data$delta.dw, # increase in dry weight between measurements time.dw = raw.data$time.dw, # time elapsed since last dry weight measurement (in days) evap.at.dw = raw.data$evap.at.dw, # water evaporated between dw measurements par.at.dw = raw.data$par.at.dw, # PAR absorbed between dw measurements per m2 (in umol photons) par.kWh.m2.at.dw = raw.data$par.kWh.m2.at.dw, # PAR absorbed between dw measurements per m2 (in Wh) temp.at.dw = raw.data$temp.at.dw, # Average temperature between dw measurements (deg C) day.temp.at.dw = raw.data$day.temp.at.dw, # Average daytime temperature between dw measurements pe = raw.data$pe, # Photosynthetic efficiency calculated from dw measurements n.inflow = raw.data$n.conc.inflow, # concentration of N in the supplied water n.pbr = raw.data$n.conc.in.pbr, # concentration of N in the pbr dw.inc.pred = raw.data$dw.inc.pred, # predicted biomass increase in 10-min period dw.pred = raw.data$dw.pred, # predicted total biomass as predicted from available N and PAR turb = raw.data$TURBIDITY, # as measured by the sensor run = files$run[i], # ID of the run type = files$type[i])) # type of batch } # Add sunrise/sunset data sun <- read.csv(file = "sunrise2014.csv", header = T, sep = ",", stringsAsFactors = F) sun$date <- as.Date(sun$date) sun <- rbind(sun, sun) sun$date[366:730] <- sun$date[366:730] + 365 for(i in 1:nrow(sun)){ today <- as.POSIXct(paste(sun$date[i],"00:00:00"), tz = "UTC") today <- today + (3600 * c(sun$sunrise[i], sun$sunset[i])) sun$sunrise[i] <- as.POSIXct(today[1]) sun$sunset[i] <- today[2] } sun$sunrise <- as.POSIXct(sun$sunrise, origin = "1970-01-01 00:00:00 UTC") sun$sunset <- as.POSIXct(sun$sunset, origin = "1970-01-01 00:00:00 UTC") all.data$date <- format(all.data$time, format = "%Y-%m-%d", tz = "UTC") all.data$day1night0 <- 0 for(i in 1:nrow(all.data)){ date <- all.data$date[i] sunrise <- sun$sunrise[which(sun$date == date)] sunset <- sun$sunset[which(sun$date == date)] all.data$day1night0[i] <- ifelse((all.data$time[i] > sunrise) & (all.data$time[i] < sunset), 1, all.data$day1night0[i]) } save(all.data, file = "all.data.RData") rm(raw.data,sun,date,end,index,midnight,n.cont,n.hach,n.inflow,p.cont,start,sunrise,sunset,today,type,n.gained,before,first.time,i,indices,j,n.left,n.used,now,run,files) rm(all.data)