rqtl<-list() rqtl$plotGenoProb <- function(idname, gpcross, chr){ # Plots the three genotype probabilities for a given F2 individual from an "F2" design # idname is the name of the F2 individual (not a column number) # gpcross contains the genotype probabilities # chr must be identical in name to that in the linkage map used # test # idname <- "F2_P5_1"; chr <- 4 idname <- as.character(idname) # Grab the genotype probabilities for the whole chromosome (includes pseudomarkers) # Each marker has 3 rows, one for each of the three genotypes genoprob <- pull.genoprob(gpcross, chr, include.pos.info = TRUE, rotate = TRUE) genoprob.id <- data.frame(genoprob[, c(1:4)], prob = genoprob[, idname], stringsAsFactors = FALSE) plot(prob ~ pos, data = genoprob.id[genoprob.id$gen == "AA", ], type = "l", ylim=c(0,1), col = "red", main = paste(idname[1], "chr", chr[1])) lines(prob ~ pos, data = genoprob.id[genoprob.id$gen == "BB", ]) lines(prob ~ pos, data = genoprob.id[genoprob.id$gen == "AB", ], lty = 2) } rqtl$tabulatePeakmarkerFreqPerPond <- function(gpcross, chr = 4, pos = 43.50706, vicinity = 5, f2info = f2info){ # Extracts the genotype probabilities of all TRUE markers within "vicinity" cM of pos, # uses them to obtain the highest probability genotype at each of the selected markers, # and then the "consensus" genotype for every F2 for the whole interval (the most frequent genotype), # and finally tabulates the genotype frequencies for all the F2's in the vicinity. # gpcross contains the genotype probabilities # make vicinity small if want to include only the single genotype at peak marker # f2info is obtained by reading the file "f2info.csv" # gpcross <- pondsAll.gp; chr = 4; pos = 43.50706 # gpcross <- pondsAll.gp; chr = 9; pos = 16.00 # Grab the genotype probabilities for the whole chromosome (includes pseudomarkers) # Each marker has 3 rows, one for each of the three genotypes at a marker genoprob <- pull.genoprob(gpcross, chr, include.pos.info=TRUE, rotate=TRUE) # Obtain all the marker and pseudomarker names on the whole chromosome # (Each marker name is repeated 3 times) genoprob.marker <- genoprob$marker # Grab the list of markers in the vicinity of pos. # Can't use pull.map, because it doesn't include pseudomarkers. Must get from gpcross$geno$prob # geno <- gpcross$geno[ as.character(chr) ][[ as.character(chr) ]]$prob # geno.map <- attr(geno, "map") # peakmarkers <- geno.map[geno.map > pos - vicinity & geno.map < pos + vicinity] # Easier! # peakmarkers <- genoprob$marker[genoprob$pos > pos - vicinity & genoprob$pos < pos + vicinity] peakmarkers <- which(genoprob$pos > pos - vicinity & genoprob$pos < pos + vicinity) # Grab genotype probabilities for these markers # To make this work, we have to account for the fact that the row names # don't match those in gpcross$geno$prob, which don't include the "c9." prefix # genoprob.chrnames2 <- rep(names(geno.map), rep(3, length(geno.map))) # test that the marker names correspond # cbind(rownames(genoprob), genoprob.chrnames2) # Another test, that genoprob.chrnames2 are a substring of genoprob.chrnames # all( mapply(genoprob.chrnames, genoprob.chrnames2, FUN = function(x,y){grepl(y,x)}) ) # genoprob <- genoprob[genoprob.chrnames2 %in% names(peakmarkers), ] # genoprob.chrnames <- genoprob.chrnames[genoprob.chrnames2 %in% names(peakmarkers)] genoprob <- genoprob[peakmarkers, ] z <- split(genoprob, genoprob$marker) # head(z[[1]]) # marker gen chr pos F2_P6_20 F2_P6_27 F2_P6_7 F2_P6_22 F2_P6_12 F2_P6_41 F2_P6_3 # c9.loc12:AA c9.loc12 AA 9 12 0.1576773 0.1576773 0.1576773 0.25 0.25 0.1576773 0.64611433 # c9.loc12:AB c9.loc12 AB 9 12 0.6846454 0.6846454 0.6846454 0.50 0.50 0.6846454 0.31537311 # c9.loc12:BB c9.loc12 BB 9 12 0.1576773 0.1576773 0.1576773 0.25 0.25 0.1576773 0.03851255 # ... # Determine the highest probability genotype at each marker for each F2 and assign it to that F2 bestgeno <- lapply(z, function(x){ # x <- z[[1]] bestgeno <- x$gen[ apply(x[, -c(1:4)], 2, function(x){which(x == max(x))}) ] names(bestgeno) <- colnames( x[, -c(1:4)] ) bestgeno }) bestgeno <- do.call("rbind", bestgeno) # F2 genotypes at each marker # tabulate genotypes for each F2 across markers and pseudomarkers in the vicinity of peak bestgenoTab <- apply(bestgeno, 2, function(x){table(factor(x, levels = c("AA","AB","BB")))}) # head(bestgenoTab) # F2_P6_20 F2_P6_27 F2_P6_7 F2_P6_22 F2_P6_12 F2_P6_41 F2_P6_3 F2_P6_11 F2_P6_35 F2_P6_1 F2_P6_26 # AA 0 0 0 0 0 0 8 0 0 0 0 # AB 8 8 8 8 8 8 0 0 8 8 0 # BB 0 0 0 0 0 0 0 8 0 0 8 # ... # Identify consensus genotype (most frequent genotype) for each individual F2 # Make sure it is a unique genotype peakgenotype <- apply(bestgenoTab, 2, function(x){ c("AA","AB","BB")[which(x==max(x))] }) nmax <- sapply(peakgenotype, length) # number of most-frequent genotypes (want just 1) peakgenotype <- mapply(peakgenotype, nmax, FUN = function(x,y){if(y > 1) x <- NA; return(x)}) # Grab the source pond for each F2 and calculate pond genotype freqeuencies sourcepond <- f2info$pond[match(names(peakgenotype), f2info$F2)] print(table(factor(sourcepond, levels = c("P6","P7","P8","P10","P12","P14","P5","P11")), peakgenotype, useNA = "always")) } rqtl$glm.scan <- function(gpcross, trait, covariates=NULL, model = "quasipoisson"){ # gpcross <- newGreatAll.gp; trait <- "noffspring.p0.6"; covariates <- family; model <- "quasipoisson" if(model != "quasipoisson") stop("Only quasipoisson enabled so far") # Carries out Haley-Knott regression on the genotype probabilities specified in the gpcross. # Uses "glm" instead of "lm", as suggested by Haley and Knott at the end of their Nature paper. # Returns results in a data frame also having class "scanone" so can use plot to plot results # "gpcross" is the Rqtl cross object - must contain genotype probabilities. # "trait" is the trait name in quotes. # "covariates" is a single covariate or a model matrix for additive covariates # "model" refers to the glm family: binomial, poisson, quasibinomial, quasipoisson # Fits the additive+dominance form # The dominance (d) and additive (a) terms in the linear predictor are calculated as follows: # d = 0.5*Pr[AB] # a = 0.5 - Pr[AA]/2 + Pr[BB]/2 # The magnitudes of the resulting coefficients correspond to those of traditional line means analyses # and are TWICE that estimated in effectscan or fitqtl in ests$ests (SE's are also double) # Lod scores calculated according to Broman & Sen p76-77. if(is.null(gpcross$geno[[1]]$prob)) stop("Cross is missing genotype probabilities") if(!(trait %in% names(gpcross$pheno))) stop("Unrecognized trait") response <- gpcross$pheno[,trait] results <- vector() for(i in names(gpcross$geno)){ # i <- names(gpcross$geno)[4] probmap <- attr(gpcross$geno[[i]]$prob,"map") # pull.map command doesn't yield pseudomarkers markers <- names(probmap) fakemarkers <- grepl("loc[0-9]+", markers) markers[fakemarkers] <- paste("c", i, ".", markers[fakemarkers], sep = "") chrdat <- pull.genoprob(gpcross, i) chrlods <- lapply(markers, function(m){ # m <- markers[1] datnames <- paste(m, c(":AA", ":AB", ":BB"), sep = "") dat <- chrdat[, datnames] colnames(dat) <- c("AA","AB","BB") misng <- apply(cbind(dat,response,covariates),1,function(x){any(is.na(x))}) y <- response[!misng] x <- dat[!misng,] if(is.matrix(covariates)) f <- covariates[!misng,] else f <- covariates[!misng] z0 <- glm( y ~ f , family = quasipoisson(link = "log")) d <- 0.5*x[,"AB"] # dominance #a <- 0.5 - x[,"AA"]/2 + x[,"BB"]/2 # additive, calculated as a deviation from 0.5 toward AA or BB a <- (1 - x[,"AA"] + x[,"BB"])/2 # same, but maybe quicker to calculate z1 <- glm( y ~ a + d + f, family = quasipoisson(link = "log")) # "full" model with additive and dominance term z <- anova(z0, z1, test = "F") Fstat <- z["F"][2,1] df <- z["Df"][2,1] acoef <- coef(z1)["a"] dcoef <- coef(z1)["d"] n <- length(y) # Using Broman & Sen conversion between F and lod on p77 lod <- (n/2) * log10( Fstat*df/(n-df-1) + 1 ) return(c(lod = lod, acoef, dcoef)) }) chrlods <- do.call("rbind", chrlods) z <- cbind.data.frame(chr = i, pos = probmap, chrlods, stringsAsFactors=FALSE) rownames(z) <- markers results <- rbind.data.frame(results,z) } class(results) <- c("scanone","data.frame") return(results) } rqtl$plot.pxgprob <- function(gpcross, trait, chr, pos, groupvariable=NULL, gotomarker=FALSE, strip=FALSE, ...){ # Function to plot phenotype against additive genotype in an ***F2-type cross*** # The additive genotype is calculated as in the additive+dominance model using Haley-Knott # Is like plot.pxg in Rqtl but plots against genotype probabilities rather than imputations # Uses marker nearest to "pos", if pos might is an imputed marker # If groupvariable=familyname is given, a separate plot is given for each family # Set strip=TRUE to get header strips when groupvariable=[familyname] # Set gotomarker=TRUE to get plots for nearest true marker # ... additional arguments to xyplot # if(summary(gpcross)$type != "f2") stop("This function only works for F2-type crosses") y <- pull.pheno(gpcross, trait) if(gotomarker){ nearestmarker <- find.marker(gpcross, chr=chr, pos=pos) x <- gpcross$geno[[as.character(chr)]]$prob[,nearestmarker,] markername <- nearestmarker } else{ pseudomap <- attr(gpcross$geno[[as.character(chr)]]$prob, "map") markername <- names(pseudomap)[pseudomap==pos] if(grepl("loc", markername)) markername <- sub("[c][0-9.]*[.]","", markername) x <- gpcross$geno[[as.character(chr)]]$prob[,markername,] } notmissing <- apply(cbind(x,y),1,function(x){all(!is.na(x))}) x <- x[notmissing, ] y <- y[notmissing] if(!is.null(groupvariable)){ family <- as.character(pull.pheno(gpcross, groupvariable)) family <- as.character(family[notmissing]) } a <- 0.5 - x[,"AA"]/2 + x[,"BB"]/2 # additive term, calculated as deviation from 0.5 toward AA or B #print(split(a,family)) if(is.null(groupvariable) | length(unique(family)) < 2){ plot(y ~ a, ylab=trait, xlab=paste("chr =",chr, "pos =",pos, markername), pch=1, col="blue", ...) agroups <- cut(a,breaks=c(-1,.25,.75,1.1)) amean <- tapply(a, agroups, mean, na.rm=TRUE) ymean <- tapply(y, agroups, mean, na.rm=TRUE) #abline(lm(y ~ a )) lines(ymean ~ amean) } else{ z <- lm(y ~ a + as.factor(family)) slope <- z$coef[2] library(lattice) # Weirdly, it is necessary to use print.trellis inside a user function to get trellis graph to plot print(xyplot(y ~ a | family, ylab=trait, xlab=paste("chr =",chr, "pos =",pos, markername), scales = list(cex = 0.6), strip = strip, main = paste("lod = ", round(lodscore,2)), panel=function(x,y, ...){ # this function is repeated for each panel panel.xyplot(x,y,...) #if( max(x) - min(x) > 0.4) panel.lmline(x,y, lty=1) #panel.abline( a=(lm( (y-slope*x) ~ 1)$coef[1]), b=slope, lty=2) #print(x) #print(y) agroups <- cut(x,breaks=c(-1,.25,.75,1.1)) levels(agroups) <- c(1,2,3) agroups <- as.numeric(agroups) #print(agroups) amean <- tapply(x, agroups, mean, na.rm=TRUE) ymean <- tapply(y, agroups, mean, na.rm=TRUE) panel.lines(ymean ~ amean) }, ...)) } } rqtl$scan.nonparallel <- function(gpcross, traitname, groupname, removeoutliers = FALSE, outlierz=5, method = "lod"){ # Fits a model for the interaction between groupname and genotype # This can be used to test for non-parallel qtl between groups (e.g., families or lakes) # "gpcross" is the cross object containing the genotype probabilities # If removeoutliers=TRUE, residuals more than outlierz sd away from the mean (based on the full model) # are dropped and the models are refitted to the remaining data # Returns lod, aicdiff and bicdiff. To plot aicdiff instead of lod, use plot(scanoneobject, lodcolumn=2) # No setup yet to handle additional covariates # if(is.null(gpcross$geno[[1]]$prob)) stop("Cross is missing genotype probabilities") if(!(traitname %in% names(gpcross$pheno))) stop("Unrecognized trait") group <- as.character(pull.pheno(gpcross, groupname)) response <- gpcross$pheno[,traitname] results <- vector() for(i in names(gpcross$geno)){ # i <- names(gpcross$geno)[4] probmap <- attr(gpcross$geno[[i]]$prob,"map") chrlods <- apply(gpcross$geno[[i]]$prob,2,function(dat){ # dat <- gpcross$geno[[i]]$prob[,1,] misng <- apply(cbind(dat,response,group),1,function(x){any(is.na(x))}) y <- response[!misng] x <- dat[!misng,] f <- group[!misng] z0 <- lm( y ~ x - 1 + f ) # the "-1" drops the intercept but keeps 3 columns in x: AA AB BB z1 <- lm( y ~ x - 1 + f + f*x) # model with the interaction between group and genotype # extra steps to remove outliers (from full model?) if(removeoutliers){ r <- residuals(z1) r <- (r - mean(r))/sd(r) out <- which(abs(r)>5) if(length(out) > 0 ){ y <- y[-out] x <- x[-out,] f <- f[-out] z0 <- lm( y ~ x - 1 + f ) z1 <- lm( y ~ x - 1 + f + f*x) } } rss0 <- anova(z0)["Sum Sq"]["Residuals",] rss1 <- anova(z1)["Sum Sq"]["Residuals",] lod <- ( length(y)/2 ) * log10 (rss0/rss1) aicdiff <- AIC(z0) - AIC(z1) bicdiff <- BIC(z0) - BIC(z1) return(c(lod, aicdiff, bicdiff)) }) # Note the tricky "t(chrlods)", needed when calculating aic, etc in addition to lod z <- cbind.data.frame(i,probmap,t(chrlods), stringsAsFactors=FALSE) names(z) <- c("chr", "pos", "lod", "aicdiff", "bicdiff") fakemarkers <- grepl("loc[0-9]+",rownames(z)) rownames(z)[fakemarkers] <- paste("c",i,".",rownames(z)[fakemarkers], sep="") results <- rbind.data.frame(results,z) } class(results) <- c("scanone","data.frame") return(results) } rqtl$geno.table <- function(crosslist, impute = FALSE, combine = FALSE){ # "crosslist" is a list of crosses, all having the same map # Usually each element of crosslist is the cross object for 1 family # This version works only for the "F2" type cross. # Function calculates the chisquare statistic for marker frequencies, family by family, # by deviations from 1:2:1 (AA,AB,BB) # or 1:1 (notAA, notBB) [ordinary geno.table can't handle this type of marker] # If combine=TRUE, the chisq and df are summed across families # If combine=FALSE, a list is returned, with elements the results of individual families # If impute=TRUE, uses fill.geno to generate a single imputation of genotypes based on sim.geno. # Results are of class "scanone" for easy plotting. x1 <- lapply(crosslist, function(x){ if(impute) x <- fill.geno(x, method="imp", map.function = "kosambi") mapfile <- as.data.frame(pull.map(x, as.table=TRUE), stringsAsFactors=FALSE) x1 <- geno.table(x) x1$pos <- mapfile$pos[match(rownames(x1), rownames(mapfile))] x1 <- x1[,c(1,ncol(x1),2:(ncol(x1)-1))] # reorder the columns n <- nind(x) x2 <- apply(x1[,-1], 1, function(x){ # need to drop first column of x1 or x is converted to character if(x[2] < 0.95*n){ z <- x[-c(1:2)] #print(z) if( sum(z[1:3]) > sum(z[4:5]) ){ chisq <- chisq.test(z[1:3], p = c(0.25, 0.50, 0.25))$statistic df <- 2 } else{ chisq <- chisq.test(z[4:5], p = c(0.50, 0.50))$statistic df <- 1 } } else{ chisq <- 0 df <- 0 } return(c(chisq,df)) }) x2 <- as.data.frame(t(x2)) names(x2) <- c("chisq","df") x2$P.value <- 1 - pchisq(x2$chisq, x2$df) x2 <- cbind.data.frame(x1[,c(1:2)], x2, x1[,c(3:(ncol(x1)-1))], stringsAsFactors=FALSE) class(x2) <- c("scanone","data.frame") return(x2) }) if(combine){ names(x1) <- 1:length(x1) # rename families 1 to m x1 <- lapply(x1, function(x){ x$marker <- rownames(x) return(x) }) z <- do.call("rbind.data.frame", x1) z1 <- aggregate(z[,c("missing", "AA", "AB", "BB", "not.BB", "not.AA", "chisq","df")], list(marker=z$marker), sum) # Sum chisq and df across families z1$P <- 1 - pchisq(z1$chisq, z1$df) segdist <- x1[[1]][,c("chr","pos")] segdist[,c("missing", "AA", "AB", "BB", "not.BB", "not.AA", "chisq", "df","P")] <- z1[match(rownames(segdist), z1$marker), -1] x2 <- segdist[,c(1,2,9:11,3:8)] return(x2) } else{ return(x1) } } rqtl$add.scan <- function(gpcross, trait, covariates=NULL){ # Calculates the lod score corresponding to the fit of an additive model. # Uses Haley-Knott regression on the genotype probabilities specified in the gpcross. # "gpcross" is the Rqtl cross object - must contain genotype probabilities. # "trait" is the trait name in quotes. # "covariates" is a single covariate or a model matrix for additive covariates # The additive (a) term in the linear is calculated as follows: # a = 0.5 - Pr[AA]/2 + Pr[BB]/2 # The magnitude of the resulting coefficient corresponds to those of traditional line means analyses # and is TWICE that estimated in effectscan or fitqtl in ests$ests (SE's are also double) # Lod scores calculated according to Broman & Sen p76-77. # Returns results in a data frame also having class "scanone" so can use plot to plot results if(is.null(gpcross$geno[[1]]$prob)) stop("Cross is missing genotype probabilities") if(!(trait %in% names(gpcross$pheno))) stop("Unrecognized trait") response <- gpcross$pheno[,trait] results <- vector() for(i in names(gpcross$geno)){ # i <- names(gpcross$geno)[4] probmap <- attr(gpcross$geno[[i]]$prob,"map") chrlods <- apply(gpcross$geno[[i]]$prob,2,function(dat){ misng <- apply(cbind(dat,response,covariates),1,function(x){any(is.na(x))}) y <- response[!misng] x <- dat[!misng,] if(is.matrix(covariates)) f <- covariates[!misng,] else f <- covariates[!misng] #d <- 0.5*x[,"AB"] # dominance #a <- 0.5 - x[,"AA"]/2 + x[,"BB"]/2 # additive, calculated as a deviation from 0.5 toward AA or BB a <- (1 - x[,"AA"] + x[,"BB"])/2 # same, but might be quicker to calculate z0 <- lm( y ~ f) # "reduced" model includes only covariates, if present z1 <- lm( y ~ a + f) # "full" model with additive term rss0 <- anova(z0)["Sum Sq"]["Residuals",] rss1 <- anova(z1)["Sum Sq"]["Residuals",] lod <- ( length(y)/2 ) * log10 (rss0/rss1) a <- coef(z1)["a"] return(c(lod,a)) }) z <- cbind.data.frame(i,probmap,chrlods[1,], chrlods[2,], stringsAsFactors=FALSE) names(z) <- c("chr", "pos", "lod", "a") fakemarkers <- grepl("loc[0-9]+",rownames(z)) rownames(z)[fakemarkers] <- paste("c",i,".",rownames(z)[fakemarkers], sep="") results <- rbind.data.frame(results,z) } class(results) <- c("scanone","data.frame") return(results) } rqtl$add.dom.scan <- function(gpcross, trait, covariates=NULL){ # Calculates the lod score corresponding to the additive term in an additive+dominance model. # Uses Haley-Knott regression on the genotype probabilities specified in the gpcross. # "gpcross" is the Rqtl cross object - must contain genotype probabilities. # "trait" is the trait name in quotes. # "covariates" is a single covariate or a model matrix for additive covariates # The dominance (d) and additive (a) terms in the linear are calculated as follows: # d = 0.5*Pr[AB] # a = 0.5 - Pr[AA]/2 + Pr[BB]/2 # The magnitudes of the resulting coefficients correspond to those of traditional line means analyses # and are TWICE that estimated in effectscan or fitqtl in ests$ests (SE's are also double) # Lod scores calculated according to Broman & Sen p76-77. # Returns results in a data frame also having class "scanone" so can use plot to plot results if(is.null(gpcross$geno[[1]]$prob)) stop("Cross is missing genotype probabilities") if(!(trait %in% names(gpcross$pheno))) stop("Unrecognized trait") response <- gpcross$pheno[,trait] results <- vector() for(i in names(gpcross$geno)){ # i <- names(gpcross$geno)[4] probmap <- attr(gpcross$geno[[i]]$prob,"map") chrlods <- apply(gpcross$geno[[i]]$prob,2,function(dat){ misng <- apply(cbind(dat,response,covariates),1,function(x){any(is.na(x))}) y <- response[!misng] x <- dat[!misng,] if(is.matrix(covariates)) f <- covariates[!misng,] else f <- covariates[!misng] #z0 <- lm( y ~ f ) #z1 <- lm( y ~ x - 1 + f) # the "-1" drops the intercept but keeps 3 columns in x: AA AB BB d <- 0.5*x[,"AB"] # dominance #a <- 0.5 - x[,"AA"]/2 + x[,"BB"]/2 # additive, calculated as a deviation from 0.5 toward AA or BB a <- (1 - x[,"AA"] + x[,"BB"])/2 # same, but might be quicker to calculate z0 <- lm( y ~ d + f) # "reduced" model includes dominance term (and any covariates) z1 <- lm( y ~ a + d + f) # "full" model with additive term rss0 <- anova(z0)["Sum Sq"]["Residuals",] rss1 <- anova(z1)["Sum Sq"]["Residuals",] lod <- ( length(y)/2 ) * log10 (rss0/rss1) a <- coef(z1)["a"] return(c(lod,a)) }) z <- cbind.data.frame(i,probmap,chrlods[1,], chrlods[2,], stringsAsFactors=FALSE) names(z) <- c("chr", "pos", "lod", "a") fakemarkers <- grepl("loc[0-9]+",rownames(z)) rownames(z)[fakemarkers] <- paste("c",i,".",rownames(z)[fakemarkers], sep="") results <- rbind.data.frame(results,z) } class(results) <- c("scanone","data.frame") return(results) } rqtl$plotqtl <- function(cross, peakmarkerinfo, xlim=c(0.6,2.5)){ # Plots the linkage map with qtl intervals added. # In this newer version, the mapfile is extracted from "cross" # "peakmarkerinfo" is created from the summary of a scanone object using format="tabByCol" # and must have the columns: trait chr pos ci.low ci.high # The chromosome is plotted at x=1 # Adjust xlim to fit contents onto plot window mapfile <- pull.map(cross, as.table=TRUE) for(i in unique(mapfile$chr)){ map <- mapfile[mapfile$chr==i,] marker <- rownames(map) location <- map$pos groupname <- paste("chr",i) qtl <- peakmarkerinfo[peakmarkerinfo$chr == i,] if(nrow(qtl) > 0 ){ xpl<-rep(1,length(location)) ypl<- location plot(xpl, ypl, type = "n", xaxt = "n", bty = "n", main = groupname, ylab = "Map location", xlim = xlim, ylim = c(max(ypl),0), xlab="", las=1) # add marker ticks seglength<-0.02 segments(xpl - seglength, ypl, xpl + seglength, ypl, col="red", lwd = 1.5) # add vertical line segments(1, 0, 1, max(ypl), col="black", lwd=1.5) # label markers yloc <- c(0:(length(ypl) - 1)) * (max(ypl) - min(ypl)) / (length(ypl) - 1) text(xpl - 5 * seglength, yloc, labels = marker, adj = 1, cex = 0.8) segments(xpl - 4.9 * seglength, yloc, xpl - 1.2 * seglength, ypl) # plot the qtl intervals xqtl <- 4*seglength + seq(from = 1, by = 3*seglength, length.out = nrow(qtl)) for(j in 1:nrow(qtl)){ lines( c(xqtl[j], xqtl[j]), c(qtl$ci.low[j], qtl$ci.high[j]), lwd=4, col = "light blue" ) text(xqtl[j]-seglength, (qtl$ci.low[j] + qtl$ci.high[j])/2, lab = qtl$trait[j], srt = 90, cex=0.8) } } } } rqtl$plotqtl.bak <- function(mapfile, peakmarkerinfo, xlim=c(0.6,2.5)){ # Plots the linkage map with qtl intervals added # "mapfile" is the mapfile object read from the map text file # it must contain these columns: marker location lg groupname # "peakmarkerinfo" is created from the summary of a scanone object using format="tabByCol" # and must have the columns: trait chr pos ci.low ci.high # The chromosome is plotted at x=1 # Adjust xlim to fit contents onto plot window for(i in unique(mapfile$lg)){ map <- mapfile[mapfile$lg==i,] marker <- map$marker location <- map$location qtl <- peakmarkerinfo[peakmarkerinfo$chr == i,] if(nrow(qtl) > 0 ){ xpl<-rep(1,length(location)) ypl<- location plot(xpl, ypl, type = "n", xaxt = "n", bty = "n", main = as.character(map$groupname[1]), ylab = "Map location", xlim = xlim, ylim = c(max(ypl),0), xlab="", las=1) # add marker ticks seglength<-0.02 segments(xpl - seglength, ypl, xpl + seglength, ypl, col="red", lwd = 1.5) # add vertical line segments(1, 0, 1, max(ypl), col="black", lwd=1.5) # label markers yloc <- c(0:(length(ypl) - 1)) * (max(ypl) - min(ypl)) / (length(ypl) - 1) text(xpl - 5 * seglength, yloc, labels = marker, adj = 1, cex = 0.8) segments(xpl - 4.9 * seglength, yloc, xpl - 1.2 * seglength, ypl) # plot the qtl intervals xqtl <- 4*seglength + seq(from = 1, by = 3*seglength, length.out = nrow(qtl)) for(j in 1:nrow(qtl)){ lines( c(xqtl[j], xqtl[j]), c(qtl$ci.low[j], qtl$ci.high[j]), lwd=4, col = "light blue" ) text(xqtl[j]-seglength, (qtl$ci.low[j] + qtl$ci.high[j])/2, lab = qtl$trait[j], srt = 90, cex=0.8) } } } } rqtl$plot.pxgdata <- function(gpcross, trait, chr, pos, groupvariable=NULL, test=FALSE, strip=FALSE, ...){ # Function to plot phenotypes x genotypes for an ***F2-type cross*** # We are using the real genotypes, where 1=AA, 2=notBB, 3=AB, 4=notAA 5-BB # Is like plot.pxg in Rqtl but plots against real data, rather than imputations # Uses marker nearest to "pos", if pos might is an imputed marker # If groupvariable="family" is given, a separate plot is given for each family group # If test = TRUE, an ANOVA is also carried out *** current version assumes a family variable is present # Set strip=TRUE to get header strips when family=familyname # ... additional arguments to stripchart or stripplot if(summary(gpcross)$type != "f2") stop("This function only works for F2-type crosses") y <- pull.pheno(gpcross, trait) nearestmarker <- find.marker(gpcross, chr=chr, pos=pos) x <- pull.geno(gpcross, chr = chr)[,nearestmarker] # remember, we are using the real genotypes, here 1-5 if(length(x[!is.na(x)]) < 2){ cat("Cannot print ",nearestmarker,", no data",sep="",fill=TRUE) return() } notmissing <- apply(cbind(x,y),1,function(x){all(!is.na(x))}) x <- x[notmissing] y <- y[notmissing] if(!is.null(groupvariable)){ family <- pull.pheno(gpcross, groupvariable) family <- as.character(family[notmissing]) } if(any(is.element(c(4,5),x))){ x <- factor(x,levels=c("1","4","2","5","3")) # reorder the genotype codes levels(x) <- c("AA","notBB","AB","notAA","BB") # rename the genotype codes } else { x <- factor(x, levels=c("1","2","3")) levels(x) <- c("AA","AB","BB") } if(is.null(groupvariable) | length(unique(family)) < 2){ stripchart(y ~ x, ylab=trait, xlab=paste("chr =",chr, "pos =",pos, nearestmarker), vertical=TRUE, method="jitter", pch=1, col="blue", ...) abline(lm(y~as.integer(x))) } else{ z <- lm(y ~ as.numeric(x) + as.factor(family)) slope <- z$coef[2] library(lattice) # Weirdly, it is necessary to use print.trellis inside a user function to get trellis graph to plot print(stripplot(y ~ x | family, ylab=trait, xlab=paste("chr =",chr, "pos =",pos, nearestmarker), scales = list(cex = 0.6), strip = strip, panel=function(x,y){ # this function is repeated for each panel panel.xyplot(x,y, jitter.x=TRUE,...) # adds a bit of jitter to measurements for plotting #if( max(as.numeric(x), na.rm=TRUE) > min(as.numeric(x),na.rm=TRUE) ) # panel.lmline(as.numeric(x), y, lty=1) #panel.loess(jitter(as.numeric(x), factor=0.2),y, lty=1, span=0.8) # one per family; hard to control #panel.abline( a=(lm((y-slope*as.numeric(x))~ 1)$coef[1]), b=slope, lty=2) # fit of data to common slope means <- tapply(y,as.numeric(x),mean) #print(means) panel.lines(means ~ as.integer(names(means)) ) }, ...)) } if(test){ anova(lm(y ~ x * family)) } } rqtl$segdistmap<-function(cross,score="chisq",...){ # plots a measure of segregation distortion across the linkage map # "cross" is a cross object for a single family, eg "cranbyE4" # mapfile is the mapfile # ... passes plot options to plot # If "chi", the score is set to "3" if genotype data are missing for a family # (=df for an "ac x bd" marker) # The following marker genotypes are expected: # "1" "2" "3" "4" # "1" "4" "10" # "7" "8" # "5" "6" # Only in the case of "1,4,10" are the expected frequencies unequal between genotypes. # Also, this case might have missing values, eg only "1,4" "1,10", or "4,10" may be present. x<-as.data.frame(pull.geno(cross)) x<-lapply(x,table) # creates a frequency table for each marker chiscore<-function(x){ if(length(x) < 2) return(c(3)) if( all(is.element(names(x),c("1","4","10"))) ){ if(length(x)==3) y<-as.vector(x) else{ y<-as.vector(x) z<-which(!is.element(c("1","4","10"),names(x))) if(z==1) y<-c(0,y) else if(z==2) y<-c(y[1],0,y[2]) else y<-c(y,0) } z<-chisq.test(y,p=c(0.25,0.25,0.5)) } else z<-chisq.test(x) return(c(z$statistic,p=z$p.value,z$parameter)) } z<-lapply(x,chiscore) z<-as.data.frame(do.call("rbind",z)) z1<-pull.map(cross) z2<-lapply(z1,function(x){unlist(x[1,])}) # z3<-unlist(lapply(z2,names)) z4<-do.call("c",z2) z5<-rep(names(z2),unlist(sapply(z2,length))) z$chr<-as.integer(z5) z$pos<-z4 if(score=="chisq"){ z$lod<-z[,"X-squared"] } z<-z[,c("chr","pos","lod")] class(z)<-c("scanone","data.frame") plot(z,ylab=paste("segregation distortion:",score)) } rqtl$peak.means<-function(lodpeaks,cross,conversion=NULL){ # calculates straight genotype means at nearest markers (no model fitting incorporated) # sex is not yet factored in # Conversion converts to mm for xy traits only if(class(lodpeaks)[1]=="summary.scanone"){ # covers the unlikely case when lodpeaks contains results for only one trait } else{ # need to get the name of the trait into lodpeaks: for(i in 1:length(lodpeaks)){ lodpeaks[[i]]$name<-names(lodpeaks)[i] } x<-lapply(lodpeaks,function(lodpeaks,cross,conversion){ result<-list() for(i in 1:nrow(lodpeaks)){ chr<-lodpeaks[i,1] posn<-lodpeaks[i,2] traitname<-lodpeaks[i,4] marker<-find.marker(cross,chr,posn) genotypes<-as.factor(cross$geno[[chr]]$data[,marker]) if(is.element(10,genotypes)){ genotypes<-factor(genotypes,levels=c(1,10,4)) #levels(genotypes)<-c("AC","BC/AD","BD") } else if(all(is.element(c(1,2,3,4),genotypes))){ levels(genotypes)<-c("1","2/3","2/3","4") } trait<-cross$pheno[[traitname]] x<-na.omit(cbind.data.frame(genotypes,trait)) y<-x$trait x<-x$genotypes z<-as.data.frame(tapply(y,x,mean)) z$se<-tapply(y,x,se) z$n<-tapply(y,x,length) names(z)<-c("mean","se","n") if(!is.null(conversion) & regexpr("[xy][0-9]",traitname)>-1){ z$mean<-z$mean*conversion z$se<-z$se*conversion } z<-t(z) result[[marker]]<-z } return(result) },cross,conversion) return(x) } } rqtl$family.matrix<-function(gpcross){ # Creates a design matrix for family that can be used in model fitting # gpcross can be replaced with cross or simcross and it will work # BUT CROSS MUST HAVE ID VARIABLE NAMED "id" IN THE FORMAT "E10.25" # so this function is very specific to NHG data w <- regexpr("[a-zA-Z0-9]+\\.", gpcross$pheno$id) famname<-as.factor(substring(as.character(gpcross$pheno$id), w,w+attr(w, "match.length")-2)) if(length(unique(famname))==1) stop("$family.matrix needs more than one family") family.matrix<-model.matrix(~famname)[,-1] return(family.matrix) } rqtl$means.simcross<-function (simcross, pheno.col = 1, mname1, mark1, geno1, mname2, mark2, geno2, var.flag = c("pooled", "group")) { # Extracted from Rqtl command "effectplot" to get means and SE's from simcross # Can accept up to 2 marker names # See effectplot to understand other terms if (!sum(class(simcross) == "cross")) stop("The first input variable must be an object of class cross") if (pheno.col > nphe(simcross)) stop("Input pheno.col is wrong") n.ind <- nind(simcross) pheno <- simcross$pheno[, pheno.col] type <- class(simcross)[1] chrtype1 <- chrtype2 <- "A" gennames1 <- gennames2 <- NULL if (!("draws" %in% names(simcross$geno[[1]]))) { warning(" -Running sim.geno.") simcross <- sim.geno(simcross, n.draws = 16) } if (missing(mark1)) { if (missing(mname1)) stop("Either mname1 or mark1 must be specified.") tmp <- effectplot.getmark(simcross, mname1) mark1 <- tmp$mark gennames1 <- tmp$genname } else { if (class(mark1) != "matrix") mark1 <- matrix(mark1, ncol = 1) if (dim(mark1)[1] != n.ind) stop("Marker 1 data hass the wrong dimension") if (missing(mname1)) mname1 <- "Marker 1" } if (!missing(mname2) || !missing(mark2)) { if (missing(mark2)) { tmp <- effectplot.getmark(simcross, mname2) mark2 <- tmp$mark gennames2 <- tmp$genname } else { if (class(mark2) != "matrix") mark2 <- matrix(mark2, ncol = 1) if (dim(mark2)[1] != n.ind) stop("Marker 2 data has the wrong dimension") if (missing(mname2)) mname2 <- "Marker 2" } } else mark2 <- NULL ndraws1 <- dim(mark1)[2] if (is.null(mark2)) ndraws2 <- 1 else ndraws2 <- dim(mark2)[2] if ((ndraws1 > 1) && (ndraws2 > 1)) { if (ndraws1 != ndraws2) stop("Input two pseudomarkers have different number of draws.") else ndraws <- ndraws1 } else if ((ndraws1 > 1) && (ndraws2 == 1)) { if (!is.null(mark2)) mark2 <- matrix(rep(mark2, ndraws1), ncol = ndraws1) ndraws <- ndraws1 } else if ((ndraws1 == 1) && (ndraws2 > 1)) { mark1 <- matrix(rep(mark1, ndraws2), ncol = ndraws2) ndraws <- ndraws2 } else ndraws <- 1 tmp <- cbind(pheno, mark1, mark2) keepind <- apply(tmp, 1, function(x) all(!is.na(x))) mark1 <- mark1[keepind, ] mark2 <- mark2[keepind, ] pheno <- pheno[keepind] tmpf <- factor(mark1) if (!missing(geno1)) { if (length(geno1) < length(levels(tmpf))) stop("geno1 is too short.") } else { if (!is.null(gennames1)) geno1 <- gennames1 else if (is.factor(mark1)) { geno1 <- levels(mark1) mark1 <- as.numeric(mark1) } else if (!is.numeric(mark1)) { geno1 <- levels(tmpf) } else { geno1 <- getgenonames(type, "A", cross.attr = attributes(simcross)) if (length(levels(tmpf)) > length(geno1)) geno1 <- c(geno1, rep("?", length(levels(tmpf)) - length(geno1))) } } if (!is.numeric(mark1)) mark1 <- matrix(as.numeric(tmpf, levels = sort(levels(tmpf))), ncol = ndraws) if (!is.null(mark2)) { tmpf <- factor(mark2) if (!missing(geno2)) { if (length(geno2) < length(levels(tmpf))) stop("geno2 is too short.") } else { if (!is.null(gennames2)) geno2 <- gennames2 else if (is.factor(mark2)) { geno2 <- levels(mark2) mark2 <- as.numeric(mark2) } else if (!is.numeric(mark2)) { geno2 <- levels(tmpf) } else { geno2 <- getgenonames(type, "A", cross.attr = attributes(simcross)) if (length(levels(tmpf)) > length(geno2)) geno2 <- c(geno2, rep("?", length(levels(tmpf)) - length(geno2))) } } if (!is.numeric(mark2)) mark2 <- matrix(as.numeric(tmpf, levels = sort(levels(tmpf))), ncol = ndraws) } ngen1 <- length(geno1) if (!is.null(mark2)) ngen2 <- length(geno2) result <- effectplot.calmeanse(pheno, mark1, mark2, ndraws, var.flag) means <- result$Means ses <- result$SEs if (is.null(mark2)) { if (length(means) != length(geno1)) { warning("Number of genotypes is different than length(geno1).") if (length(means) < length(geno1)) geno1 <- geno1[1:length(means)] else geno1 <- c(geno1, rep("?", length(means) - length(geno1))) ngen1 <- length(geno1) } names(result$Means) <- paste(mname1, geno1, sep = ".") names(result$SEs) <- paste(mname1, geno1, sep = ".") } else { if (nrow(means) != length(geno1)) { warning("Number of genotypes in marker 1 is different than length(geno1).") if (nrow(means) < length(geno1)) geno1 <- geno1[1:nrow(means)] else geno1 <- c(geno1, rep("?", nrow(means) - length(geno1))) ngen1 <- length(geno1) } if (ncol(means) != length(geno2)) { warning("Number of genotypes in marker 2 is different than length(geno2).") if (ncol(means) < length(geno2)) geno2 <- geno2[1:ncol(means)] else geno2 <- c(geno2, rep("?", ncol(means) - length(geno2))) ngen2 <- length(geno2) } rownames(result$Means) <- paste(mname1, geno1, sep = ".") colnames(result$Means) <- paste(mname2, geno2, sep = ".") rownames(result$SEs) <- paste(mname1, geno1, sep = ".") colnames(result$SEs) <- paste(mname2, geno2, sep = ".") } lo <- means - ses hi <- means + ses return(result) } rqtl$fitmarker<-function(trait,simcross,scanresult,markernames,sex=FALSE,family=FALSE, effects=TRUE,type="4way"){ # THIS VERSION IS ONLY WORKING FOR THE CASE OF A 4WAY CROSS # Fits with rqtl$fit and then extracts the essential information from its output # "trait" is the trait name # "simcross" is the result of "sim.geno" # "scanresult" is the result of "scanone", must include the trait # "markernames" is of the form c("gac4174","stn312") # if sex=TRUE then model is fit with simcross$pheno$sex as a covariate (must be present!) # cross="CP" corresponds to the 4-way cross; "F2" is an intercross (3 genotypes only) z<-rqtl$fit(trait=trait,simcross=simcross,scanresult=scanresult, markernames=markernames,sex=sex,family=family,effects=effects) # This code is adapted from rqtl$fitpeaks. x<-data.frame(trait=trait,marker=markernames) # "result.drop" is only present if more than one factor is fit, # otherwise the results are in "result.full" if(!is.null(z$result.drop)){ x$fitLOD<-z$result.drop[1:nrow(x),3] x$PVE<-z$result.drop[1:nrow(x),4] } else{ x$fitLOD<-z$result.full[1:nrow(x),4] x$PVE<-z$result.full[1:nrow(x),5] } x$MSE<-rep(z$result.full[2,3],nrow(x)) x$intercept<-rep(z$est$est[1],nrow(x)) x$BC<-z$est$est[(seq(1:nrow(x))*3)-1] x$AD<-z$est$est[(seq(1:nrow(x))*3)] x$BD<-z$est$est[1+(seq(1:nrow(x))*3)] x$effect<- x$BD x$dominBD<- round(((x$BC+x$AD)/2)/x$BD,2) # x$dominance<-((x$BC+x$AD)/2)-(x$effect/2) return(x) } rqtl$permute<-function(...,traitname,n=10,method="hk",model="normal",sex=FALSE,family=FALSE,n.perm=NULL){ # Permute single phenotypic trait of one or more (for Sum) gpcross object # This function is fiddly because all the gpcross objects must have identical # step size, map.function, and map. # All gpcross objects must include the variable "traitname" # In each permutation the LOD scores for each gpcross are Summed before calculating max(LOD) # Repeat this n times # Return the distribution of max(LOD) # If sex=TRUE and/or family=TRUE then permutations are carried out within strata # and analyses use these covariates in scanone if(!is.null(n.perm)) n<-n.perm # just in case n.perm is given instead of n by mistake crosslist<-list(...) # a list of all the cross.gp files passed to function # debug: erroneous elements in call to rqtl$permute will get included in crosslist, leading to error newcrosslist<-crosslist # will store the permuted data result<-vector() # get covariates, if any if(any(sex,family)){ covlist<-list() for(k in 1:length(crosslist)){ if(family & sex){ covlist[[k]]<-cbind(rqtl$family.matrix(crosslist[[k]]),crosslist[[k]]$pheno$sex) } else if(!family & sex){ covlist[[k]]<-cbind(crosslist[[k]]$pheno$sex) } else { covlist[[k]]<-cbind(rqtl$family.matrix(crosslist[[k]])) } } for(i in 1:n){ lodvector<-0 for(k in 1:length(crosslist)){ # Permute rows of $pheno within families and/or sex perm.strata<-apply(covlist[[k]],1,paste,collapse="") # creates strata based on covariates nfish<-nrow(crosslist[[k]]$pheno) z1<-tapply(1:nfish,perm.strata,sample) # permute rownumbs by strata, result is a list z<-1:nfish # initialize only for(i in 1:length(z1)){ z[perm.strata==names(z1)[i]]<-z1[[i]] # puts permuted rownumbs in correct order } newcrosslist[[k]]$pheno<-crosslist[[k]]$pheno[z,] # permuted array z<-scanone(newcrosslist[[k]],pheno.col=traitname,model=model, method=method,addcovar=covlist[[k]]) lodvector<-lodvector+z$lod } result<-c(result,max(lodvector)) } } else for(i in 1:n){ lodvector<-0 for(k in 1:length(crosslist)){ # Permute the entire row of $pheno. See Broman & Sen p182 nfish<-nrow(crosslist[[k]]$pheno) newcrosslist[[k]]$pheno<-crosslist[[k]]$pheno[sample(nfish),] z<-scanone(newcrosslist[[k]],pheno.col=traitname,model=model,method=method) lodvector<-lodvector+z$lod } result<-c(result,max(lodvector)) } return(result) } rqtl$plotlod<-function(scan,minheight=3.5,...){ # Quick function to plot LOD profile of entire scan # Multiple pages if object is a list of scans # ... passes options to plot (eg ylim) # Precede call with .SavedPlots<-NULL to erase plot history # Note that by default this function puts a gap of 25 between linkage groups # So to get the limit of the X axis take total maplength+25*(nchr-1) # minheight controls the upper limit of the plot. Set minheight=3.5 to ensure that if(is.data.frame(scan)){ if(minheight>0){ maplength<-sum(tapply(scan$pos,scan$chr,max)) xlimit<-maplength+25*( length(unique(scan$chr)) -1 ) ymax<-max(minheight+.5,max(scan$lod)) plot(scan,ylim=c(0,ymax),...) lines(c(1,xlimit),c(minheight,minheight)) } else{ plot(scan,...) } } else if(is.list(scan)){ if(minheight>0){ maplength<-sum(tapply(scan[[1]]$pos,scan[[1]]$chr,max)) xlimit<-maplength+25*( length(unique(scan[[1]]$chr)) -1 ) for(i in 1:length(scan)){ ymax<-max(minheight+.5,max(scan[[i]]$lod)) plot(scan[[i]],ylim=c(0,ymax),main=names(scan)[i],...) lines(c(1,xlimit),c(minheight,minheight)) } } else{ for(i in 1:length(scan)){ plot(scan[[i]],main=names(scan)[i],...) } } } invisible() } rqtl$sumscan<-function(...){ # Sums the LOD scores of all the scan objects listed # This function is fiddly because all the scan objects listed must result from IDENTICAL scans: # *The scans must all be based on gpcrosses that have identical step size, map.function, and map. # *The scans must have the same traits in the same order (if more than one trait is included) # The program first checks whether the objects are data frames or lists of data frames # Use as follows: rqtl$sumscan(cranbyE4.hk,cranbyE5.hk) scanlist<-list(...) nscans<-length(scanlist) if(nscans<2) stop("This function requires more than one scan object") # Case of simple scanone objects if(is.data.frame(scanlist[[1]])){ sumscan<-scanlist[[1]] for(i in 2:nscans){ if(length(scanlist[[i]]$lod)!=length(sumscan$lod)) stop("Scan objects are not similar") sumscan$lod<-sumscan$lod+scanlist[[i]]$lod } } else if(is.list(scanlist[[1]])){ traitnames<-unique(unlist(lapply(scanlist,names))) sumscan<-list() for(k in traitnames){ z<-lapply(scanlist,function(x){x[[k]]}) # grab trait k from all families z1<-z[sapply(z,function(x){!is.null(x)})] # drop those that are NULL (missing) sumscan[[k]] <- z1[[1]] for(i in 2:length(z1)){ if(length(z1[[i]]$lod)!=length(sumscan[[k]]$lod)) stop("Scan objects are not similar") sumscan[[k]]$lod<-sumscan[[k]]$lod+z1[[i]]$lod } } } return(sumscan) } rqtl$scanqtl<-function(traitname="all",gpcross,method="hk",model="normal",sex=FALSE,family=FALSE){ # Runs scanone for all traits (or a selected trait) in gpcross, the output of calc.geneprob # Assumes that the first column in gpcross is an id variable, so is not mapped # If sex=TRUE, then it is used as a covariate # gpcross can be a list of several gpcross from 2+ families (eg gpcross=c(cranbyE4.gp,cranbyE5.gp)), # in this case the result is the sum of lods across all families mapped separately if(sex){ if(family){ family.matrix<-rqtl$family.matrix(gpcross) cov.frame<-cbind(family.matrix,gpcross$pheno$sex) } else{ cov.frame<-cbind(gpcross$pheno$sex) } if(traitname=="all"){ result<-list() for(i in 2:length(gpcross$pheno)){ result[[i-1]]<-scanone(gpcross, pheno.col=i,model=model, method=method,addcovar=cov.frame) } names(result)<-names(gpcross$pheno)[2:length(names(gpcross$pheno))] result$sex<-NULL } else{ result<-scanone(gpcross, pheno.col=find.pheno(gpcross,traitname),model=model, method=method,addcovar=cov.frame) } } else{ if(family){ family.matrix<-rqtl$family.matrix(gpcross) cov.frame<-cbind(family.matrix) } else{ cov.frame<-NULL } if(traitname=="all"){ result<-list() for(i in 2:length(gpcross$pheno)){ result[[i-1]]<-scanone(gpcross, pheno.col=i, method=method,model=model, addcovar=cov.frame) } names(result)<-names(gpcross$pheno)[2:length(names(gpcross$pheno))] } else{ result<-scanone(gpcross, pheno.col=find.pheno(gpcross,traitname), method=method,model=model,addcovar=cov.frame) } } return(result) } rqtl$fitchr<-function(traitname,simcross,scanresult,chr,sex=FALSE,anova=TRUE,cim=FALSE){ # Fits all markers on a single linkage group simultaneously to a trait # Each marker is dropped one at a time to get LOD score # This routine just prepares the marker info and then calls rqtl$fit # "traitname" is the trait name, in quotes # "simcross" is the result of "sim.geno" # "scanresult" is the result of "scanone", must include the trait # anova=TRUE returns the ANOVA table as a data.frame # anova=FALSE returns a scanone object with LODs that can be plotted using plot # cim=TRUE results in a crude composite interval mapping, where each marker is fit holding # just the two flanking markers fixed. Output is scanone not anova format # test if we have the trait data required, in the case that scanresult is a list of scanone results if(!is.data.frame(scanresult)){ if(!any(traitname %in% names(scanresult))) stop("Can't find scanone results for traitname") } if(!any(traitname %in% names(simcross$pheno))) stop("Can't find traitname in simcross$pheno") map<-simcross$geno[[chr]]$map[1,] n<-length(map) markernames<-names(map) if(cim & length(map)>=4){ result<-data.frame(chr=rep(chr,length(map))) row.names(result)<-markernames result$pos<-map # fit first marker z<-rqtl$fit(traitname,simcross,scanresult,markernames[1:2],sex=sex,effects=FALSE)$result.drop result$lod[1]<-z[1,3] # fit markers in between for(i in 2:(n-1)){ z<-rqtl$fit(traitname,simcross,scanresult,markernames[c(i-1,i,i+1)], sex=sex,effects=FALSE)$result.drop result$lod[i]<-z[2,3] } # fit nth marker z<-rqtl$fit(traitname,simcross,scanresult,markernames[(n-1):n],sex=sex,effects=FALSE)$result.drop result$lod[n]<-z[2,3] } else{ z<-rqtl$fit(traitname,simcross,scanresult,markernames,sex=sex,effects=FALSE)$result.drop z<-as.data.frame(z) if(cim) anova==FALSE if(anova) result<-z else{ result<-data.frame(chr=rep(chr,length(map))) row.names(result)<-markernames result$pos<-map result$lod<-z$LOD class(result)<-c("scanone", "data.frame") } } return(result) } rqtl$fit<-function(trait,simcross,scanresult,markernames,sex=FALSE,family=FALSE,effects=TRUE){ # "trait" is the trait name # "simcross" is the result of "sim.geno" # "scanresult" is the result of "scanone", must include the trait # if sex=TRUE then model is fit with simcross$pheno$sex as a covariate (must be present!) # Here are the resulting coefficients from a fit, obtained using $ests$ests (4way cross) # 42.7016579 -0.9825133 -1.6964213 -2.6350667 2.6348260 2.2432244 4.2808822 -1.7441410 # The first number is the intercept, and represents the mean # when genotype=AC at all QTL (=marine genotype) # The next 3 number are the means of BC, AD, and BD, respectively, relative to that for AC at "Q1", # the first QTL in the model # The 3 after that are the means of BC, AD, and BD, relative to that for AC at "Q2", # the second QTL in the model and so on. # This routine doesn't seem to give the coefficients for the covariates # Pull out the scan results for the trait from "scanresult" if(is.element(trait,names(scanresult))) x<-scanresult[[trait]] else if(is.data.frame(scanresult)) x<-scanresult else stop("Can't find scan results for trait") # Grab the scanresult results for markernames z<-data.frame() for(i in 1:length(markernames)){ z<-rbind(z,x[which(row.names(x)==markernames[i]),]) } if(length(markernames)!=nrow(z)){ print(z) stop("Couldn't find all the markers") } # makeqtl grabs the info for the qtl from simcross zinfo<-makeqtl(simcross, chr=z$chr, pos=z$pos, qtl.name=row.names(z)) # fitqtl does the ANOVA fit. Ignore the warnings about the estimates, we can figure them out! if(family){ family.matrix<-rqtl$family.matrix(simcross) if(sex) cov.frame<-cbind.data.frame(simcross$pheno$sex,family.matrix) else cov.frame<-cbind.data.frame(family.matrix) zformula<-as.formula(paste("y~",paste(paste("Q",1:length(zinfo$chr),sep=""),collapse="+"), "+",paste(names(cov.frame),collapse="+"))) y<-fitqtl(simcross,pheno.col=trait, zinfo, get.ests=effects, formula = zformula, covar=cov.frame) y$ndimcov<-ncol(cov.frame) } else{ if(sex){ zformula<-as.formula(paste("y~",paste(paste("Q",1:length(zinfo$chr),sep=""), collapse="+"),"+simcross$pheno$sex")) y<-fitqtl(simcross,pheno.col=trait, zinfo, get.ests=effects, formula=zformula, covar=as.data.frame(simcross$pheno$sex)) y$ndimcov<-1 } else{ zformula<-as.formula(paste("y~",paste(paste("Q",1:length(zinfo$chr),sep=""), collapse="+"))) y<-fitqtl(simcross,pheno.col=trait, zinfo, get.ests=effects, formula = zformula) y$ndimcov<-0 } } # print(y) # To check that I'm still grabbing the right output when called from rqtl$fit return(y) } rqtl$make4waymap<-function(cross){ # make a 4-way map suitable for using in sim.cross, using the map in a cross map<-pull.map(cross) # length of each linkage group in map of cross x1<-unlist(lapply(map,function(x){max(x[1,])})) # number of markers on each linkage group in map of cross x2<-unlist(lapply(map,ncol)) # I can't think of another way to simulate a map without mucking up the class attribute # Note that sex.sp must be true to generate the map structure that Rqtl uses for 4-way crosses # even when sex is not a variable. z<-sim.map(include.x=FALSE,len=x1,n.mar=x2,sex.sp=TRUE) for(i in 1:length(x1)){ z[[i]][1:2,1:x2[i]]<-map[[i]][1:2,] dimnames(z[[i]])[[2]]<-unlist(dimnames(map[[i]])[2]) } return(z) } rqtl$sim4way <- function(map, chr=c(1),sites=c(1),effects=(1), n.ind=100, type="4way",BDdominance=0.5, map.function=c("haldane","kosambi","c-f","morgan"),residvar=1){ # Model must be a matrix with 5 columns: chr, pos and effects # Here I assume that "effects" is a single constant, ie there's only one qtl and one trait # The designation of model effects input to simcross is backward compared to the way effects are presented. # From the help: "For a four-way cross, the effect of a QTL is a set of three numbers, (a,b,c), where, # in the case of one QTL, the mean phenotype, conditional on the QTL genotyping being AC, BC, AD or BD, # is a, b, c or 0, respectively." So in this format , "BD" is set to zero rather than "AC". # To accommodate this, let (a,b,c) be (-effect, -effect+effect*BDdominance, -effect+effect**BDdominance) # where "BDdominance" is (heterozygote mean - AC)/(BD - AC) model<-cbind(chr,sites,-effects,-effects+effects*BDdominance,-effects+effects*BDdominance) error.prob<-0 missing.prob<-0 partial.missing.prob<-0 keep.qtlgeno<-TRUE keep.errorind<-TRUE m<-0 p<-0 z<-rqtl$simcross.new4way(map, model, n.ind, error.prob, missing.prob, partial.missing.prob, keep.errorind, m, p, map.function,residvar) return(z) } rqtl$fitpeaks<-function(traitname,lodpeaks,simcross,sex=TRUE,family=FALSE,type="4way"){ # Extracts effects for a single trait for markers listed in data frame "lodpeaks" # In a 4-way cross, the estimates from rqtl$fit are in the following order: # intercept BC1 AD1 BD1 BC2 AD2 BD2 sex # 42.7016579 -0.9825133 -1.6964213 -2.6350667 2.6348260 2.2432244 4.2808822 -1.7441410 # intercept is the intercept for all traits; all other numbers are differences relative to intercept # AC = 0, so its mean is the same as the intercept. # Effect size is BD-AC (i.e., is the size in the benthic minus the marine; AC is zero, so effect is BD) # dominance is in fractional units above AC, as the fraction of the difference toward BD of the hets. # A dominance of 0.5 that the mean of AD and BC is half way toward BD. # In an F2 cross, the estimates are in the following order # (unfortunately the covariates come before effects) # Intercept [cov] A1 D1 A2 D2 # 20.0026019 xxxxx -10.8800467 3.2280075 -1.4513286 0.1194289 # Intercept is the midpoint between the AA and BB genotypes # A1 and D1 are the additive and dominance coefficients for the 1st marker # A2 and D2 are the additive and dominance coefficients for the 2nd marker # Multiply additive effect to get "effect" as defined here # Dominance here is fraction of difference towards BB of the hets if(class(lodpeaks)[1]=="list") {x<-lodpeaks[[traitname]]} else {x<-lodpeaks} z<-rqtl$fit(traitname,simcross,x,row.names(x),sex=sex,family=family) # print(z) # "result.drop" is present if more than one factor is fit, otherwise results are in "result.full" if(!is.null(z$result.drop)){ x$fitLOD<-z$result.drop[1:nrow(x),3] x$PVE<-z$result.drop[1:nrow(x),4] } else{ x$fitLOD<-z$result.full[1:nrow(x),4] x$PVE<-z$result.full[1:nrow(x),5] } x$MSE<-rep(z$result.full[2,3],nrow(x)) x$intercept<-rep(z$est$est[1],nrow(x)) if(type=="4way"){ x$BC<-z$est$est[(seq(1:nrow(x))*3)-1] x$AD<-z$est$est[(seq(1:nrow(x))*3)] x$BD<-z$est$est[1+(seq(1:nrow(x))*3)] x$effect<- x$BD x$dominBD<- round(((x$BC+x$AD)/2)/x$BD,2) } else{ # type=="F2" x$effect<- 2*z$est$est[(seq(1:nrow(x))*2)+z$ndimcov] # double the additive effect to get "effect" x$dominBD<- z$est$est[(seq(1:nrow(x))*2)+1+z$ndimcov] x$dominBD<- 0.5+x$dominBD/x$effect } return(x) } rqtl$findlodpeaks<-function(scanresult,lodmin=3.5,markersonly=FALSE,exclude="loc"){ # Takes a list of scanone results in scanresult and picks out the qtl # The current version takes only the highest peak on each qtl, # so can't find more than one for a given trait # scanresult might be a single scanone object, in which case only one trait is analyzed, # or it might be a list of scanone objects, as when created for all traits at once # If markersonly=TRUE then pseudomarker names containing "loc" are removed if((class(scanresult)=="scanone")[1]){ z<-scanresult if(markersonly) z<-z[!chontains(exclude,rownames(z)),] z<-summary(z) z<-z[z$lod>=lodmin,] } else{ z<-lapply(scanresult,function(x,lodmin,markersonly,exclude){ z<-x if(markersonly) z<-z[!chontains(exclude,rownames(z)),] z<-summary(z) return(z[z$lod>=lodmin,]) },lodmin,markersonly,exclude) y<-lapply(z,function(z){length(unlist(z))>0}) # identifies the items with results z<-z[y==TRUE] } return(z) } rqtl$f2table<-function(cross,mname1,mname2=NULL,acode=TRUE){ x<-pull.geno(cross) x1<-x[,mname1] if(acode) x1<-recode(x1,c("NA","1","2","3","4","5","6","7","8","9","10"), c("--","ac","bc","ad","bd","a-","b-","-c","-d","ac/bd","ad/bc")) if(!is.null(mname2)){ x2<-x[,mname2] if(acode) x2<-recode(x2,c("NA","1","2","3","4","5","6","7","8","9","10"), c("--","ac","bc","ad","bd","a-","b-","-c","-d","ac/bd","ad/bc")) print(table(x1,x2,dnn=c(mname1,mname2))) } else print(table(x1,dnn=mname1)) invisible() } rqtl$hist<-function(cross,traitname,mname){ # generates a histogram that is conditional on marker genotype ("mname") library(lattice) x<-pull.geno(cross) histogram(~cross$pheno[[traitname]]|as.factor(x[,mname]),xlab=traitname, type="count",include.lowest=TRUE,right=FALSE, main=paste(deparse(substitute(cross)),mname)) } rqtl$effectplot<-function(simcross,trait,firstname,secondname=NULL){ # draws a plot of effects for one or two qtl # simcross is the output of "sim.geno" # trait is the trait # firstname is the name of the first qtl # secondname is the name of the second qtl (if present) # example call: rqtl$effectplot(family15.sim, "stdl", "stn20") # rqtl$effectplot(family15.sim, "stdl", "stn20", "idh") if(is.null(secondname)) effectplot(simcross, pheno.col=find.pheno(simcross,trait), mname1=firstname, main=trait) else effectplot(simcross, pheno.col=find.pheno(simcross,trait), mname1=firstname, mname2=secondname, main=trait) } rqtl$mqm1<-function(simcross,cross.gp,trait,lg,jlg,jposition,method="hk",plotcompare=TRUE){ # This is a function to carry out interval mapping across a chromosome "lg" # for trait "trait" when also holding qtl j on chromosome "jlg" fixed # (this could be extended to hold other positions fixed, as in MQM mapping and CIM) # "jposition" is the numerical position of the qtl j # "simcross" is a cross resulting from running "sim.geno" # "cross" is the main cross # "cross.gp" is the result of running "calc.genoprob" if(!exists("convert.scanqtl")) source("http://www.rqtl.org/multqtlfunc.R") scanoneresult<-scanone(cross.gp, pheno.col=find.pheno(simcross,trait), method=method) x.sq <- scanqtl(simcross, chr=c(lg,jlg), pos = list( c(-Inf,Inf), jposition), incl.markers=TRUE, pheno.col=find.pheno(simcross,trait) ) x.null <- scanqtl(simcross, chr=jlg, pos=list(jposition), incl.markers=TRUE, pheno.col=find.pheno(simcross,trait)) x.result <- convert.scanqtl(x.sq, x.null) if(plotcompare) plot(scanoneresult, x.result , col=c("blue", "red"), chr=lg) return(x.result) } rqtl$simcross.new4way<-function (map, model, n.ind, error.prob, missing.prob, partial.missing.prob, keep.errorind, m, p, map.function, residvar) { # Same as sim.cross.4way except we can specify the residual variance to be other than 1 # The only line changed in original code is # "pheno <- rnorm(n.ind, 0, sqrt(residvar))" instead of "pheno <- rnorm(n.ind, 0, 1)" if (map.function == "kosambi") mf <- mf.k else if (map.function == "c-f") mf <- mf.cf else if (map.function == "morgan") mf <- mf.m else mf <- mf.h if (!all(sapply(map, is.matrix))) stop("Map must be sex-specific.") n.chr <- length(map) if (is.null(model)) n.qtl <- 0 else { if (!((!is.matrix(model) && length(model) == 5) || (is.matrix(model) && ncol(model) == 5))) { stop("Model must be a matrix with 5 columns (chr, pos and effects).") } if (!is.matrix(model)) model <- rbind(model) n.qtl <- nrow(model) if (any(model[, 1] < 0 | model[, 1] > n.chr)) stop("Chromosome indicators in model matrix out of range.") model[, 2] <- model[, 2] + 1e-14 } chr.type <- sapply(map, function(a) ifelse(class(a) == "A", "A", "X")) if (n.qtl > 0) { for (i in 1:n.qtl) { temp <- map[[model[i, 1]]] temp1 <- temp[1, ] temp2 <- temp[2, ] qtlloc <- model[i, 2] if (qtlloc < min(temp1)) { temp1 <- c(qtlloc, temp1) temp2 <- min(temp2) - (min(temp1) - qtlloc)/diff(range(temp1)) * diff(range(temp2)) temp1 <- temp1 - min(temp1) temp2 <- temp2 - min(temp2) n <- c(paste("QTL", i, sep = ""), colnames(temp)) } else if (qtlloc > max(temp1)) { temp1 <- c(temp1, qtlloc) temp2 <- (qtlloc - max(temp1))/diff(range(temp1)) * diff(range(temp2)) + max(temp2) n <- c(colnames(temp), paste("QTL", i, sep = "")) } else { temp1 <- c(temp1, qtlloc) o <- order(temp1) wh <- (seq(along = temp1))[order(temp1) == length(temp1)] temp2 <- c(temp2[1:(wh - 1)], NA, temp2[-(1:(wh - 1))]) temp2[wh] <- temp2[wh - 1] + (temp1[wh] - temp1[wh - 1])/(temp1[wh + 1] - temp1[wh - 1]) * (temp2[wh + 1] - temp2[wh - 1]) temp1 <- sort(temp1) n <- c(colnames(temp), paste("QTL", i, sep = ""))[o] } map[[model[i, 1]]] <- rbind(temp1, temp2) dimnames(map[[model[i, 1]]]) <- list(NULL, n) } } geno <- vector("list", n.chr) names(geno) <- names(map) n.mar <- sapply(map, ncol) mar.names <- lapply(map, function(a) colnames(a)) for (i in 1:n.chr) { sex <- NULL if (chr.type[i] == "X") sex <- rep(0, n.ind) thedata <- sim.bcg(n.ind, map[[i]], m, p, map.function) dimnames(thedata) <- list(NULL, mar.names[[i]]) if (chr.type[i] != "X") thedata <- thedata + 2 * sim.bcg(n.ind, map[[i]], m, p, map.function) - 2 dimnames(thedata) <- list(NULL, mar.names[[i]]) geno[[i]] <- list(data = thedata, map = map[[i]]) class(geno[[i]]) <- chr.type[i] class(geno[[i]]$map) <- NULL } pheno <- rnorm(n.ind, 0, sqrt(residvar)) if (n.qtl > 0) { QTL.chr <- QTL.loc <- NULL for (i in 1:n.chr) { o <- grep("^QTL[0-9]+", mar.names[[i]]) if (length(o) > 0) { QTL.chr <- c(QTL.chr, rep(i, length(o))) QTL.loc <- c(QTL.loc, o) } } for (i in 1:n.qtl) { QTL.geno <- geno[[QTL.chr[i]]]$data[, QTL.loc[i]] pheno[QTL.geno == 1] <- pheno[QTL.geno == 1] + model[i, 3] pheno[QTL.geno == 2] <- pheno[QTL.geno == 2] + model[i, 4] pheno[QTL.geno == 3] <- pheno[QTL.geno == 3] + model[i, 5] } } n.mar <- sapply(geno, function(a) ncol(a$map)) if (error.prob > 0) { for (i in 1:n.chr) { if (chr.type[i] != "X") { a <- sample(0:3, n.mar[i] * n.ind, repl = TRUE, prob = c(1 - error.prob, rep(error.prob/3, 3))) if (any(a > 0 & geno[[i]]$data == 1)) geno[[i]]$data[a > 0 & geno[[i]]$data == 1] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 1] + a[a > 0 & geno[[i]]$data == 1] if (any(a > 0 & geno[[i]]$data == 2)) geno[[i]]$data[a > 0 & geno[[i]]$data == 2] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 2] + c(-1, 1, 2)[a[a > 0 & geno[[i]]$data == 2]] if (any(a > 0 & geno[[i]]$data == 3)) geno[[i]]$data[a > 0 & geno[[i]]$data == 3] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 3] + c(-2, -1, 1)[a[a > 0 & geno[[i]]$data == 3]] if (any(a > 0 & geno[[i]]$data == 4)) geno[[i]]$data[a > 0 & geno[[i]]$data == 4] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 4] - a[a > 0 & geno[[i]]$data == 4] } else { a <- sample(0:1, n.mar[i] * n.ind, repl = TRUE, prob = c(1 - error.prob, error.prob)) if (any(a > 0 & geno[[i]]$data == 1)) geno[[i]]$data[a > 0 & geno[[i]]$data == 1] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 1] + 1 if (any(a > 0 & geno[[i]]$data == 2)) geno[[i]]$data[a > 0 & geno[[i]]$data == 2] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 2] - 1 if (any(a > 0 & geno[[i]]$data == 3)) geno[[i]]$data[a > 0 & geno[[i]]$data == 3] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 3] + 1 if (any(a > 0 & geno[[i]]$data == 4)) geno[[i]]$data[a > 0 & geno[[i]]$data == 4] <- geno[[i]]$data[a > 0 & geno[[i]]$data == 4] - 1 } if (keep.errorind) { errors <- matrix(0, n.ind, n.mar[i]) errors[a > 0] <- 1 colnames(errors) <- colnames(geno[[i]]$data) geno[[i]]$errors <- errors } } } if (partial.missing.prob > 0) { for (i in 1:n.chr) { if (chr.type[i] != "X") { o <- sample(c(TRUE, FALSE), n.mar[i], repl = TRUE, prob = c(partial.missing.prob, 1 - partial.missing.prob)) if (any(o)) { o2 <- grep("^QTL[0-9]+", mar.names[[i]]) if (length(o2) > 0) x <- geno[[i]]$data[, o2] m <- (1:n.mar[i])[o] for (j in m) { a <- sample(1:4, 1) if (a == 1) { geno[[i]]$data[geno[[i]]$data[, j] == 1 | geno[[i]]$data[, j] == 3, j] <- 5 geno[[i]]$data[geno[[i]]$data[, j] == 2 | geno[[i]]$data[, j] == 4, j] <- 6 } else if (a == 2) { geno[[i]]$data[geno[[i]]$data[, j] == 1 | geno[[i]]$data[, j] == 2, j] <- 7 geno[[i]]$data[geno[[i]]$data[, j] == 3 | geno[[i]]$data[, j] == 4, j] <- 8 } else if (a == 3) geno[[i]]$data[geno[[i]]$data[, j] == 2 | geno[[i]]$data[, j] == 3, j] <- 10 else geno[[i]]$data[geno[[i]]$data[, j] == 1 | geno[[i]]$data[, j] == 4, j] <- 9 } if (length(o2) > 0) geno[[i]]$data[, o2] <- x } } } } if (missing.prob > 0) { for (i in 1:n.chr) { o <- grep("^QTL[0-9]+", mar.names[[i]]) if (length(o) > 0) x <- geno[[i]]$data[, o] geno[[i]]$data[sample(c(TRUE, FALSE), n.mar[i] * n.ind, repl = TRUE, prob = c(missing.prob, 1 - missing.prob))] <- NA if (length(o) > 0) geno[[i]]$data[, o] <- x } } if (!is.null(sex)) { pheno <- cbind(pheno, sex) dimnames(pheno) <- list(NULL, c("phenotype", "sex")) } else { pheno <- cbind(pheno) dimnames(pheno) <- list(NULL, "phenotype") } pheno <- as.data.frame(pheno) cross <- list(geno = geno, pheno = pheno) class(cross) <- c("4way", "cross") cross }