library(adegenet); library(hierfstat) setwd('/Users/Fre/SCIENCE/Dropbox/Papers/2013/6_Inbreeding') ## Read in data and convert to other packages ## ------------------------------------------ genotypes <- read.table('1_GenotypeData.txt',header = T) cerc.geno <- genotypes[c(1:16),] mira.geno <- genotypes[-c(1:16),] genind.cerc <- df2genind(X=cerc.geno[,-c(1:4)],ncode=6,ind.names=cerc.geno$Par,pop=as.character(cerc.geno$Village)) genind.host <- df2genind(X=mira.geno[,-c(1:4)],ncode = 6, ind.names=mira.geno$Par,pop=as.character(mira.geno$Host)) genind.village <- df2genind(X=mira.geno[,-c(1:4)],ncode=6,ind.names=mira.geno$Par,pop=as.character(mira.geno$Village)) # 1. Replace individual names by generic labels indNames(genind.cerc) <- c(1:length(indNames(genind.cerc))) indNames(genind.host) <- c(1:length(indNames(genind.host))) indNames(genind.village) <- c(1:length(indNames(genind.village))) # 2. Create hierfstat object hierf.host <- genind2hierfstat(genind.host) hierf.village <- genind2hierfstat(genind.village) hierf.cerc <- genind2hierfstat(genind.cerc) # 3. Create structure file structure <- genind2df(genind.host, oneColPerAll=T) structure[structure == "NA"] <- 0 popnumber <- gsub('P','',genind.host$pop) structure <- cbind(c(1:length(indNames(genind.host))),as.numeric(popnumber),structure[,-1]) write.table(structure, file = 'StructureInbreeding.txt', eol = '\r\n',quote= F, append = T, col.names= F, row.names = F) ## Number of Alleles and Allelic Richness ## -------------------------------------- # 1. Number of alleles per locus sum.host <- summary(genind.host) mean(sum.host$loc.nall) # 2. AR per HOST AR.host <- allelic.richness(hierf.host) ARhost.min <- AR.host$min.all AR <- AR.host$Ar AR[AR>10] <- NA meanAR <- colMeans(AR,na.rm=T) names(meanAR) <- genind.host$pop.names meanAR <- as.data.frame(meanAR) meanAR <- cbind(sum.host$pop.eff, meanAR) write.csv2(meanAR, file = 'AR_host.csv') # AR per VILLAGE AR.village <- allelic.richness(hierf.village, min.n=ARhost.min) MAR.village <- colMeans(AR.village$Ar) write.csv2(MAR.village, file = 'ARvillage.csv') # AR cerc AR.cerc <- allelic.richness(hierf.cerc,min.n=ARhost.min) MAR.cerc <- colMeans(AR.cerc$Ar) ## Hierarchical analysis of genetic diversity ## ------------------------------------------ Village <- as.numeric(gsub('P','',genind.village$pop)) Host <- hierf.host[,1] Loci <- hierf.host[,-1] # Multilocus estimator varcomp.glob(data.frame(Village,Host),Loci) test.between(Loci[,-c(2,9)], test.lev = Village, rand.unit = Host, nperm = 1000) test.within(Loci[,-c(2,9)], within = Village, test.lev = Host, nperm = 1000) bootstrap <- boot.vc(data.frame(Village,Host),Loci,nboot=10000) bootstrap$ci # Multilocus estimator recoded following Hedrick 2005 and Meirmans 2006 hostRecoded <- read.fstat('HostRecoded.dat') villageRecoded <- read.fstat('VillageRecoded.dat') varcomp.glob(data.frame(Village),villageRecoded[,-1]) varcomp.glob(hostRecoded[1,],hostRecoded[,-1]) # single locus estimators L46951 <- varcomp(data.frame(Village,Host,hierf.host[,2]),diploid=TRUE) ca11.1 <- varcomp(data.frame(Village,Host,hierf.host[,3]),diploid=TRUE) s91 <- varcomp(data.frame(Village,Host,hierf.host[,4]),diploid=TRUE) smd11 <- varcomp(data.frame(Village,Host,hierf.host[,5]),diploid=TRUE) smd25 <- varcomp(data.frame(Village,Host,hierf.host[,6]),diploid=TRUE) smd28 <- varcomp(data.frame(Village,Host,hierf.host[,7]),diploid=TRUE) smd43 <- varcomp(data.frame(Village,Host,hierf.host[,8]),diploid=TRUE) smd89 <- varcomp(data.frame(Village,Host,hierf.host[,9]),diploid=TRUE) smda28 <- varcomp(data.frame(Village,Host,hierf.host[,10]),diploid=TRUE) test.between(data.frame(hierf.host[,2]),rand.unit= Host,test= Village,nperm = 10000) test.within(data.frame(hierf.host[,2]),test= Host, within = Village,nperm = 10000) # Temporal differences in allele frequencies par.Pakh <- which(genotypes$Village=='Pakh',arr.ind=T) time.Pakh <- as.numeric(genotypes$Time[par.Pakh]) host.Pakh <- hierf.host[par.Pakh,1] loci.Pakh <- hierf.host[par.Pakh,-1] Pakh <- data.frame(time.Pakh,host.Pakh,loci.Pakh) Pakh <- Pakh[order(time.Pakh,host.Pakh),] varcomp.glob(data.frame(Pakh$time.Pakh,Pakh$host.Pakh),Pakh[,-c(1,2)]) test.between(data=Pakh[,-c(1,2,4,11)],test.lev=Pakh$time.Pakh,rand.unit=Pakh$host.Pakh, nperm = 10000) boot.Pakh <- boot.vc(data.frame(Pakh$time.Pakh,Pakh$host.Pakh),Pakh[,-c(1,2)],nboot=1000) boot.Pakh$ci par.Ndieumeul <- which(genotypes$Village=='Ndieumeul',arr.ind=T) time.Ndieumeul <- as.numeric(genotypes$Time[par.Ndieumeul]) host.Ndieumeul <- hierf.host[par.Ndieumeul,1] loci.Ndieumeul <- hierf.host[par.Ndieumeul,-1] Ndieumeul <- data.frame(time.Ndieumeul,host.Ndieumeul,loci.Ndieumeul) Ndieumeul <- Ndieumeul[order(time.Ndieumeul,host.Ndieumeul),] varcomp.glob(data.frame(Ndieumeul$time.Ndieumeul,Ndieumeul$host.Ndieumeul),Ndieumeul[,-c(1,2)]) test.between(data=Ndieumeul[,-c(1,2,4,11)],test.lev=Ndieumeul$time.Ndieumeul,rand.unit=Ndieumeul$host.Ndieumeul, nperm = 10000) boot.Ndieum <- boot.vc(data.frame(Ndieumeul$time.Ndieumeul,Ndieumeul$host.Ndieumeul),Ndieumeul[,-c(1,2)],nboot=1000) boot.Ndieum$ci ## Testing for Inbreeding ## ---------------------- ## Function to assess MLH for genind files MultiLocHeteroz <- function(data) { locnames <- names(data$loc.names) locnumber <- c(1:length(locnames)) Nloci <- length(locnumber) Nindividuals <- length(data$ind.names) Npop <- length(data$pop.names) # Create a empty matrix for number individuals and loci MLH <- matrix(rep(NA,Nindividuals*Nloci), nrow = Nindividuals, ncol = Nloci) for (locus in locnumber) { #1. Seperate genind file into genind file per locus perloc <- data[loc=locnames[locus]] #2. Convert to dataframe with 1 column per allele peral <- genind2df(x=perloc,oneColPerAll=T) peral[peral=='NA'] <- 000 #3. Recode per individual whether locus is NA (0) homozygote (1) or heterozygote (2) for (ind in c(1:Nindividuals)) { if (peral[ind,2] == 0) { x <- 0 } else { if (peral[ind,2] == peral[ind,3]) { x <- 1 } else x <- 2 } MLH[ind,locus] <- x } } # Number of successfully amplified loci per individual SucLoc <- rep(NA,Nindividuals) for (ind in c(1:Nindividuals)) { SucLoc[ind] <- Nloci-sum(MLH[ind,] == 0) } # Number of heterozygous loci per individual HetLoc <- rep(NA,Nindividuals) for (ind in c(1:Nindividuals)) { HetLoc[ind] <- sum(MLH[ind,] == 2) } # MLH per individual MLH <- HetLoc/SucLoc # average MLH for all individuals MLHaverage <- mean(MLH) # Average MLH within each population MLH <- cbind(data$pop,MLH) MLH.av <- rep(NA,Npop) for (pop in c(1:Npop)) { MLH.av[pop] <- mean(MLH[MLH[,1]==pop,2]) } # Standardized estimate of MLH sMLH <- MLH.av/MLHaverage # Proportion of individuals per population in each MLH class (5 classes in total) MLH.perclass <- matrix(rep(NA, Npop*5), nrow = Npop, ncol = 5) for (pop in c(1:Npop)) { tmp <- MLH[MLH[,1]==pop,2] oneclass <- sum(tmp < 0.2) twoclass <- sum(tmp > 0.2 & tmp < 0.4) threeclass <- sum(tmp > 0.4 & tmp < 0.6) fourclass <- sum(tmp > 0.6 & tmp < 0.8) fiveclass <- sum(tmp > 0.8) x <- c(oneclass, twoclass, threeclass, fourclass, fiveclass) x <- x/sum(x) MLH.perclass[pop,] <- x } final.MLH <- cbind(MLH.av,sMLH,MLH.perclass) colnames(final.MLH) <- c('Average MLH per Pop', 'Standardised MLH', 'MLHclass1','MLHclass2','MLHclass3','MLHclass4', 'MLHclass5') rownames(final.MLH) <- levels(factor(data$pop)) return(final.MLH) } ## Observed distribution of MLH classes obs.mlh <- MultiLocHeteroz(genind.host) popnames <- levels(factor(genind.host$pop)) # popnames <- levels(factor(genind.cerc$pop)) # obs.mlh.cerc <- MultiLocHeteroz(genind.cerc) ## Expected distribution of MLH classes # First permutate alleles within populations and create 1000 new gtx datasets in GENETIX # Read in all datasets and calculate mlh per dataset setwd("/Users/Fre/Dropbox/SCIENCE/Papers/2013/5_Schisto_Inbreeding/3_HeredityV2/FINAL/GENETIX/PermutationsAlleleCerc/") filenames <- list.files(path=getwd()) numfiles <- length(filenames) for (genetix in c(1:numfiles)) { gtx <- read.genetix(filenames[genetix],quiet=T) gtx.mlh <- MultiLocHeteroz(gtx) final <- cbind(genetix,popnames,gtx.mlh) write.table(final, file = 'expectedMLH.txt', quote= F, append = T, col.names= F, row.names = F) } # Mean Expected distribution of each class and the average MLH per pop mlh.exp <- read.table('expectedMLH.txt', header = F) MeanExp <- matrix(rep(NA,length(popnames)*7), nrow = length(popnames), ncol = 7) rownames(MeanExp) <- popnames colnames(MeanExp) <- c('average','standardised', 'MLH.cl1','MLH.cl2','MLH.cl3','MLH.cl4','MLH.cl5') for (pop in popnames) { exp.per.pop <- mlh.exp[mlh.exp[,2] == pop,] MeanExp[pop,] <- colMeans(exp.per.pop[,-c(1,2)]) } # Obtain p values PvalMLH <- matrix(rep(NA,length(popnames)*7), nrow = length(popnames), ncol = 7) rownames(PvalMLH) <- popnames colnames(PvalMLH) <- c('average','standardised','MLH.cl1','MLH.cl2','MLH.cl3','MLH.cl4','MLH.cl5') for (pop in popnames) { exp.per.pop <- mlh.exp[mlh.exp[,2] == pop,] # H1: average observed MLH is smaller than average expected MLH under random mating p1 <- sum(exp.per.pop[,3] < obs.mlh[pop,1])/length(exp.per.pop[,3]) p1b <- sum(exp.per.pop[,4] < obs.mlh[pop,2])/length(exp.per.pop[,4]) # H1: proportion of individuals in lowest MLH class is larger than expected under random mating p2 <- sum(exp.per.pop[,5] > obs.mlh[pop,3])/length(exp.per.pop[,5]) p3 <- sum(exp.per.pop[,6] > obs.mlh[pop,4])/length(exp.per.pop[,6]) p4 <- sum(exp.per.pop[,7] > obs.mlh[pop,5])/length(exp.per.pop[,7]) p5 <- sum(exp.per.pop[,8] > obs.mlh[pop,6])/length(exp.per.pop[,8]) p6 <- sum(exp.per.pop[,9] > obs.mlh[pop,7])/length(exp.per.pop[,9]) PvalMLH[pop,] <- cbind(p1,p1b,p2,p3,p4,p5,p6) } # Homozygote excess or heterozygote deficiency: MLHobs - MLHexp df_obsexp <- (obs.mlh-MeanExp)/MeanExp # Correlation of frequency of MLH class with Fis cor.test(df_obsexp[,2],host$Fis) plot(df_obsexp[,2],host$Fis, xlab = '(MLHobs - MLHexp) / MLHexp', ylab = 'Fis', pch = 16) abline(lm(host$Fis~df_obsexp[,2])) #text(locator(1), 'p < 0.001; r = -0.99') ## Tests for family structure ## -------------------------- setwd('/Users/Fre/Dropbox/SCIENCE/Papers/2013/5_Schisto_Inbreeding/3_HeredityV2/FINAL/GENETIX/PermutationsGenotypeIdentix/') PvalREL <- data.frame(rep(NA,length(popnames)*1)) popnames <- levels(factor(genind.host$pop)) rownames(PvalREL) <- popnames for (pop in popnames) { exp.rel <- read.table(paste(pop,'.txt',sep=''), skip = 6) obs.rel <- read.table(paste(pop,'.txt',sep=''), skip = 4, nrows=1) exp.rel <- as.numeric(unlist(exp.rel)) obs.rel <- as.numeric(obs.rel[1]) PvalREL[pop,] <- sum(exp.rel > obs.rel)/length(exp.rel) } ## Final table with all p values for family structure and inbreeding tests ## ----------------------------------------------------------------------- # Table1 with all classes head(PvalREL); head(PvalMLH) pvaltable <- cbind(PvalREL,PvalMLH) colnames(pvaltable)[1] <- 'Relatedness' write.csv2(pvaltable, file = 'PvalueTable.csv') ## Table2 for paper FinalTable <- matrix(rep(NA,length(popnames)*4), nrow = length(popnames), ncol = 4) rownames(FinalTable) <- popnames colnames(FinalTable) <- c('Rel<', 'Rel>','MLH<','MLH>') for (pop in popnames) { exp.rel <- read.table(paste('PermutRelat',pop,'.txt',sep=''), skip = 6) obs.rel <- read.table(paste('PermutRelat',pop,'.txt',sep=''), skip = 4, nrows=1) exp.rel <- as.numeric(unlist(exp.rel)) obs.rel <- as.numeric(obs.rel[1]) prelsmaller <- sum(exp.rel < obs.rel)/length(exp.rel) prellarger <- sum(exp.rel > obs.rel)/length(exp.rel) exp.per.pop <- mlh.exp[mlh.exp[,2] == pop,] pmlhsmaller <- sum(exp.per.pop[,3] < obs.mlh[pop,1])/length(exp.per.pop[,3]) pmlhlarger <- sum(exp.per.pop[,3] > obs.mlh[pop,1])/length(exp.per.pop[,3]) FinalTable[pop,] <- cbind(prelsmaller,prellarger,pmlhsmaller,pmlhlarger) } Table <- cbind(host$Fis,FinalTable) write.csv2(Table,file='Table3.csv2') ## PLOTS ## ----- par(mfrow=c(2,3)) plot(Age,Fis, pch = 16, ylim = c(-0.2,0.8), xlab = 'Host Age', ylab = 'Parasite Inbreeding') abline(lm(Fis~Age)) #cor.fis <- cor.test(Age,Fis) #legend('topright', paste('r = ', round(cor.fis$estimate, digits = 2), '; p = ', round(cor.fis$p.value,digits = 2))) plot(Age[which(host$Gender == 1)], Fis[which(host$Gender == 1)], pch = 16, ylim = c(-0.2,0.8), xlab = 'Host Age', ylab = 'Parasite Inbreeding') points(Age[which(host$Gender == 2)], Fis[which(host$Gender == 2)], pch = 1, ylim = c(-0.2,0.8)) abline(lm(Fis[which(host$Gender == 1)] ~ Age[which(host$Gender == 1)])) abline(lm(Fis[which(host$Gender == 2)] ~ Age[which(host$Gender == 2)]), lty = 2) legend("topright", lty = c(1,2), pch = c(16,1),c('Male','Female')) # malecor <- cor.test(Age[which(host$Gender == 1)], Fis[which(host$Gender == 1)]) # femalecor <- cor.test(Age[which(host$Gender == 2)], Fis[which(host$Gender == 2)]) # legend("topright", lty = c(1,2), pch = c(16,1), # c(paste('Male; ', 'r = ', round(malecor$estimate, digits = 2), '; p = ', round(malecor$p.value,digits = 2), sep = ''), # paste('Female; ', 'r = ', round(femalecor$estimate, digits = 2), '; p = ', round(femalecor$p.value,digits = 2), sep = '') # )) plot(Age[which(host$Village == 'Diokhor')], Fis[which(host$Village == 'Diokhor')], pch = 16, ylim = c(-0.2,0.8), xlab = 'Host Age', ylab = 'Parasite Inbreeding') points(Age[which(host$Village == 'Pakh')], Fis[which(host$Village == 'Pakh')], pch = 1, ylim = c(-0.2,0.8)) points(Age[which(host$Village == 'Ndieumeul')], Fis[which(host$Village == 'Ndieumeul')], pch = 2, ylim = c(-0.2,0.8)) abline(lm(Fis[which(host$Village == 'Diokhor')] ~ Age[which(host$Village == 'Diokhor')])) abline(lm(Fis[which(host$Village == 'Pakh')] ~ Age[which(host$Village == 'Pakh')]), lty = 2) abline(lm(Fis[which(host$Village == 'Ndieumeul')] ~ Age[which(host$Village == 'Ndieumeul')]), lty = 4) #DIOKHcor <- cor.test(Age[which(host$Village == 'Diokhor')], Fis[which(host$Village == 'Diokhor')]) #PAKHcor <- cor.test(Age[which(host$Village == 'Pakh')], Fis[which(host$Village == 'Pakh')]) #NDIEUMcor <- cor.test(Age[which(host$Village == 'Ndieumeul')], Fis[which(host$Village == 'Ndieumeul')]) legend("topright", lty = c(1,2,4),pch = c(16,1,2),c('Diokhor','Pakh','Ndieumeul')) # c(paste('Diokhor; ', 'r = ', round(DIOKHcor$estimate, digits = 2), '; p = ', round(DIOKHcor$p.value,digits = 2), sep = ''), # paste('Pakh; ', 'r = ', round(PAKHcor$estimate, digits = 2), '; p = ', round(PAKHcor$p.value,digits = 2), sep = ''), # paste('Ndieumeul; ', 'r = ', round(NDIEUMcor$estimate, digits = 2), '; p = ', round(NDIEUMcor$p.value,digits = 2), sep = '') # )) plot(Age,Hobs, pch = 16, ylim = c(-0.2,0.8), xlab = 'Host Age', ylab = 'Parasite Heterozygosity') abline(lm(Hobs~Age)) #cor.Hobs <- cor.test(Age,Hobs) #legend('topright', paste('r = ', round(cor.Hobs$estimate, digits = 2), '; p = ', round(cor.Hobs$p.value,digits = 2))) plot(Age[which(host$Gender == 1)], Hobs[which(host$Gender == 1)], pch = 16, ylim = c(-0.2,0.8), xlab = 'Host Age', ylab = 'Parasite Heterozygosity') points(Age[which(host$Gender == 2)], Hobs[which(host$Gender == 2)], pch = 1, ylim = c(-0.2,0.8)) abline(lm(Hobs[which(host$Gender == 1)] ~ Age[which(host$Gender == 1)])) abline(lm(Hobs[which(host$Gender == 2)] ~ Age[which(host$Gender == 2)]), lty = 2) legend("bottomright", lty = c(1,2), pch = c(16,1),c('Male','Female')) # malecor <- cor.test(Age[which(host$Gender == 1)], Hobs[which(host$Gender == 1)]) # femalecor <- cor.test(Age[which(host$Gender == 2)], Hobs[which(host$Gender == 2)]) # legend("bottomright", lty = c(1,2), pch = c(16,1), # c(paste('Male; ', 'r = ', round(malecor$estimate, digits = 2), '; p = ', round(malecor$p.value,digits = 2), sep = ''), # paste('Female; ', 'r = ', round(femalecor$estimate, digits = 2), '; p = ', round(femalecor$p.value,digits = 2), sep = '') # )) plot(Age[which(host$Village == 'Diokhor')], Hobs[which(host$Village == 'Diokhor')], pch = 16, ylim = c(-0.2,0.8), xlab = 'Host Age', ylab = 'Parasite Heterozygosity') points(Age[which(host$Village == 'Pakh')], Hobs[which(host$Village == 'Pakh')], pch = 1, ylim = c(-0.2,0.8)) points(Age[which(host$Village == 'Ndieumeul')], Hobs[which(host$Village == 'Ndieumeul')], pch = 2, ylim = c(-0.2,0.8)) abline(lm(Hobs[which(host$Village == 'Diokhor')] ~ Age[which(host$Village == 'Diokhor')])) abline(lm(Hobs[which(host$Village == 'Pakh')] ~ Age[which(host$Village == 'Pakh')]), lty = 2) abline(lm(Hobs[which(host$Village == 'Ndieumeul')] ~ Age[which(host$Village == 'Ndieumeul')]), lty = 4) #DIOKHcor <- cor.test(Age[which(host$Village == 'Diokhor')], Hobs[which(host$Village == 'Diokhor')]) #PAKHcor <- cor.test(Age[which(host$Village == 'Pakh')], Hobs[which(host$Village == 'Pakh')]) #NDIEUMcor <- cor.test(Age[which(host$Village == 'Ndieumeul')], Hobs[which(host$Village == 'Ndieumeul')]) legend("bottomright", lty = c(1,2,4),pch = c(16,1,2), c('Diokhor','Pakh','Ndieumeul')) # c(paste('Diokhor; ', 'r = ', round(DIOKHcor$estimate, digits = 2), '; p = ', round(DIOKHcor$p.value,digits = 2), sep = ''), # paste('Pakh; ', 'r = ', round(PAKHcor$estimate, digits = 2), '; p = ', round(PAKHcor$p.value,digits = 2), sep = ''), # paste('Ndieumeul; ', 'r = ', round(NDIEUMcor$estimate, digits = 2), '; p = ', round(NDIEUMcor$p.value,digits = 2), sep = '') # ))