## ***** Table 2 - mixed phenotypes assayed on the gel corresponded to mixed genotypes - Table 2 *****
## This code requires ggplot2 version 0.9.1
## This code requires mixtools version 0.4.5
## This code requires d_log_adj() function. See: mobility_of_crosses.txt
library(ggplot2)
library(mixtools)
# Data file: phenotypic_mixing_experiment.csv
# The "Rf" column contains the position sampled on the gel (relative to dye migration)
#
# The Count column: As described in the paper, tips on an 8-channel pipette were used to sample the gel after the run was finished.
# Instead of going into liquid media to do serial dilutions, the tips were poked serially 12 times on an agar plate containing a bacterial (Pp) lawn.
# The locations of the serial pokes would exhibit a plaque until the viruses were diluted in successive pokes. Thus, a count of 1 represents the most dilute
# detectable concentration and 12 most concentrated. The dilution factor of this method is approximately XX, meaning that a count of 1 represents a XX-fold
# dilution and 12 represents an XX fold dilution. **The function d_log(), see below, is used to transform these counts to reflect their logarithmic scale.
#
# For the phenotypic mixing experiments SSxFF, PT309xFF (i.e. phiH2xFF), CA65axFF, KRI300xFF are identical to the corresponding data in 'mobility_of_crosses.txt'
# Since these distributions are derived from single burst lysates they are designated SBL
# SSxFF2, PT309xFF2, CA65axFF2, KRI300xFF2 represent distributions generated from lysate derived from single plaques harvested from the SBL,
# allowing an opportunity for clones to express their genotypes without other strains in the cell. These are designated SP for single plaques
#
#
# The remaining columns are self explanatory
# Import Data from .csv file
data <- read.csv("mobility_of_crosses.csv")
# Make data columns available to R
# Creates a data frame for each cross with the same structure as the .csv file. They are named SSxFF, PT309xFF, CA65axFF, KRI300xFF and so on, for SBL
# and SSxFF2, PT309xFF2, CA65axFF2, KRI300xFF2 for SP's
all_splits <- split(data, data$Name)
attach(all_splits)
# Example of how to generate data in Table 2 using CA65axFF. Straightforward to implement with other strains
# Create vector of Rf values as in mobility_of_crosses.txt
d_CA65axFF <- d_log_adj(CA65axFF)
d_CA65axFF2 <- d_log_adj(CA65axFF2)
# Generate mixture model for SBL
results_sbl <- normalmixEM(d_CA65axFF, k=3, mean.constr = c(0.7369916, NA, 1.463648), epsilon=1e-2, maxit=400)
number of iterations= 58
summary(results_sbl)
# The output used for the published paper is replicated here because the results are simulation based and may differ slightly
# summary of normalmixEM object:
# comp 1 comp 2 comp 3
# lambda 0.2023357 0.336499 0.461166
# mu 0.7369916 0.997316 1.463648
# sigma 0.0145774 0.400616 0.258172
# loglik at estimate: -76.08733
# Then generate mixture model for SP treatment.
results_sp <- normalmixEM(d_CA65axFF2, k=3, mean.constr = c(0.7369916, NA, 1.463648), epsilon=1e-2, maxit=400)
number of iterations= 21
summary(results_sp)
# The output used for the published paper is replicated here because the results are simulation based and may differ slightly
# summary of normalmixEM object:
# comp 1 comp 2 comp 3
# lambda 0.521056 0.3579507 0.120994
# mu 0.736992 1.2100603 1.463648
# sigma 0.300062 0.0463425 0.204505
# loglik at estimate: -30.88868
# Note that the estimated hybrid component is virtually identical for SBL (0.34) and SP (0.36) treatments
# Now to compare the means of the main components, simulate norm data based on the mean, sd and proportion (N) of the main component(s)
sbl <- mvrnorm(n = results_sbl$lambda[2]*length(results_sbl$x), results_sbl$mu[2], results_sbl$sigma[2], tol = 1e-6, empirical = TRUE)
sp <- mvrnorm(n = results_sp$lambda[2]*length(results_sp$x), results_sp$mu[2], results_sp$sigma[2], tol = 1e-6, empirical = TRUE)
#Compare means, but first test whether we have equal variances to conduct Student's t or Welch's test. Normal testing not necessary because simulated from the normal above
var.test(sbl, sp)
# F test to compare two variances
# data: sbl and sp
# F = 8.6447, num df = 103, denom df = 63, p-value = 2.22e-16
# alternative hypothesis: true ratio of variances is not equal to 1
# 95 percent confidence interval:
# 5.463419 13.349922
# sample estimates:
# ratio of variances
# 8.644696
# Variances not equal so Welch's test
t.test(sp, sbl, conf.int=T, var.equal=F)
# Welch Two Sample t-test
#
# data: sp and sbl
# t = 3.1449, df = 137.424, p-value = 0.002037
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# 0.07897963 0.34650876
# sample estimates:
# mean of x mean of y
# 1.2100603 0.9973161
# In this case the estimated means were significantly different, but still intermediate to the parentals and, importantly, the proportion of hybrids in the mixture is the same.
#If you made it this far, let me know how this data package was useful for you. Very interested in how people will use open data and open code. Please contact me at the following email address:
me ((at)) samdiazmunoz.org
-Samuel Díaz-Muñoz