#------------------------------------------------------------------------------- # combineSNPs.pl # Combine SNPs from TaqMan assays and SNPs inferred from sequences # April 13, 2016 =comment Waiver To the extent possible under law, Joseph E Neigel has waived all copyright and related or neighboring rights to combineSnps.pl. This work is published from the United States. =cut =comment Citation Cornwell et al. 2016 Chaotic genetic patchiness without sweepstakes reproduction in the shore crab Hemigrapsus oregonensis Marine Ecology Progress Series (in press) =cut #------------------------------------------------------------------------------- # GLOBAL VARIABLES $in1 = 'snpCounts.txt'; $in2 = 'seqSnpCounts.txt'; $out = 'totalSnpCounts.txt'; $freqs = 'totalSnpFrequencies.txt'; $genepop = 'totalSnpsGenepop.txt'; @locations = qw(TLM YQR SLR COB HBB NYR DF DR PB YC HB JT DN PR BR R1 R2 NC SFB EHS); @alleles = qw (A C G T); #------------------------------------------------------------------------------- # OPEN FILES open (OUT, ">$out") || die; open (FRQ, ">$freqs") || die; open (GEN, ">$genepop") || die; #------------------------------------------------------------------------------- parseCounts ($in1); parseCounts ($in2); writeTotals (); writeFrequencies (); makeLocationHash (); writeGroups (); print "finished\n"; #------------------------------------------------------------------------------- sub parseCounts { $infile = $_[0]; open (IN, $infile) || die; while ($line = ) { @fields = split(/\t/, $line); $location = $fields [0]; $locations {$location} = 1; shift (@fields); for ($i = 0; $i < 4; $i++) { # Assign counts to file-specific hash $counts {$infile} {$location} {$alleles [$i]} = $fields [$i]; # Add counts to totals if (!exists $counts {'totals'} {$location} {$alleles [$i]}) { $counts {'totals'} {$location} {$alleles [$i]} = 0; } $counts {'totals'} {$location} {$alleles [$i]} += $fields [$i]; } } } #------------------------------------------------------------------------------- sub writeTotals { foreach $allele (@alleles) { print OUT "\t$allele" } print OUT "\n"; @dataSources = ($in1, $in2, 'totals'); foreach $source (@dataSources) { print OUT "$source\n"; foreach $location (@locations) { print OUT "$location"; foreach $allele (@alleles) { printf OUT "\t%d", $counts {$source} {$location} {$allele}; } print OUT "\n"; } } } #------------------------------------------------------------------------------- sub writeFrequencies { foreach $allele (@alleles) { print FRQ "\t$allele" } print FRQ "\n"; @dataSources = ($in1, $in2, 'totals'); foreach $source (@dataSources) { print FRQ "$source\n"; foreach $location (@locations) { print FRQ "$location"; $localTotal = 0; foreach $allele (@alleles) { $localTotal += $counts {$source} {$location} {$allele}; } if ($localTotal == 0) { foreach $allele (@alleles) { printf FRQ "\t%1.4f", 0; } } else { foreach $allele (@alleles) { printf FRQ "\t%1.4f", $counts {$source} {$location} {$allele} / $localTotal; } } print FRQ "\n"; } } } #------------------------------------------------------------------------------- sub makeLocationHash { # Create hashes for main groupings: # Northern Adults (NoAd), Harbor Adults (HaAd), Harbor Larvae (HaLa), Coast Larvae (CoLa), Offshore Larvae (OfLa), Southern Adults (SoAd) =comment uncomment this section to make aggregates of samples #%NoAd = map{$_ => 'NoAd'} qw(TLM YQR SLR COB); #%HaAd = map{$_ => 'HaAd'} qw (DF DR); #%HaLa = map{$_ => 'HaLa'} qw(PB YC HB); #%CoLa = map{$_ => 'CoLa'} qw(JT DN PR); #%OfLa = map{$_ => 'OfLa'} qw(BR R1 R2); #%SoAd = map{$_ => 'SoAd'} qw(NC SFB EHS); #%locationHash = (%NoAd, %HaAd, %HaLa, %CoLa, %OfLa, %SoAd); =cut %HaAd = map{$_ => 'HaAd'} qw (DF DR); %HaLa = map{$_ => 'HaLa'} qw(PB YC HB); %BaLa = map{$_ => 'BaLa'} qw(JT DN PR BR R1 R2); %SoAd = map{$_ => 'SoAd'} qw(NC SFB EHS); %locationHash = (%HaAd, %HaLa, %BaLa, %SoAd); } #------------------------------------------------------------------------------- sub writeGroups { @groups = qw(NoAd HaAd HaLa CoLa OfLa SoAd); # COMBINE LOCATION COUNTS INTO GROUP COUNTS $source = 'totals'; foreach $location (@locations) { $group = $locationHash {$location}; foreach $allele (@alleles) { if (!exists $counts {$source} {$group} {$allele}) { $counts {$source} {$group} {$allele} = 0; } $counts {$source} {$group} {$allele} += $counts {$source} {$location} {$allele}; } } # WRITE GROUP COUNTS @groups = qw(NoAd HaAd HaLa CoLa OfLa SoAd); foreach $group (@groups) { print OUT "$group"; foreach $allele (@alleles) { printf OUT "\t%d", $counts {$source} {$group} {$allele}; } print OUT "\n"; } # WRITE GROUP FREQUENCIES foreach $group (@groups) { print FRQ "$group"; $groupTotal = 0; foreach $allele (@alleles) { $groupTotal += $counts {$source} {$group} {$allele}; } if ($groupTotal == 0) { foreach $allele (@alleles) { printf FRQ "\t%1.4f", 0; } } else { foreach $allele (@alleles) { printf FRQ "\t%1.4f", $counts {$source} {$group} {$allele} / $groupTotal; } } print FRQ "\n"; } # WRITE GENEPOP FILE # alternate groups # @groups = qw(HaAd HaLa CoLa OfLa SoAd); @groups = qw (HaAd HaLa BaLa SoAd); print GEN "Hemigrapsus oregonensis SNP data February 3, 2011\n"; print GEN "COXI\n"; $i = 1; foreach $allele (@alleles) { $alleleMap {$allele} = '0' . $i; $i++; } foreach $group (@groups) { print GEN "pop\n"; foreach $allele (@alleles) { for ($i = 0; $i < $counts {$source} {$group} {$allele}; $i++) { printf GEN "%s\t,\t%s\n", $group, $alleleMap {$allele}; } } } } #------------------------------------------------------------------------------- #-------------------------------------------------------------------------------