# v2:: uses input from snpEff # second input: IM.all.genotypes.txt instead of IM.genomat.all.txt # v3:: analysis in windows windowsize = 500 # snps per window bad_threshold=0.051 inputIM = "IM.all.sorted.txt" # or IM.genomat.all.txt import sys # scaffold_1 current_file2 = open("window.key.txt", 'w') lastvals={} names={} good_bad={} src = open("samples.187.txt", "rU") # for line_idx, line in enumerate(src): cols = line.replace('\n', '').split('\t') # 9 109 0 names[int(cols[0])-9]=cols[1] good_bad[int(cols[0])-9]=[0,0] lastvals[int(cols[0])-9]=[0,0] src.close() print names print good_bad crel=0 relevantloci={} in1= open("MinorAlleles.in.IM.txt", "rU") # IM.GWAS.polymorphisms.scaffold_1.txt for line_idx, line in enumerate(in1): cols = line.replace('\n', '').split('\t') # scaffold_17 911 C A 0.965986394558 # scaffold_17 1287 G A 0.993506493506 # scaffold_17 1840 C G 1.0 crel+=1 if float(cols[4])(1.0-bad_threshold): relevantloci[cols[0]+"_"+cols[1]+"_"+cols[2]+"_"+cols[3]]= 1 # alt allele is rare print "found in MinorAlleles ",crel crel=0 cc=0 windowID=1 src = open(inputIM, "rU") # for line_idx, line in enumerate(src): cols = line.replace('\n', '').split('\t') # scaffold_17 1912 A C 32632.54 AC=335;AF=0.825;AN=406;BaseQRankSum=0.145;DP=1302;Dels=0.00;FS=5.281;HaplotypeScore=0.2555;InbreedingCoeff=0.3806;MLEAC=335;MLEAF=0.825;MQ=58.04;MQ0=6;MQRankSum=3.898;QD=28.55;ReadPosRankSum=6.286 2 0 2 0 2 2 0 2 0 2 2 0 2 2 2 0 2 2 2 2 ... if line_idx==0: winst = [cols[0],cols[1]] try: bad = relevantloci[ cols[0]+"_"+cols[1]+"_"+cols[2]+"_"+cols[3] ] if cc==windowsize: current_file2.write(str(windowID)+'\t'+winst[0]+'\t'+winst[1]+'\t'+cols[0]+'\t'+cols[1]+'\n') for j in range(187): out1=open(names[j]+".minors.txt","a") if good_bad[j][0]+good_bad[j][1]>0: out1.write(str(windowID)+'\t'+str( good_bad[j][0] )+'\t'+str( good_bad[j][1] )+'\t'+str( float(good_bad[j][1])/float(good_bad[j][0]+good_bad[j][1]) ) +'\n') else: out1.write(str(windowID)+'\t'+str( lastvals[j][0] )+'\t'+str( lastvals[j][1] )+'\t'+str( float(lastvals[j][1])/float(lastvals[j][0]+lastvals[j][1]) ) +'\n') out1.close() cc=0 print windowID,winst windowID+=1 winst = [cols[0],cols[1]] for j in range(187): lastvals[j]=[good_bad[j][0],good_bad[j][1]] good_bad[j]=[0,0] for j in range(187): geno=int(cols[j+6]) if (geno==0 and bad==0) or (geno==2 and bad==1): good_bad[j][1]+=1 elif (geno==0 and bad==1) or (geno==2 and bad==0): good_bad[j][0]+=1 # common count cc+=1 except KeyError: # not a rare allele snp pass src.close() # end window for j in range(187): out1=open(names[j]+".minors.txt","a") if good_bad[j][0]+good_bad[j][1]>0: out1.write(str(windowID)+'\t'+str( good_bad[j][0] )+'\t'+str( good_bad[j][1] )+'\t'+str( float(good_bad[j][1])/float(good_bad[j][0]+good_bad[j][1]) ) +'\n') else: out1.write(str(windowID)+'\t'+str( lastvals[j][0] )+'\t'+str( lastvals[j][1] )+'\t'+str( float(lastvals[j][1])/float(lastvals[j][0]+lastvals[j][1]) ) +'\n') out1.close()