"""Annotate taxon IDs with a name looked up from pipe-delimited taxonomy dumps.

Usage:
    python python_tax_code.py nodes.dmp names.dmp inputfile.txt

* ``nodes.dmp``  -- "|"-separated; field 0 is a taxon id, field 1 its parent
  id, field 2 its rank.
* ``names.dmp``  -- "|"-separated; field 0 is a taxon id, field 1 a name and
  field 3 the name class.  Only rows whose class is "scientific name" are kept.
* ``inputfile.txt`` -- tab-separated; field 0 is a label, field 1 a taxon id.

(The file names suggest the NCBI taxonomy dump format -- presumably that is
the intended input; verify against the data actually used.)

For every input row the script prints the label followed by the result of
:func:`get_taxonomy` for that row's taxon id.
"""
import sys

# Reproduced from the original script; printed when the argument count is wrong.
USAGE = """ python python_tax_code.py nodes.dmp names.dmp inputfile.txt """

# Module-level lookup tables, filled by the loaders below and read by
# get_taxonomy().  All keys and values are whitespace-stripped strings.
parent = {}  # taxon id -> parent taxon id
rank = {}    # taxon id -> rank
names = {}   # taxon id -> scientific name


def get_taxonomy(taxon):
    """Walk up the parent chain from *taxon* until a "phylum" node is found.

    Returns:
        * ``"unknown"`` if the walk reaches taxon ``"1"`` without meeting a
          node of rank "phylum";
        * ``"Parent not found"`` if a taxon on the way is missing from the
          ``parent`` table;
        * otherwise the scientific name of the *parent* of the first node
          whose rank is "phylum".

    Raises:
        KeyError: if the selected parent has no entry in ``names``.
        RuntimeError: if the parent table contains a cycle (the original
            recursive version failed with a recursion-depth RuntimeError in
            that situation; this version reports it explicitly).
    """
    seen = set()
    while taxon != "1":
        if taxon not in parent:
            return "Parent not found"
        if taxon in seen:
            raise RuntimeError("cycle in taxonomy at taxon %s" % taxon)
        seen.add(taxon)

        tmp_parent = parent[taxon]
        if rank[taxon] == "phylum":
            # NOTE(review): this returns the name of the phylum's PARENT, not
            # of the phylum node itself (that would be names[taxon]).
            # Behaviour is kept exactly as in the original -- confirm intent.
            return names[tmp_parent]
        taxon = tmp_parent
    return "unknown"


def _load_nodes(path):
    """Fill ``parent`` and ``rank`` from the "|"-separated file at *path*."""
    with open(path, "r") as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            values = line.split("|")
            taxon = values[0].strip()
            parent[taxon] = values[1].strip()
            rank[taxon] = values[2].strip()


def _load_names(path):
    """Fill ``names`` with the "scientific name" rows of the file at *path*."""
    with open(path, "r") as handle:
        for line in handle:
            if not line.strip():
                continue
            values = line.split("|")
            if values[3].strip() == "scientific name":
                names[values[0].strip()] = values[1].strip()


def main(argv=None):
    """Run the script; *argv* defaults to ``sys.argv``.

    Prints the usage text and returns when the argument count is not 4
    (program name plus three paths).  Otherwise loads both tables and prints
    one "<label> <result>" line per non-blank row of the input file.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 4:
        print(USAGE)
        return

    _load_nodes(argv[1])
    _load_names(argv[2])

    with open(argv[3], "r") as handle:
        for line in handle:
            if not line.strip():
                continue
            values = line.split("\t")
            taxonom = get_taxonomy(values[1].strip())
            # Single-argument print keeps the output identical on Python 2
            # and Python 3 ("label result").
            print("%s %s" % (values[0].strip(), taxonom))


if __name__ == "__main__":
    main()