# -*- coding: utf-8 -*-
"""
Created on Mon Jul 7 17:17:13 2021
@author: dujidan
"""
import ast
import sys
# taxid <=> name
def _parse_taxid_input(raw_taxids):
    """Normalise the caller's input into a list of taxids.

    Accepts an iterable of taxids, a single int, or a string holding either a
    Python literal (``"['562', 573]"``, ``"562"``) or bare comma/whitespace
    separated tokens (``"562,573"``).
    """
    if isinstance(raw_taxids, str):
        try:
            # SECURITY: this used to be eval(), which executes arbitrary code
            # coming from the command line. literal_eval only accepts plain
            # Python literals (lists, tuples, strings, numbers).
            parsed = ast.literal_eval(raw_taxids.strip())
        except (ValueError, SyntaxError):
            # Not a literal: treat it as bare tokens split on commas/whitespace.
            parsed = raw_taxids.replace(',', ' ').split()
    else:
        parsed = raw_taxids
    # A lone taxid is wrapped so the caller can always iterate.
    if isinstance(parsed, (str, int)):
        return [parsed]
    return list(parsed)


def _load_scientific_names(names_path):
    """Return a {taxid: name} dict of the 'scientific name' rows in names_path."""
    taxid_name_dict = {}
    with open(names_path, encoding='utf-8') as f_tn:
        # Stream the file line by line instead of loading it all at once.
        for line in f_tn:
            fields = line.rstrip('\n').split('\t')
            # Splitting on '\t' leaves the '|' separators at odd indices, so
            # taxid, name and name class sit at indices 0, 2 and 6.
            # Lines that are too short (blank or truncated) are skipped.
            if len(fields) > 6 and fields[6] == 'scientific name':
                taxid_name_dict[fields[0]] = fields[2]
    return taxid_name_dict


def taxid2name(input_taxid_list, taxid_name_file='taxonomy/names.dmp'):
    """Print ``<taxid>\\t<scientific name>`` for every requested taxid.

    Args:
        input_taxid_list: taxids to look up. Either an iterable of ints/strings,
            a single int, or a string containing a Python literal list/tuple
            (as passed on the command line) or comma/whitespace separated ids.
        taxid_name_file: path to the names.dmp file to read. Defaults to the
            previously hard-coded 'taxonomy/names.dmp'.

    Raises:
        KeyError: if a requested taxid has no 'scientific name' row in the file.
        OSError: if the names file cannot be opened.
    """
    requested_taxids = _parse_taxid_input(input_taxid_list)
    taxid_name_dict = _load_scientific_names(taxid_name_file)
    for input_taxid in requested_taxids:
        try:
            t_name = taxid_name_dict[str(input_taxid)]
        except KeyError:
            raise KeyError(
                f'taxid {input_taxid!r} has no scientific name in {taxid_name_file}'
            ) from None
        print(f'{input_taxid}\t{t_name}')
if __name__ == '__main__':
    # Expects exactly one argument: the taxid list as a single quoted string, e.g.
    #   python3 this_script.py "['562', '573', '1773']"
    if len(sys.argv) != 2:
        # Usage goes to stderr so it never mixes with the tab-separated
        # results that taxid2name prints on stdout.
        print(f'Error:\n python3 {sys.argv[0]} \t [input_taxid_list]', file=sys.stderr)
        sys.exit(1)
    taxid2name(sys.argv[1])
相关事宜请查看上一篇关于 nodes.dmp 的文章,二者的应用对象是相互关联的。
因篇幅问题不能全部显示,请点此查看更多更全内容