Skip to content

Commit

Permalink
Fixing bug when parsing new dataset schema
Browse files (browse the repository at this point in the history)
  • Loading branch information
r78v10a07 committed Nov 2, 2022
1 parent a585e00 commit f386138
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/gtax/gtax_main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -42,18 +42,18 @@ def filter_metadata_zip():
'{}/ncbi_dataset/data/assembly_data_report.jsonl'.format(db), 'w') as fjson_out:
for line in fjson.readlines():
d = json.loads(line.decode("utf-8"))
v = assemblies_tmp.setdefault(d['taxId'], [])
v = assemblies_tmp.setdefault(d['organism']['taxId'], [])
v.append(d)
for s in assemblies_tmp.keys():
rep_genome = []
for e in assemblies_tmp[s]:
if 'refseqCategory' in e['assemblyInfo']:
rep_genome.append(e)
if len(rep_genome) == 1:
assemblies.append(rep_genome[0]['assemblyInfo']['assemblyAccession'])
assemblies.append(rep_genome[0]['accession'])
fjson_out.write('{}\n'.format(json.dumps(rep_genome[0])))
else:
assemblies.append(assemblies_tmp[s][0]['assemblyInfo']['assemblyAccession'])
assemblies.append(assemblies_tmp[s][0]['accession'])
fjson_out.write('{}\n'.format(json.dumps(assemblies_tmp[s][0])))

print('There are {} assemblies included'.format(len(assemblies)))
Expand Down

0 comments on commit f386138

Please sign in to comment.