|
| 1 | +#!/usr/bin/python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +import click |
| 5 | +from pymongo import MongoClient |
| 6 | + |
# MongoDB filter used to locate cases with any diagnosis: the
# "diagnosis_phenotypes" field must exist and must not equal an empty list.
CASES_WITH_DIA = {"diagnosis_phenotypes": {"$exists": True, "$ne": []}}
| 10 | + |
# Projection applied together with the case query: return only these few
# fields (MongoDB includes a field when its projection value is 1).
SELECT_FIELDS = dict.fromkeys(("owner", "display_name", "diagnosis_phenotypes"), 1)
| 16 | + |
| 17 | + |
@click.command()
@click.option("--db-uri", required=True, help="mongodb://user:password@db_url:db_port")
@click.option("--db-name", required=True, help="db name")
@click.option("--fix", help="Use this flag to fix the OMIM format in old cases", is_flag=True)
def omim_case_fix_format(db_uri, db_name, fix):
    """List cases whose diagnoses use the old format and optionally convert them.

    The old format is a plain list of integers stored in the case's
    ``diagnosis_phenotypes`` field. Each integer is looked up in the
    ``disease_term`` collection by ``disease_nr`` and turned into a dict
    holding ``disease_nr``, ``disease_id`` and ``description``.

    Args:
        db_uri: MongoDB connection URI.
        db_name: Name of the database holding the ``case`` and
            ``disease_term`` collections.
        fix: When set, the converted diagnoses are written back to each case.
            When not set nothing is written and the report line shows the
            diagnoses unchanged.

    Raises:
        click.ClickException: If any step fails, so that the command reports
            the error and exits with a non-zero status instead of looking
            like a successful run.
    """
    client = None
    try:
        client = MongoClient(db_uri)
        db = client[db_name]
        # Echo the database handle for the operator. This does not contact the
        # server by itself; the first query below is what surfaces connection
        # problems.
        click.echo("database connection info:{}".format(db))

        cases_with_dia = list(db.case.find(CASES_WITH_DIA, SELECT_FIELDS))
        click.echo(f"Total number of cases with diagnosis:{len(cases_with_dia)}")

        # Display cases with old format of diagnosis (a list of integers).
        # The query also matches null / non-list values, so check the container
        # type before looking at the first element.
        cases_with_old_dia = [
            case
            for case in cases_with_dia
            if isinstance(case["diagnosis_phenotypes"], list)
            and case["diagnosis_phenotypes"]
            and isinstance(case["diagnosis_phenotypes"][0], int)
        ]
        click.echo(f"Total number of cases with old diagnosis format:{len(cases_with_old_dia)}")

        for i, case in enumerate(cases_with_old_dia):
            click.echo(f"n:{i}\t{case['owner']}\t{case['display_name']}")
            old_dia = case["diagnosis_phenotypes"]
            new_dia = []

            for dia_nr in old_dia:
                disease_term = db.disease_term.find_one({"disease_nr": dia_nr})
                if disease_term is None:
                    # NOTE(review): an unresolved number is left out of the new
                    # list, so with --fix it disappears from the case. Confirm
                    # that dropping it is the intended outcome.
                    click.echo(f"Could not find a disease term with id:{dia_nr}")
                    continue
                new_dia.append(
                    {
                        "disease_nr": dia_nr,
                        "disease_id": disease_term["disease_id"],
                        "description": disease_term["description"],
                    }
                )

            if fix:
                db.case.find_one_and_update(
                    {"_id": case["_id"]}, {"$set": {"diagnosis_phenotypes": new_dia}}
                )
            else:
                # Dry run: nothing is written, report the diagnoses as unchanged.
                new_dia = old_dia
            click.echo(f"old dia:{old_dia}--->new dia:{new_dia}\n")

    except Exception as err:
        # Top-level boundary of the script: report the failure through click so
        # the process exits with a non-zero status.
        raise click.ClickException(str(err)) from err
    finally:
        # Release the connection pool whether the run succeeded or failed.
        if client is not None:
            client.close()
| 66 | + |
| 67 | + |
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    omim_case_fix_format()
0 commit comments