diff --git a/.gitignore b/.gitignore index 2424433f464d54ba4b0b92d79bfcdc2c346d6161..ab0c2dcfa60a17d7b771e35b4a4cd373ae6e3bbd 100644 --- a/.gitignore +++ b/.gitignore @@ -14,12 +14,7 @@ __pycache__ # Build build/* -output/ -concepts-output/ -archive/ -maps/* -concepts-new/ -codes/ +*output* # temporary script diff --git a/acmc.py b/acmc.py index 40a3d78b2a432e7f32d700ac8ba19005e2145362..9318e9b159b8cbfece3a2d4333f8d56a7e58b0ac 100644 --- a/acmc.py +++ b/acmc.py @@ -2,61 +2,36 @@ import argparse import trud import omop -import map +import phen from pathlib import Path def trud_install(args): """Handle the `trud install` command.""" - print(f"Installing TRUD") trud.install(args.api_key) - print(f"TRUD installation completed") def omop_install(args): """Handle the `omop install` command.""" - print(f"Installing OMOP database") omop.install(omop.OMOP_DB_PATH, args.omop_folder) - print(f"OMOP installation completed") def omop_clear(args): """Handle the `omop clear` command.""" - print(f"Clearing OMOP data from database") - omop.clear(omop.OMOP_DB_PATH) - print(f"OMOP database cleared") + omop.clear(omop.OMOP_DB_PATH) def omop_delete(args): """Handle the `omop delete` command.""" - print(f"Deleting OMOP database") omop.delete(omop.OMOP_DB_PATH) - print(f"OMOP database deleted") -def map_process(args): - """Handle the `map process` command.""" - print(f"Processing map with phenotype config file: {args.config_file}") - print(f"Output directory: {args.output_file}") - print(f"Target coding format: {args.target_coding}") - if args.translate: - print("Translating code types.") - else: - print("Not translating codes") - if args.verify: - print("Verifying codes.") - else: - print("Not verifying codes.") - if args.error_log: - print(f"Saving errors to: {args.error_log}") - else: - args.error_log = 'errors.csv' - - map.process(args.config_file, - args.source_codes_dir, - args.target_coding, - args.translate, - args.verify, - error_path=Path(args.error_log), - output_path=Path(args.output_file)) +def phen_init(args): + """Handle the `phen init` command.""" + phen.init(args.phen_dir) - print(f"Phenotype processing completed") +def phen_map(args): + """Handle the `phen map` command.""" + phen.map(args.phen_dir, + args.target_coding, + args.translate, + args.verify) def main(): parser = argparse.ArgumentParser(description="ACMC command-line tool") @@ -85,30 +60,31 @@ def main(): # omop clear omop_clear_parser = omop_subparsers.add_parser("clear", help="Clear OMOP data from database") omop_clear_parser.set_defaults(func=omop_clear) + # omop delete omop_delete_parser = omop_subparsers.add_parser("delete", help="Delete OMOP database") omop_delete_parser.set_defaults(func=omop_delete) - ### MAP Command ### - map_parser = subparsers.add_parser("map", help="Map commands") - map_subparsers = map_parser.add_subparsers(dest="subcommand", required=True, help="Map subcommands") + ### PHEN Command ### + phen_parser = subparsers.add_parser("phen", help="Phen commands") + phen_subparsers = phen_parser.add_subparsers(dest="subcommand", required=True, help="Phen subcommands") + + # phen init + phen_init_parser = phen_subparsers.add_parser("init", help="Initiatise phenotype configuration") + phen_init_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory") + phen_init_parser.set_defaults(func=phen_init) - # map process - map_process_parser = map_subparsers.add_parser("process", help="Process map configuration file") - map_process_parser.add_argument("-c", "--config-file", required=True, help="Phenotype configuration file") - map_process_parser.add_argument("-s", "--source-codes-dir", required=True, help="Source codes root directory") - map_process_parser.add_argument("-t", "--target-coding", required=True, choices=['read2_code', 'read3_code', 'icd10_code', 'snomed_code', 'opcs4_code'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)") - map_process_parser.add_argument("-o", "--output-file", type=str, default=str(map.OUTPUT_PATH.resolve()), help="Output directory for CSV or OMOP database") + # phen map + phen_map_parser = phen_subparsers.add_parser("map", help="Process phen configuration file") + phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory") + phen_map_parser.add_argument("-t", "--target-coding", required=True, choices=['read2_code', 'read3_code', 'icd10_code', 'snomed_code', 'opcs4_code'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)") # Flags - map_process_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Do not translate code types") - map_process_parser.add_argument("-v", "--verify", action="store_true", default=False, help="Do not verify codes") - - # Error log file - map_process_parser.add_argument("-l", "--error-log", type=str, default=str(map.ERROR_PATH.resolve()), help="Filepath to save error log to") + phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Do not translate code types") + phen_map_parser.add_argument("-v", "--verify", action="store_true", default=False, help="Do not verify codes") # Set the function to call when 'process' subcommand is used - map_process_parser.set_defaults(func=map_process) + phen_map_parser.set_defaults(func=phen_map) # Parse arguments args = parser.parse_args() diff --git a/example/clinical-codes-org/About the source.docx b/example/codes/clinical-codes-org/About the source.docx similarity index 100% rename from example/clinical-codes-org/About the source.docx rename to example/codes/clinical-codes-org/About the source.docx diff --git a/example/clinical-codes-org/Behaviours code lists/Alcohol intake/Where this code list comes from .docx b/example/codes/clinical-codes-org/Behaviours code lists/Alcohol intake/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Behaviours code lists/Alcohol intake/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Behaviours code lists/Alcohol intake/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Behaviours code lists/Alcohol intake/res47-alcohol-intake.csv b/example/codes/clinical-codes-org/Behaviours code lists/Alcohol intake/res47-alcohol-intake.csv similarity index 100% rename from example/clinical-codes-org/Behaviours code lists/Alcohol intake/res47-alcohol-intake.csv rename to example/codes/clinical-codes-org/Behaviours code lists/Alcohol intake/res47-alcohol-intake.csv diff --git a/example/clinical-codes-org/Behaviours code lists/Physical activity/Where this code list comes from .docx b/example/codes/clinical-codes-org/Behaviours code lists/Physical activity/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Behaviours code lists/Physical activity/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Behaviours code lists/Physical activity/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Behaviours code lists/Physical activity/res47-physical-activity.csv b/example/codes/clinical-codes-org/Behaviours code lists/Physical activity/res47-physical-activity.csv similarity index 100% rename from example/clinical-codes-org/Behaviours code lists/Physical activity/res47-physical-activity.csv rename to example/codes/clinical-codes-org/Behaviours code lists/Physical activity/res47-physical-activity.csv diff --git a/example/clinical-codes-org/Behaviours code lists/Smoking status/Where this code list comes from .docx b/example/codes/clinical-codes-org/Behaviours code lists/Smoking status/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Behaviours code lists/Smoking status/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Behaviours code lists/Smoking status/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Behaviours code lists/Smoking status/res56-smoking-status.csv b/example/codes/clinical-codes-org/Behaviours code lists/Smoking status/res56-smoking-status.csv similarity index 100% rename from example/clinical-codes-org/Behaviours code lists/Smoking status/res56-smoking-status.csv rename to example/codes/clinical-codes-org/Behaviours code lists/Smoking status/res56-smoking-status.csv diff --git a/example/clinical-codes-org/Cardiovascular events (ICD10)/Where this code list comes from .docx b/example/codes/clinical-codes-org/Cardiovascular events (ICD10)/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Cardiovascular events (ICD10)/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Cardiovascular events (ICD10)/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv b/example/codes/clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv similarity index 100% rename from example/clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv rename to example/codes/clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv diff --git a/example/clinical-codes-org/Motor neurone disease/Where this code list comes from .docx b/example/codes/clinical-codes-org/Motor neurone disease/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Motor neurone disease/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Motor neurone disease/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Motor neurone disease/phenotype_PH62_ver_124_concepts_20230719T112819.csv b/example/codes/clinical-codes-org/Motor neurone disease/phenotype_PH62_ver_124_concepts_20230719T112819.csv similarity index 100% rename from example/clinical-codes-org/Motor neurone disease/phenotype_PH62_ver_124_concepts_20230719T112819.csv rename to example/codes/clinical-codes-org/Motor neurone disease/phenotype_PH62_ver_124_concepts_20230719T112819.csv diff --git a/example/clinical-codes-org/Non-attendance codes/Where this code list comes from .docx b/example/codes/clinical-codes-org/Non-attendance codes/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Non-attendance codes/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Non-attendance codes/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv b/example/codes/clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv similarity index 100% rename from example/clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv rename to example/codes/clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv diff --git a/example/clinical-codes-org/Obsessive Compulsive Disorder/Where this code list comes from .docx b/example/codes/clinical-codes-org/Obsessive Compulsive Disorder/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Obsessive Compulsive Disorder/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Obsessive Compulsive Disorder/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Obsessive Compulsive Disorder/phenotype_PH223_ver_446_concepts_20230719T114228.csv b/example/codes/clinical-codes-org/Obsessive Compulsive Disorder/phenotype_PH223_ver_446_concepts_20230719T114228.csv similarity index 100% rename from example/clinical-codes-org/Obsessive Compulsive Disorder/phenotype_PH223_ver_446_concepts_20230719T114228.csv rename to example/codes/clinical-codes-org/Obsessive Compulsive Disorder/phenotype_PH223_ver_446_concepts_20230719T114228.csv diff --git a/example/clinical-codes-org/Palliative care codes/Where this code list comes from .docx b/example/codes/clinical-codes-org/Palliative care codes/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Palliative care codes/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Palliative care codes/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Palliative care codes/res176-palliative-and-end-of-life-care.csv b/example/codes/clinical-codes-org/Palliative care codes/res176-palliative-and-end-of-life-care.csv similarity index 100% rename from example/clinical-codes-org/Palliative care codes/res176-palliative-and-end-of-life-care.csv rename to example/codes/clinical-codes-org/Palliative care codes/res176-palliative-and-end-of-life-care.csv diff --git a/example/clinical-codes-org/Peptic ulcer/Where this code list comes from .docx b/example/codes/clinical-codes-org/Peptic ulcer/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Peptic ulcer/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Peptic ulcer/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Peptic ulcer/phenotype_PH1091_ver_2385_concepts_20230719T122902.csv b/example/codes/clinical-codes-org/Peptic ulcer/phenotype_PH1091_ver_2385_concepts_20230719T122902.csv similarity index 100% rename from example/clinical-codes-org/Peptic ulcer/phenotype_PH1091_ver_2385_concepts_20230719T122902.csv rename to example/codes/clinical-codes-org/Peptic ulcer/phenotype_PH1091_ver_2385_concepts_20230719T122902.csv diff --git a/example/clinical-codes-org/Personality disorders/Where this code list comes from .docx b/example/codes/clinical-codes-org/Personality disorders/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Personality disorders/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Personality disorders/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Personality disorders/res38-personality-disorder.csv b/example/codes/clinical-codes-org/Personality disorders/res38-personality-disorder.csv similarity index 100% rename from example/clinical-codes-org/Personality disorders/res38-personality-disorder.csv rename to example/codes/clinical-codes-org/Personality disorders/res38-personality-disorder.csv diff --git a/example/clinical-codes-org/Self harm/Where this code list comes from .docx b/example/codes/clinical-codes-org/Self harm/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Self harm/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Self harm/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Self harm/res41-self-harm.csv b/example/codes/clinical-codes-org/Self harm/res41-self-harm.csv similarity index 100% rename from example/clinical-codes-org/Self harm/res41-self-harm.csv rename to example/codes/clinical-codes-org/Self harm/res41-self-harm.csv diff --git a/example/clinical-codes-org/Symptom code lists/Abdominal pain/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Abdominal pain/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Abdominal pain/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Abdominal pain/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv b/example/codes/clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv rename to example/codes/clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv diff --git a/example/clinical-codes-org/Symptom code lists/Falls/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Falls/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Falls/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Falls/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Falls/res178-fall.csv b/example/codes/clinical-codes-org/Symptom code lists/Falls/res178-fall.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Falls/res178-fall.csv rename to example/codes/clinical-codes-org/Symptom code lists/Falls/res178-fall.csv diff --git a/example/clinical-codes-org/Symptom code lists/Falls/res202-falls-outcome.csv b/example/codes/clinical-codes-org/Symptom code lists/Falls/res202-falls-outcome.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Falls/res202-falls-outcome.csv rename to example/codes/clinical-codes-org/Symptom code lists/Falls/res202-falls-outcome.csv diff --git a/example/clinical-codes-org/Symptom code lists/Fatigue/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Fatigue/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Fatigue/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Fatigue/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Fatigue/res175-fatigue.csv b/example/codes/clinical-codes-org/Symptom code lists/Fatigue/res175-fatigue.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Fatigue/res175-fatigue.csv rename to example/codes/clinical-codes-org/Symptom code lists/Fatigue/res175-fatigue.csv diff --git a/example/clinical-codes-org/Symptom code lists/Headache/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Headache/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Headache/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Headache/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Headache/res175-headache.csv b/example/codes/clinical-codes-org/Symptom code lists/Headache/res175-headache.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Headache/res175-headache.csv rename to example/codes/clinical-codes-org/Symptom code lists/Headache/res175-headache.csv diff --git a/example/clinical-codes-org/Symptom code lists/Incontinence/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Incontinence/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Incontinence/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Incontinence/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Incontinence/res11-incontinence.csv b/example/codes/clinical-codes-org/Symptom code lists/Incontinence/res11-incontinence.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Incontinence/res11-incontinence.csv rename to example/codes/clinical-codes-org/Symptom code lists/Incontinence/res11-incontinence.csv diff --git a/example/clinical-codes-org/Symptom code lists/Musculoskeletal pain/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Musculoskeletal pain/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Musculoskeletal pain/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Musculoskeletal pain/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Musculoskeletal pain/res175-msk-pain.csv b/example/codes/clinical-codes-org/Symptom code lists/Musculoskeletal pain/res175-msk-pain.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Musculoskeletal pain/res175-msk-pain.csv rename to example/codes/clinical-codes-org/Symptom code lists/Musculoskeletal pain/res175-msk-pain.csv diff --git a/example/clinical-codes-org/Symptom code lists/Neuropathic pain/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Neuropathic pain/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Neuropathic pain/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Neuropathic pain/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Neuropathic pain/res55-neuropathic_pain.csv b/example/codes/clinical-codes-org/Symptom code lists/Neuropathic pain/res55-neuropathic_pain.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Neuropathic pain/res55-neuropathic_pain.csv rename to example/codes/clinical-codes-org/Symptom code lists/Neuropathic pain/res55-neuropathic_pain.csv diff --git a/example/clinical-codes-org/Symptom code lists/Sleep problems/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Sleep problems/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Sleep problems/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Sleep problems/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Sleep problems/res175-sleep-problems.csv b/example/codes/clinical-codes-org/Symptom code lists/Sleep problems/res175-sleep-problems.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Sleep problems/res175-sleep-problems.csv rename to example/codes/clinical-codes-org/Symptom code lists/Sleep problems/res175-sleep-problems.csv diff --git a/example/clinical-codes-org/Symptom code lists/Stress/Where this code list comes from .docx b/example/codes/clinical-codes-org/Symptom code lists/Stress/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Stress/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Symptom code lists/Stress/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Symptom code lists/Stress/res175-stress.csv b/example/codes/clinical-codes-org/Symptom code lists/Stress/res175-stress.csv similarity index 100% rename from example/clinical-codes-org/Symptom code lists/Stress/res175-stress.csv rename to example/codes/clinical-codes-org/Symptom code lists/Stress/res175-stress.csv diff --git a/example/clinical-codes-org/Tuberculosis/Where this code list comes from .docx b/example/codes/clinical-codes-org/Tuberculosis/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Tuberculosis/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Tuberculosis/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Tuberculosis/phenotype_PH87_ver_174_concepts_20230719T113229.csv b/example/codes/clinical-codes-org/Tuberculosis/phenotype_PH87_ver_174_concepts_20230719T113229.csv similarity index 100% rename from example/clinical-codes-org/Tuberculosis/phenotype_PH87_ver_174_concepts_20230719T113229.csv rename to example/codes/clinical-codes-org/Tuberculosis/phenotype_PH87_ver_174_concepts_20230719T113229.csv diff --git a/example/clinical-codes-org/Urinary tract stones/Where this code list comes from .docx b/example/codes/clinical-codes-org/Urinary tract stones/Where this code list comes from .docx similarity index 100% rename from example/clinical-codes-org/Urinary tract stones/Where this code list comes from .docx rename to example/codes/clinical-codes-org/Urinary tract stones/Where this code list comes from .docx diff --git a/example/clinical-codes-org/Urinary tract stones/phenotype_PH331_ver_662_concepts_20230719T121545.csv b/example/codes/clinical-codes-org/Urinary tract stones/phenotype_PH331_ver_662_concepts_20230719T121545.csv similarity index 100% rename from example/clinical-codes-org/Urinary tract stones/phenotype_PH331_ver_662_concepts_20230719T121545.csv rename to example/codes/clinical-codes-org/Urinary tract stones/phenotype_PH331_ver_662_concepts_20230719T121545.csv diff --git a/example/phenotype_config.json b/example/config.json similarity index 100% rename from example/phenotype_config.json rename to example/config.json diff --git a/omop.py b/omop.py index 6a9ecde0613d2e26977387d40da6ba61aa9bcc09..e7ec110f43607c49b81f0208de11fa0a29fb88b7 100644 --- a/omop.py +++ b/omop.py @@ -10,8 +10,7 @@ OMOP_DB_PATH = OMOP_DB_DIR / 'omop_54.sqlite' #Populate SQLite3 Database with default OMOP CONCEPTS def install (db_path, omop_install_folder): - - print(f"Installing OMOP files from {omop_install_folder}") + print(f"Installing OMOP database from {omop_install_folder}") # check folder for omop install files is a directory omop_install_path = Path(omop_install_folder) @@ -42,8 +41,10 @@ def install (db_path, omop_install_folder): raise Exception(f"Error reading file {file_path}: {e}") conn.close() + print(f"OMOP installation completed") def clear(db_path): + print(f"Clearing OMOP data from database") omop_db_path = Path(db_path) if not omop_db_path.is_file(): raise FileNotFoundError(f"Error: OMOP DB file '{omop_db_path}' does not exist.") @@ -61,13 +62,16 @@ def clear(db_path): #cur.execute("DROP TABLE CONCEPT_SET_ITEM;") conn.close() - + print(f"OMOP database cleared") + def delete(db_path): + print(f"Deleting OMOP database") omop_db_path = Path(db_path) if not omop_db_path.is_file(): raise FileNotFoundError(f"Error: OMOP DB file '{omop_db_path}' does not exist.") omop_db_path.unlink() + print(f"OMOP database deleted") def table_exists(cursor, table_name): # Query to check if the table exists diff --git a/map.py b/phen.py similarity index 82% rename from map.py rename to phen.py index a4262d63eefa3753aea9a88ad27760de2eb944a8..6e8b2901983f55a87386ab90444ffda1c7c828bf 100644 --- a/map.py +++ b/phen.py @@ -5,9 +5,10 @@ import json import os import sqlite3 import sys -import trud +import shutil from pathlib import Path +import trud from base import log_invalid_code from base import bcolors from base import raise_ @@ -26,14 +27,76 @@ from omop import setup pd.set_option("mode.chained_assignment", None) -OUTPUT_PATH = Path('build') / 'phenotype_mapping.csv' -ERROR_PATH = Path('build') / 'errors.csv' +DEFAULT_PHEN_PATH = Path('build') / 'phen' +CODES_DIR = 'codes' +CONFIG_FILE = 'config.json' +OUTPUT_FILE = 'phen_output.csv' +ERROR_FILE = 'errors.csv' SPLIT_COL_ACTION = "split_col" CODES_COL_ACTION = "codes_col" DIVIDE_COL_ACTION = "divide_col" COL_ACTIONS = [SPLIT_COL_ACTION, CODES_COL_ACTION, DIVIDE_COL_ACTION] +def init(phen_dir): + print(f"Initialising Phenotype in directory: {phen_dir}") + + configure = False + phen_path = Path(phen_dir) + if phen_path.exists() and phen_path.is_dir(): # Check if it exists and is a directory + user_input = input(f"The phen directory '{phen_path}' already exists. Do you want to reinitialise phenotype, deleting all current configuration in this directory? (yes/no): ").strip().lower() + if user_input in ['yes', 'y']: + shutil.rmtree(phen_path) # Remove directory and all contents + configure = True; + else: + print("Phen directory was not recreated.") + else: + configure=True + + if configure: + # create root phen directory + phen_path.mkdir(parents=True, exist_ok=True) # Recreate directory + print(f"Phen directory '{phen_path}' has been created.") + # create codes directory + codes_path = phen_path / CODES_DIR + codes_path.mkdir() + # create config file + config = { + "concept_sets": { + "version": "0.0.1", + "omop": { + "vocabulary_id": "", + "vocabulary_name": "", + "vocabulary_reference": "" + }, + "concept_set": [ + { + "concept_set_name": "", + "concept_set_status": "", + "metadata": { + + } + } + ] + }, + "codes": [ + { + "folder": "", + "description": "", + "files": [ + + ] + } + ] + } + + config_path = phen_path / "phen_config.json" + # Write the JSON data to a file + with open(config_path, "w", encoding="utf-8") as f: + json.dump(config, f, indent=4) # Pretty-printing with indentation + + print(f"Phenotype initialised") + def read_table_file(path, excel_sheet=None): """ Load Code List File @@ -212,12 +275,30 @@ def sql_row_exist(conn, table, column, value): return exists -def process(config_file, source_codes_dir, target_code_type, translate=True, verify=True, error_path=ERROR_PATH, output_path=OUTPUT_PATH): - config_path = Path(config_file) +def map(phen_dir, + target_code_type, + translate=True, + verify=True): + print(f"Processing phenotype directory: {phen_dir}") + print(f"Target coding format: {target_code_type}") + if translate: + print("Translating code types.") + else: + print("Not translating codes") + if verify: + print("Verifying codes.") + else: + print("Not verifying codes.") + + phen_path = Path(phen_dir) + if not phen_path.is_dir(): + raise NotADirectoryError(f"Error: '{phen_path}' is not a directory") + + config_path = phen_path / CONFIG_FILE if not config_path.is_file(): - raise FileNotFoundError(f"Error: phenotype configuration file '{config_path}' does not exist.") + raise FileNotFoundError(f"Error: phen configuration file '{config_path}' does not exist.") - codes_path = Path(source_codes_dir) + codes_path = phen_path / CODES_DIR if not codes_path.is_dir(): raise FileNotFoundError(f"Error: source codes directory {source_codes_dir} does not exist.") @@ -233,7 +314,7 @@ def process(config_file, source_codes_dir, target_code_type, translate=True, ver summary_config = mapping["concept_sets"] folders = mapping["codes"] - out = pd.DataFrame([]) # Create Output File to append to + out = pd.DataFrame([]) # Create Output dataframe to append to # Iterate JSON mapping file (OBJECT FORMAT) for folder in folders: @@ -280,7 +361,7 @@ def process(config_file, source_codes_dir, target_code_type, translate=True, ver print("Action: Dividing Table by", divide_col, "column into: ", df[divide_col].unique(),) df = df.groupby(divide_col) - # Map to MELDB Concept/Phenotype + # Map to Concept/Phenotype if len(df) == 0: pass # out = df @@ -326,8 +407,9 @@ def process(config_file, source_codes_dir, target_code_type, translate=True, ver # Save Output File print(bcolors.HEADER, "---" * 5, "OUTPUT", "---" * 5, bcolors.ENDC) print(out) - if output_path == "atlas": + output_path = phen_path / OUTPUT_FILE + if output_path == "atlas": vocab_id = summary_config["omop"]["vocabulary_id"] vocab_version = summary_config["version"] vocab_name = summary_config["omop"]["vocabulary_name"] @@ -348,8 +430,11 @@ def process(config_file, source_codes_dir, target_code_type, translate=True, ver print("Saved to", output_path) # Save Error File + error_path = phen_path / ERROR_FILE if error_path.exists(): error_df = pd.read_csv(error_path) error_df = error_df.drop_duplicates() # Remove Duplicates from Error file error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"]) error_df.to_csv(error_path, index=False) + + print(f"Phenotype processing completed") \ No newline at end of file diff --git a/trud.py b/trud.py index 0d36c26a7b345bbab226585bbfe4e4080275ff79..869aa09b3bb6b85d9a6a3f2f3d25accca8e0cbbc 100644 --- a/trud.py +++ b/trud.py @@ -273,7 +273,8 @@ def create_map_directories(): MAPS_DOWNLOADS_DIR.mkdir(parents=True, exist_ok=True) MAPS_PROCESSED_DIR.mkdir(parents=True,exist_ok=True) -def install(api_key): +def install(api_key): + print(f"Installing TRUD") create_map_directories() items_latest = True @@ -340,3 +341,4 @@ def install(api_key): print(f"Downloaded {release_ordinal} release(s) for item {item_id}.") + print(f"TRUD installation completed") \ No newline at end of file