diff --git a/acmc.py b/acmc.py index ae4b051122bcf45cc4331b1f91f3394b1f29386d..40a3d78b2a432e7f32d700ac8ba19005e2145362 100644 --- a/acmc.py +++ b/acmc.py @@ -97,7 +97,7 @@ def main(): map_process_parser = map_subparsers.add_parser("process", help="Process map configuration file") map_process_parser.add_argument("-c", "--config-file", required=True, help="Phenotype configuration file") map_process_parser.add_argument("-s", "--source-codes-dir", required=True, help="Source codes root directory") - map_process_parser.add_argument("-t", "--target-coding", required=True, choices=['read2', 'read3', 'icd10', 'snomed', 'opcs4'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)") + map_process_parser.add_argument("-t", "--target-coding", required=True, choices=['read2_code', 'read3_code', 'icd10_code', 'snomed_code', 'opcs4_code'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)") map_process_parser.add_argument("-o", "--output-file", type=str, default=str(map.OUTPUT_PATH.resolve()), help="Output directory for CSV or OMOP database") # Flags diff --git a/example/phenotype_config.json b/example/phenotype_config.json index af73d026dde455c0e54ae60f422ed93217ee2d82..c99ffcc76dfcae38c488d72067e5cdd18c78c3e2 100644 --- a/example/phenotype_config.json +++ b/example/phenotype_config.json @@ -22,25 +22,7 @@ "CODING LIST": "https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/codes/ClinicalCodes.org%20from%20the%20University%20of%20Manchester/Symptom%20code%20lists/Abdominal%20pain/res176-abdominal-pain.csv ", "NOTES": "2023-09-08: Clinical SF confirmed that the clinical view would be that this would need to be recurrent or persistent." } - }, - { - "concept_set_name": "CVD_EVENTS", - "concept_set_status": "AGREED", - "metadata": { - "#": "9", - "CONCEPT DESCRIPTION": "Cardiovascular events", - "CONCEPT TYPE": "Outcome event", - "CONCEPT ONTOLOGY CLASS": "nan", - "DATE ADDED ": "2023-08-25", - "REQUEST REASON ": "SF - requested by email - to explore an examplar 'outcome measure'", - "SOURCE INFO": "YES", - "FUNCTION": "QUERY BY CODING LIST", - "FUNCTION.1": "https://clinicalcodes.rss.mhs.man.ac.uk/", - "CODING LIST": "https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/codes/ClinicalCodes.org%20from%20the%20University%20of%20Manchester/Cardiovascular%20events%20(ICD10)/res52-cardiovascular-events-icd10.csv", - "NOTES": "2023-11-17 - Code cannot currently be PROCESSED = COMPLETE as no mappings from ICD10 to Read v2 exist, pending confirmation from clinical team on next steps.", - "Relation to Ontology": "YES WITH ISSUES" - } - } + } ] }, "codes": [ @@ -51,7 +33,7 @@ { "file": "Symptom code lists/Abdominal pain/res176-abdominal-pain.csv", "columns": { - "read2": "code", + "read2_code": "code", "metadata": [ "description" ] @@ -59,20 +41,7 @@ "concept_set": [ "ABDO_PAIN" ] - }, - { - "file": "Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv", - "columns": { - "icd10_code": "code", - "metadata": [ - "description" - ] - }, - "concept_set": [ - "CVD_EVENTS" - ] - } - + } ] } ] diff --git a/map.py b/map.py index 5456de729fe0c3a07f25af60b575d711f7370f69..05239c88da45c1fa89e7da709a8012e1156b9071 100644 --- a/map.py +++ b/map.py @@ -203,6 +203,7 @@ def process(config_file, source_codes_dir, target_code_type, translate=True, ver # Perform Structural Changes to file before preprocessing # split column with multiple code types + print("Processing actions") if ("actions" in file and "split_col" in file["actions"] and "codes_col" in file["actions"]): split_col = file["actions"]["split_col"] codes_col = file["actions"]["codes_col"] @@ -213,7 +214,7 @@ def process(config_file, source_codes_dir, target_code_type, translate=True, ver oh[oh == False] = np.nan # replace 0s with None df = pd.concat([df, oh], axis=1) # merge in new columns - # Preprocessing & Validation Checks + # Preprocessing & Validation Checks if "columns" in file: meta_columns = [] # meta columns to keep with codes if "actions" in file and "divide_col" in file["actions"]: @@ -225,17 +226,17 @@ def process(config_file, source_codes_dir, target_code_type, translate=True, ver else: raise Exception("No column format provided") - # partition table by categorical column + # partition table by categorical column if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0): divide_col = file["actions"]["divide_col"] print("Action: Dividing Table by", divide_col, "column into: ", df[divide_col].unique(),) df = df.groupby(divide_col) - - # Map to MELDB Concept/Phenotype - if len(df) == 0: + + # Map to MELDB Concept/Phenotype + if len(df) == 0: pass # out = df - elif ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame): + elif ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame): out = map_file(df, target_code_type, out, concepts=file["concept_set"], meta_columns=meta_columns, translate=translate,) elif ("concept_set_categories" in file) and isinstance(df, pd.core.groupby.generic.DataFrameGroupBy): meta_columns.remove(divide_col) # delete categorical column diff --git a/parse.py b/parse.py index ddf07484c68deb7100268de9c551ec71d31bc38d..8d66f305db0dbb1fb11d0e50d9f4405649b966b1 100644 --- a/parse.py +++ b/parse.py @@ -344,14 +344,14 @@ class Cprd_code(Proto_code): ] code_types = { - "read2": Read2_code, - "read3": Read3_code, - "icd10": Icd10_code, - "snomed": Snomed_code, - "opcs4": Opcs4_code, - "atc": Atc_code, - "med": Med_code, - "cprd": Cprd_code, + "read2_code": Read2_code, + "read3_code": Read3_code, + "icd10_code": Icd10_code, + "snomed_code": Snomed_code, + "opcs4_code": Opcs4_code, + "atc_code": Atc_code, + "med_code": Med_code, + "cprd_code": Cprd_code, } vocab_types = {