Replace the module-level parse.code_types dict with a CodeTypeParser class
and fix invalid dependency pins in pyproject.toml.

* acmc/parse.py: the parser registry is now the code_types attribute of a
  CodeTypeParser instance; the constructor raises FileNotFoundError when the
  TRUD processed directory does not exist or is not a directory.
* acmc/phen.py: validate() and preprocess_codes() construct a CodeTypeParser
  and read its code_types instead of using parse.code_types.
* pyproject.toml: "tables=3.9.2" and "requests=2.32.3" used a single "=",
  which is not a valid version specifier; both now use "==".

NOTE(review): this patch was recovered from a copy in which line breaks and
indentation had been collapsed. Line structure and blank context lines were
restored to match the hunk counts, but indentation (4 spaces assumed) is a
reconstruction - confirm it against the target files before applying. The
"numpy<2" change is whitespace-only and that whitespace difference could not
be recovered. The CodeTypeParser docstring was reworded in review, so the
post-image blob id on the parse.py index line no longer matches.

diff --git a/acmc/parse.py b/acmc/parse.py
index cdbb53d5acea7feacc312c5892d3880e7fda9448..0a9fd578575f5ae6d1318bddbec0597f330c3959 100644
--- a/acmc/parse.py
+++ b/acmc/parse.py
@@ -394,17 +394,23 @@ class Cprd(Proto):
             )
         ]
 
-# THe medical code type parsers
-code_types = {
-    "read2": Read2(),
-    "read3": Read3(),
-    "icd10": Icd10(),
-    "snomed": Snomed(),
-    "opcs4": Opcs4(),
-    "atc": Atc(),
-    "med": Med(),
-    "cprd": Cprd(),
-}
+class CodeTypeParser():
+    """Holds the medical code type parsers, built only if the TRUD processed directory exists"""
+    def __init__(self, trud_processed_dir=trud.TRUD_PROCESSED_DIR):
+
+        if not trud_processed_dir.exists() or not trud_processed_dir.is_dir():
+            raise FileNotFoundError(f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install")
+
+        self.code_types = {
+            "read2": Read2(),
+            "read3": Read3(),
+            "icd10": Icd10(),
+            "snomed": Snomed(),
+            "opcs4": Opcs4(),
+            "atc": Atc(),
+            "med": Med(),
+            "cprd": Cprd(),
+        }
 
 vocab_types = {
     "read2": "Read",
diff --git a/acmc/phen.py b/acmc/phen.py
index 52d97ff0cb86662bcbf18985d1b926e7939bfdf6..1b04b37e1f3368e39315e4c77f68cc5dad3c9fc7 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -214,6 +214,7 @@ def validate(phen_dir):
     validation_errors = []
     concept_sets = mapping["concept_sets"]
     concept_codes = mapping["codes"]
+    code_types = parse.CodeTypeParser().code_types
 
     # check the version number is of the format vn.n.n
     match = re.match(r"v(\d+\.\d+\.\d+)", concept_sets['version'])
@@ -256,7 +257,7 @@ def validate(phen_dir):
 
             # check columns specified are a supported medical coding type
             for column in file['columns']:
-                if column not in parse.code_types and column != 'metadata':
+                if column not in code_types and column != 'metadata':
                     validation_errors.append(f"Column type {column} for file {concept_code_file_path} is not supported")
 
             # check the actions are supported
@@ -342,7 +343,8 @@ def preprocess_codes(df, file, target_code_type=None, codes_file=None):
     metadata_df = df[meta_columns]
 
     # Preprocess codes
-    for code_type_name, code_type_parser in parse.code_types.items():
+    code_types = parse.CodeTypeParser().code_types
+    for code_type_name, code_type_parser in code_types.items():
         if code_type_name in file['columns']:
             logger.info(f"Processing {code_type_name} codes...")
 
diff --git a/pyproject.toml b/pyproject.toml
index 274767b99665fa88fe67f816753bfe5b2984f9bc..a9173943199fa2f1e1a7a6023fd4a1df92304b42 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,14 +26,14 @@ dependencies = [
     "greenlet==3.1.1",
     "iniconfig==2.0.0",
     "lxml==5.3.1",
-    "numpy<2",
+    "numpy<2",
     "openpyxl==3.1.5",
     "pluggy==1.5.0",
     "pyarrow==19.0.0",
     "pyomop==4.3.0",
-    "tables=3.9.2",
+    "tables==3.9.2",
     "pytest==8.3.4",
-    "requests=2.32.3",
+    "requests==2.32.3",
     "simpledbf==0.2.6",
     "smmap==5.0.2",
     "sqlalchemy==2.0.38"