From 25fa78ab137fabeb64804cd906f35fd690865176 Mon Sep 17 00:00:00 2001 From: Michael Boniface <m.j.boniface@soton.ac.uk> Date: Thu, 20 Feb 2025 11:17:57 +0000 Subject: [PATCH] added exceptions in phen --- acmc/parse.py | 7 ++----- acmc/phen.py | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/acmc/parse.py b/acmc/parse.py index 2a04067..b1a5414 100644 --- a/acmc/parse.py +++ b/acmc/parse.py @@ -52,7 +52,7 @@ class Proto(): def in_database(self, codes, db, col): return codes.isin(db[col]) - def process(self, codes, codes_file, ignore_errors=False): + def process(self, codes, codes_file): """ identify issues that do not pass and fix them with define/d process """ errors = [] # Iter through each item in check. @@ -66,10 +66,7 @@ class Proto(): codes = fix(codes, codes_file) logger.debug(f"Check: Fixed") except InvalidCodesException as ex: - if ignore_errors: - errors.append(ex) - else: - raise ex + errors.append(ex) else: logger.debug(f"Check: passed") diff --git a/acmc/phen.py b/acmc/phen.py index c232421..df354e2 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -354,6 +354,7 @@ def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None): def preprocess_codes(df, file, target_code_type=None, codes_file=None): """ Parses each column individually - Order and length will not be preserved! 
""" out = pd.DataFrame([]) # create output df to append to + code_errors = [] # list of errors from processing meta_columns = [] # meta columns to keep with codes if "actions" in file and "divide_col" in file["actions"]: @@ -375,14 +376,15 @@ def preprocess_codes(df, file, target_code_type=None, codes_file=None): codes = codes.str.strip() # remove excess spaces # process codes, validating them using parser and returning the errors - codes, errors = code_type_parser.process(codes, codes_file, ignore_errors=True) + codes, errors = code_type_parser.process(codes, codes_file) if len(errors) > 0: - raise Exception(f"Code validation failed with {len(errors)} errors") + code_errors = code_errors.append(errors) + logger.warning(f"Code validation failed with {len(errors)} errors") # add metadata columns out = pd.concat([out, pd.DataFrame({code_type_name: codes}).join(metadata_df)], ignore_index=True) - return out, meta_columns + return out, meta_columns, code_errors # Translate Df with multiple codes into single code type Series def translate_codes(df, target_code_type): @@ -460,6 +462,7 @@ def map(phen_dir, target_code_type): # Create output dataframe out = pd.DataFrame([]) + code_errors [] # Process each folder in codes section for folder in codes: @@ -478,10 +481,12 @@ def map(phen_dir, target_code_type): # Preprocessing & Validation Checks logger.debug("Processing and validating code formats") - df, meta_columns = preprocess_codes(df, - file, - codes_file=str(codes_file_path.resolve()), - target_code_type=target_code_type) + df, meta_columns, errors = preprocess_codes( + df, + file, codes_file=str(codes_file_path.resolve()), + target_code_type=target_code_type) + + code_errors = code_errors.append(errors) # partition table by categorical column if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0): @@ -492,26 +497,32 @@ def map(phen_dir, target_code_type): # Map to Concept/Phenotype if len(df.index) != 0: if ("concept_set" in file) and 
isinstance(df, pd.core.frame.DataFrame): - out = map_file(df, - target_code_type, out, - concepts=file["concept_set"], - meta_columns=meta_columns) + out = map_file( + df, + target_code_type, + out, + concepts=file["concept_set"], + meta_columns=meta_columns) elif ("concept_set_categories" in file) and isinstance(df, pd.core.groupby.generic.DataFrameGroupBy): meta_columns.remove(divide_col) # delete categorical column for cat, grp in df: if (cat in file["concept_set_categories"].keys()): # check if category is mapped grp = grp.drop(columns=[divide_col]) # delete categorical column logger.debug("Category:", cat) - out = map_file(grp, - target_code_type, - out, - concepts=file["concept_set_categories"][cat], - meta_columns=meta_columns,) + out = map_file( + grp, + target_code_type, + out, + concepts=file["concept_set_categories"][cat], + meta_columns=meta_columns,) else: raise AttributeError(f"File {file} has either no concept_set or conceot_set_categories or the instance of dataframe objectives associated is incorrect, concept_set must be a DataFrame, conceot_set_categories must be pd.core.groupby.generic.DataFrameGroupBy") else: logger.warning(f"File {file} has no output after preprocessing in config {str(config_path.resolve())}") + if(len(code_errors) > 0): + logger.error(f"The map processing has {len(code_errors)} errors") + # Check there is output from processing if len(out.index) == 0: raise Exception(f"No output after map processing, check config {str(config_path.resolve())}") -- GitLab