From 25fa78ab137fabeb64804cd906f35fd690865176 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Thu, 20 Feb 2025 11:17:57 +0000
Subject: [PATCH] added exceptions in phen

---
 acmc/parse.py |  7 ++-----
 acmc/phen.py  | 43 +++++++++++++++++++++++++++----------------
 2 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/acmc/parse.py b/acmc/parse.py
index 2a04067..b1a5414 100644
--- a/acmc/parse.py
+++ b/acmc/parse.py
@@ -52,7 +52,7 @@ class Proto():
 	def in_database(self, codes, db, col):
 	    return codes.isin(db[col])
 
-	def process(self, codes, codes_file, ignore_errors=False):	
+	def process(self, codes, codes_file):	
 		""" identify issues that do not pass and fix them with define/d process """
 		errors = []
 		# Iter through each item in check. 
@@ -66,10 +66,7 @@ class Proto():
 					codes = fix(codes, codes_file)
 					logger.debug(f"Check: Fixed")							
 				except InvalidCodesException as ex:
-					if ignore_errors:
-						errors.append(ex)
-					else:
-						raise ex
+					errors.append(ex)
 			else:
 				logger.debug(f"Check: passed")
 	
diff --git a/acmc/phen.py b/acmc/phen.py
index c232421..df354e2 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -354,6 +354,7 @@ def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None):
 def preprocess_codes(df, file, target_code_type=None, codes_file=None):
 	""" Parses each column individually - Order and length will not be preserved! """
 	out = pd.DataFrame([])  # create output df to append to
+	code_errors = [] # list of errors from processing
 
 	meta_columns = []  # meta columns to keep with codes
 	if "actions" in file and "divide_col" in file["actions"]:
@@ -375,14 +376,15 @@ def preprocess_codes(df, file, target_code_type=None, codes_file=None):
 			codes = codes.str.strip()  # remove excess spaces	
 
 			# process codes, validating them using parser and returning the errors
-			codes, errors = code_type_parser.process(codes, codes_file, ignore_errors=True)  
+			codes, errors = code_type_parser.process(codes, codes_file)  
 			if len(errors) > 0:
-				raise Exception(f"Code validation failed with {len(errors)} errors")
+				code_errors.extend(errors)
+				logger.warning(f"Code validation failed with {len(errors)} errors")
 				
 			# add metadata columns
 			out = pd.concat([out, pd.DataFrame({code_type_name: codes}).join(metadata_df)], ignore_index=True)
 				
-	return out, meta_columns
+	return out, meta_columns, code_errors
 
 # Translate Df with multiple codes into single code type Series
 def translate_codes(df, target_code_type):
@@ -460,6 +462,7 @@ def map(phen_dir, target_code_type):
 
 	# Create output dataframe
 	out = pd.DataFrame([]) 
+	code_errors = []
 
 	# Process each folder in codes section
 	for folder in codes:
@@ -478,10 +481,12 @@ def map(phen_dir, target_code_type):
 
 			# Preprocessing & Validation Checks		
 			logger.debug("Processing and validating code formats")
-			df, meta_columns = preprocess_codes(df, 
-												file,
-												codes_file=str(codes_file_path.resolve()),
-												target_code_type=target_code_type)
+			df, meta_columns, errors = preprocess_codes(
+				df, 
+				file, codes_file=str(codes_file_path.resolve()),
+				target_code_type=target_code_type)
+
+			code_errors.extend(errors)
 
 			# partition table by categorical column				
 			if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0):
@@ -492,26 +497,32 @@ def map(phen_dir, target_code_type):
 			# Map to Concept/Phenotype	
 			if len(df.index) != 0:			
 				if ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):					
-					out = map_file(df,
-								   target_code_type, out,
-								   concepts=file["concept_set"],
-								   meta_columns=meta_columns)
+					out = map_file(
+						df,
+						target_code_type,
+						out,
+						concepts=file["concept_set"],
+						meta_columns=meta_columns)
 				elif ("concept_set_categories" in file) and isinstance(df, pd.core.groupby.generic.DataFrameGroupBy):
 					meta_columns.remove(divide_col)  # delete categorical column
 					for cat, grp in df:
 						if (cat in file["concept_set_categories"].keys()):  # check if category is mapped
 							grp = grp.drop(columns=[divide_col])  # delete categorical column
 							logger.debug("Category:", cat)
-							out = map_file(grp,
-										   target_code_type,
-										   out,
-										   concepts=file["concept_set_categories"][cat],
-										   meta_columns=meta_columns,)
+							out = map_file(
+								grp,
+								target_code_type,
+								out,
+								concepts=file["concept_set_categories"][cat],
+								meta_columns=meta_columns,)
 				else:
 					raise AttributeError(f"File {file} has either no concept_set or conceot_set_categories or the instance of dataframe objectives associated is incorrect, concept_set must be a DataFrame, conceot_set_categories must be pd.core.groupby.generic.DataFrameGroupBy")
 			else:
 				logger.warning(f"File {file} has no output after preprocessing in config {str(config_path.resolve())}")
 
+	if(len(code_errors) > 0):
+		logger.error(f"The map processing has {len(code_errors)} errors")
+	
 	# Check there is output from processing
 	if len(out.index) == 0:
 		raise Exception(f"No output after map processing, check config {str(config_path.resolve())}")
-- 
GitLab