From 8e356322c4872929665c04478917790f820e85ac Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Thu, 20 Feb 2025 15:30:03 +0000
Subject: [PATCH] moved code types to a CodeTypeParser class as they depend on the
 existence of the trud processed directory. Moving from classes to instances
 of the processor instantiated the classes when python starts, but as the trud
 processed directory does not exist after first install, things fail

---
 acmc/parse.py  | 28 +++++++++++++++++-----------
 acmc/phen.py   |  6 ++++--
 pyproject.toml |  6 +++---
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/acmc/parse.py b/acmc/parse.py
index cdbb53d..0a9fd57 100644
--- a/acmc/parse.py
+++ b/acmc/parse.py
@@ -394,17 +394,23 @@ class Cprd(Proto):
 			)
 		]
 
-# THe medical code type parsers
-code_types = {
-	"read2": Read2(),
-	"read3": Read3(),
-	"icd10": Icd10(),
-	"snomed": Snomed(),
-	"opcs4": Opcs4(),
-	"atc": Atc(),
-	"med": Med(),
-	"cprd": Cprd(),
-}
+class CodeTypeParser():
+	"""Holds the supported medical code type parsers in self.code_types; construction raises FileNotFoundError if the TRUD processed directory is missing"""			
+	def __init__(self, trud_processed_dir=trud.TRUD_PROCESSED_DIR):
+
+		if not trud_processed_dir.exists() or not trud_processed_dir.is_dir():
+			raise FileNotFoundError(f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install")
+		
+		self.code_types = {
+			"read2": Read2(),
+			"read3": Read3(),
+			"icd10": Icd10(),
+			"snomed": Snomed(),
+			"opcs4": Opcs4(),
+			"atc": Atc(),
+			"med": Med(),
+			"cprd": Cprd(),
+		}
 
 vocab_types = {
 	"read2": "Read",
diff --git a/acmc/phen.py b/acmc/phen.py
index 52d97ff..1b04b37 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -214,6 +214,7 @@ def validate(phen_dir):
 	validation_errors = []
 	concept_sets = mapping["concept_sets"]
 	concept_codes = mapping["codes"]
+	code_types = parse.CodeTypeParser().code_types
 	
     # check the version number is of the format vn.n.n
 	match = re.match(r"v(\d+\.\d+\.\d+)", concept_sets['version'])
@@ -256,7 +257,7 @@ def validate(phen_dir):
 					
 				# check columns specified are a supported medical coding type
 				for column in file['columns']:
-					if column not in parse.code_types and column != 'metadata':
+					if column not in code_types and column != 'metadata':
 						validation_errors.append(f"Column type {column} for file {concept_code_file_path} is not supported")
 	
 				# check the actions are supported
@@ -342,7 +343,8 @@ def preprocess_codes(df, file, target_code_type=None, codes_file=None):
 	metadata_df = df[meta_columns]
 	
 	# Preprocess codes
-	for code_type_name, code_type_parser in parse.code_types.items():
+	code_types = parse.CodeTypeParser().code_types
+	for code_type_name, code_type_parser in code_types.items():
 		if code_type_name in file['columns']:
 			logger.info(f"Processing {code_type_name} codes...")
 			
diff --git a/pyproject.toml b/pyproject.toml
index 274767b..a917394 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,14 +26,14 @@ dependencies = [
     "greenlet==3.1.1",
     "iniconfig==2.0.0",
     "lxml==5.3.1",
-	"numpy<2",
+    "numpy<2",
     "openpyxl==3.1.5",
     "pluggy==1.5.0",
     "pyarrow==19.0.0",
     "pyomop==4.3.0",
-	"tables=3.9.2", 	
+    "tables==3.9.2", 	
     "pytest==8.3.4",
-	"requests=2.32.3",	
+    "requests==2.32.3",	
     "simpledbf==0.2.6",
     "smmap==5.0.2",
     "sqlalchemy==2.0.38"
-- 
GitLab