fixed state bug between test and running from the command line

75615a67 · mjbonifa · ab787128 · 75615a67 · 75615a67
Commit 75615a67 authored 4 months ago by mjbonifa
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -15,7 +15,7 @@ from urllib.parse import urlparse, urlunparse

 # acmc imports 
 from acmc import trud, omop
-from acmc.parse import Read2, Read3, Icd10, Snomed, Opcs4, Atc, code_types, vocab_types
+from acmc.parse import code_types
 from acmc.omop import publish_concept_sets, setup

 # setup logging
@@ -33,7 +33,6 @@ CONCEPT_SET_DIR = 'concept-set'
 DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR]

 CONFIG_FILE = 'config.json'
-ERROR_FILE = 'errors.csv'
 REPORT_FILE = 'report.md'

 DEFAULT_GIT_BRANCH = 'main'
@@ -362,10 +361,8 @@ def convert_codes(df, target, translate):
 	
 	# Append target column (if exists) - doesn't need conversion
 	if target in df.columns:
-		logger.debug("Has", len(df), target, "in file")
+		logger.debug(f"Has {len(df)} {target} in file")
 		codes = pd.concat([codes, df[target]])
-	# else:
-	# 	logger.debug("No",target,"in file")

 	if translate:
 		# Convert codes to target type
@@ -439,8 +436,6 @@ def map(phen_dir, target_code_type, translate=True):

 	# Process each folder in codes section
 	for folder in codes:
-		logger.debug(folder["description"])
-		if "files" in folder:
 		for file in folder["files"]:
 			logger.debug(f"--- {file["file"]} ---")
 			codes_file_path = codes_path / folder["folder"] / file["file"]
@@ -453,7 +448,7 @@ def map(phen_dir, target_code_type, translate=True):

 			# Perform Structural Changes to file before preprocessing
 			# split column with multiple code types
-				logger.debug("Processing actions")
+			logger.debug("Processing file structural actions")
 			if ("actions" in file and "split_col" in file["actions"] and "codes_col" in file["actions"]):
 				split_col = file["actions"]["split_col"]
 				codes_col = file["actions"]["codes_col"]
@@ -465,7 +460,7 @@ def map(phen_dir, target_code_type, translate=True):
 				df = pd.concat([df, oh], axis=1)  # merge in new columns

 			# Preprocessing & Validation Checks		
-				if "columns" in file:
+			logger.debug("Processing and validating code formats")
 			meta_columns = []  # meta columns to keep with codes
 			if "actions" in file and "divide_col" in file["actions"]:
 				meta_columns += [file["actions"]["divide_col"]]
@@ -478,8 +473,6 @@ def map(phen_dir, target_code_type, translate=True):
 							codes_file=str(codes_file_path.resolve()),
 							target_code_type=target_code_type,
 							translate=translate)
-				else:
-					raise Exception("No column format provided")

 			# partition table by categorical column				
 			if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0):
@@ -488,9 +481,8 @@ def map(phen_dir, target_code_type, translate=True):
 				df = df.groupby(divide_col)
 			
 			# Map to Concept/Phenotype	
-				if len(df) == 0:			
-					pass
-				elif ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):					
+			if len(df.index) != 0:			
+				if ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):					
 					out = map_file(df,
 								   target_code_type, out,
 								   concepts=file["concept_set"],
@@ -508,12 +500,9 @@ def map(phen_dir, target_code_type, translate=True):
 										   concepts=file["concept_set_categories"][cat],
 										   meta_columns=meta_columns,)

-		else:
-			logger.warning("Folder is empty")
-
 	# test if there's any output from processing
-	if len(out) <= 0:
-		raise Exception("Processing has not produced any output")
+	if len(out.index) == 0:
+		raise Exception("The output after map processing has no output, check configuration file {str(config_path.resolve())} is not empty")

 	# Final processing
 	out = out.reset_index(drop=True)
@@ -565,14 +554,6 @@ def map(phen_dir, target_code_type, translate=True):
 	shutil.copy(trud.VERSION_PATH, phen_path / trud.VERSION_FILE)
 	shutil.copy(omop.VERSION_PATH, phen_path / omop.VERSION_FILE)
 	
-	# write erros to a file
-	error_path = phen_path / ERROR_FILE
-	if error_path.exists():	
-		error_df = pd.read_csv(error_path)
-		error_df = error_df.drop_duplicates()  # Remove Duplicates from Error file
-		error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
-		error_df.to_csv(error_path, index=False)
-	
 	logger.debug(f"Saved concept_sets to {str(concept_set_path.resolve())}")	
 	
 	logger.info(f"Phenotype processed successfully")

--- a/tests/test_acmc.py
+++ b/tests/test_acmc.py
@@ -29,14 +29,6 @@ def logger():
 	stream_handler = logging.StreamHandler(sys.stdout)
 	logger.addHandler(stream_handler)

-def test_phen_init_local_default(tmp_dir, monkeypatch, caplog):
-	with caplog.at_level(logging.DEBUG):
-		monkeypatch.setattr(sys, "argv", ["main.py", "phen", "init"])
-		# Mock input() to return "yes" to the question about reinitialising the directory 
-		monkeypatch.setattr("builtins.input", lambda _: "y")
-		main.main()
-	assert "Phenotype initialised successfully" in caplog.text
-
 def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog):
 	with caplog.at_level(logging.DEBUG):	
 		phen_path = tmp_dir / "phen"