From 78e6fcd6882cec0e60d2f3eab39b824f04991911 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Mon, 24 Feb 2025 12:05:41 +0000
Subject: [PATCH] refactor: converted all examples to yaml and added test for
 config2. Updated readme. Fixed bug in diff that assumed all csv files in map
 directory were map files when some were error files if no codes exist;
 moved error files to errors directory in the map directory to avoid this.
 Closes #19

---
 acmc/phen.py       | 12 +++++++-----
 tests/test_acmc.py | 16 +++++++++-------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/acmc/phen.py b/acmc/phen.py
index 14dc583..49eefe1 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -232,7 +232,9 @@ def validate(phen_dir):
     logger.info(f"Validating phenotype: {phen_dir}")
     phen_path = Path(phen_dir)
     if not phen_path.is_dir():
-        raise NotADirectoryError(f"Error: '{str(phen_path.resolve())}' is not a directory")
+        raise NotADirectoryError(
+            f"Error: '{str(phen_path.resolve())}' is not a directory"
+        )
 
     config_path = phen_path / CONFIG_FILE
     if not config_path.is_file():
@@ -676,8 +678,8 @@ def map(phen_dir, target_code_type):
     if len(code_errors) > 0:
         logger.error(f"The map processing has {len(code_errors)} errors")
         error_path = phen_path / MAP_DIR / "errors"
-        error_path.mkdir(parents=True, exist_ok=True)                
-        error_filename = f"{target_code_type}-code-errors.csv"        
+        error_path.mkdir(parents=True, exist_ok=True)
+        error_filename = f"{target_code_type}-code-errors.csv"
         write_code_errors(code_errors, error_path / error_filename)
 
     # Check there is output from processing
@@ -944,11 +946,11 @@ def diff(phen_dir, phen_old_dir):
         new_output = new_map_path / file
 
         logger.debug(f"Old ouptput: {str(old_output.resolve())}")
-        logger.debug(f"New ouptput: {str(new_output.resolve())}") 
+        logger.debug(f"New ouptput: {str(new_output.resolve())}")
 
         df1 = pd.read_csv(old_output)
         df1 = df1[["CONCEPT", "CONCEPT_SET"]].groupby("CONCEPT_SET").count()
-        df2 = pd.read_csv(new_output)   
+        df2 = pd.read_csv(new_output)
         df2 = df2[["CONCEPT", "CONCEPT_SET"]].groupby("CONCEPT_SET").count()
 
         # Check for added and removed concepts
diff --git a/tests/test_acmc.py b/tests/test_acmc.py
index a89f0e6..3548a1d 100644
--- a/tests/test_acmc.py
+++ b/tests/test_acmc.py
@@ -46,14 +46,16 @@ def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog):
 
 # TODO: This test will need to be refactored so that the expected outputs match the config files
 # right now it just tests that it runs successfully and does not check the contents of the output
-@pytest.mark.parametrize("config_file", [
-    ("config1.yaml"),   # config.yaml test case
-    ("config2.yaml"),   # config.yaml test case    
-    
-])
+@pytest.mark.parametrize(
+    "config_file",
+    [
+        ("config1.yaml"),  # config.yaml test case
+        ("config2.yaml"),  # config.yaml test case
+    ],
+)
 def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
     print(f"Temporary directory: {tmp_dir}")  # Prints path for debugging
-    
+
     with caplog.at_level(logging.DEBUG):
         phen_path = tmp_dir / "phen"
         phen_path = phen_path.resolve()
@@ -79,7 +81,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
                 shutil.copy(source, destination)
 
         # copy the test file to configuration
-        shutil.copy(phen_path / config_file, phen_path / "config.yaml")                
+        shutil.copy(phen_path / config_file, phen_path / "config.yaml")
 
         monkeypatch.setattr(
             sys, "argv", ["main.py", "phen", "validate", "-d", str(phen_path.resolve())]
-- 
GitLab