From e549bf4bc63d56a6abda6a0feac674dcd3498ad0 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Wed, 26 Feb 2025 19:59:56 +0000
Subject: [PATCH] fix: added map definition to the config.yaml so that we know
 which maps are expected for the phenotype. It is still possible to run one of
 them using the -t option but they must be specified in the config file. This
 means a user can run acmc phen map and all the required codes are created. It
 also reduces the chance of inconsistency between the map files generated
 between versions. It does not remove it entirely because it is still possible
 for a user to only run with a subset of the coding types but that should be
 discouraged. We retain the option for phenotype development because you
 might not want to run everything all of the time due to the time it takes.
 Closes #40.

---
 README.md             |  2 +-
 acmc/main.py          | 15 +++------------
 acmc/omop.py          |  4 ++--
 acmc/parse.py         |  3 +++
 acmc/phen.py          | 35 ++++++++++++++++++++++++++++++++---
 docs/usage.md         |  3 +--
 examples/config1.yaml |  3 +++
 examples/config2.yaml |  3 +++
 examples/config3.yaml |  4 ++++
 tests/test_acmc.py    |  2 +-
 10 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 5dfb6cc..2175c98 100644
--- a/README.md
+++ b/README.md
@@ -215,7 +215,7 @@ Expected Output:
 	Use the following `acmc` command to generate the phenotype in `read2` format:
 
 ```bash
-acmc phen map -t read2
+acmc phen map
 ```
 
 Expected Output:
diff --git a/acmc/main.py b/acmc/main.py
index 89df65d..b702518 100644
--- a/acmc/main.py
+++ b/acmc/main.py
@@ -3,7 +3,7 @@ import logging
 from pathlib import Path
 
 import acmc
-from acmc import trud, omop, phen, logging_config as lc
+from acmc import trud, omop, phen, parse, logging_config as lc
 
 # setup logging
 logger = lc.setup_logger()
@@ -167,17 +167,8 @@ def main():
     phen_map_parser.add_argument(
         "-t",
         "--target-coding",
-        required=True,
-        choices=["read2", "read3", "icd10", "snomed", "opcs4"],
-        help="Specify the target coding (read2, read3, icd10, snomed, opcs4)",
-    )
-    phen_map_parser.add_argument(
-        "-o",
-        "--output",
-        choices=["csv", "omop"],
-        nargs="+",  # allows one or more values
-        default=["csv"],  # default to CSV if not specified
-        help="Specify output format(s): 'csv', 'omop', or both (default: csv)",
+        choices=parse.SUPPORTED_CODE_TYPES,
+        help=f"Specify the target coding {parse.SUPPORTED_CODE_TYPES}",
     )
     phen_map_parser.set_defaults(func=phen_map)
 
diff --git a/acmc/omop.py b/acmc/omop.py
index 4b8cc4c..dd5a461 100644
--- a/acmc/omop.py
+++ b/acmc/omop.py
@@ -16,7 +16,7 @@ logger = logging_config.setup_logger()
 
 # constants
 VOCAB_PATH = Path("./vocab/omop")
-OMOP_CDM_Version ="54"
+OMOP_CDM_Version = "54"
 OMOP_DB_FILENAME = f"omop_{OMOP_CDM_Version}.sqlite"
 DB_PATH = VOCAB_PATH / OMOP_DB_FILENAME
 VERSION_FILE = "omop_version.yaml"
@@ -26,7 +26,7 @@ EXPORT_FILE = "omop_export.db"
 vocabularies = {
     "source": "OHDSI Athena",
     "url": "https://athena.ohdsi.org/vocabulary/list",
-    "cdm_version": OMOP_CDM_Version,        
+    "cdm_version": OMOP_CDM_Version,
     "version": "",
     "vocabularies": [
         {"id": 1, "name": "SNOMED"},  # No license required
diff --git a/acmc/parse.py b/acmc/parse.py
index ca5b3e1..798a10b 100644
--- a/acmc/parse.py
+++ b/acmc/parse.py
@@ -8,6 +8,9 @@ from acmc import trud, logging_config as lc
 # setup logging
 logger = lc.setup_logger()
 
+# Define allowed values
+SUPPORTED_CODE_TYPES = {"read2", "read3", "icd10", "snomed", "opcs4", "atc"}
+
 
 class CodesError:
     """A class used in InvalidCodesException to report an error if a code parser check fails"""
diff --git a/acmc/phen.py b/acmc/phen.py
index a92bdb2..a9131b5 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -70,6 +70,15 @@ CONFIG_SCHEMA = {
                     },
                 },
             },
+            "map": {
+                "type": "list",
+                "schema": {
+                    "type": "string",
+                    "allowed": list(
+                        parse.SUPPORTED_CODE_TYPES
+                    ),  # Ensure only predefined values are allowed
+                },
+            },
             "concept_sets": {
                 "type": "list",
                 "required": True,
@@ -260,6 +269,7 @@ def init(phen_dir, remote_url):
                 "vocabulary_name": "",
                 "vocabulary_reference": "",
             },
+            "translate": [],
             "concept_sets": [],
         }
     }
@@ -603,7 +613,6 @@ def write_vocab_version(phen_path):
 
 def map(phen_dir, target_code_type):
     logger.info(f"Processing phenotype: {phen_dir}")
-    logger.debug(f"Target coding format: {target_code_type}")
 
     # Validate configuration
     validate(phen_dir)
@@ -611,13 +620,33 @@ def map(phen_dir, target_code_type):
     # initialise paths
     phen_path = Path(phen_dir)
     config_path = phen_path / CONFIG_FILE
-    codes_path = phen_path / CODES_DIR
 
     # load configuration
     with config_path.open("r") as file:
         config = yaml.safe_load(file)
     phenotype = config["phenotype"]
 
+    if len(phenotype["map"]) == 0:
+        raise ValueError(f"No map codes defined in the phenotype configuration")
+
+    if target_code_type is not None and target_code_type not in phenotype["map"]:
+        raise ValueError(
+            f"Target code type {target_code_type} not in phenotype configuration map {phenotype['map']}"
+        )
+
+    if target_code_type is not None:
+        map_target_code_type(phen_path, phenotype, target_code_type)
+    else:
+        for t in phenotype["map"]:
+            map_target_code_type(phen_path, phenotype, t)
+
+    logger.info(f"Phenotype processed successfully")
+
+
+def map_target_code_type(phen_path, phenotype, target_code_type):
+
+    logger.debug(f"Target coding format: {target_code_type}")
+    codes_path = phen_path / CODES_DIR
     # Create output dataframe
     out = pd.DataFrame([])
     code_errors = []
@@ -717,7 +746,7 @@ def map(phen_dir, target_code_type):
 
     write_vocab_version(phen_path)
 
-    logger.info(f"Phenotype processed successfully")
+    logger.info(f"Phenotype processed target code type {target_code_type}")
 
 
 def publish(phen_dir):
diff --git a/docs/usage.md b/docs/usage.md
index 1527165..57271cf 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -96,9 +96,8 @@ The `phen` command is used phenotype-related operations.
   acmc phen map -d <PHENOTYPE_DIRECTORY> -t <TARGET_CODING> -o <OUTPUT_FORMAT>
   ```
 
-  - `-t`, `--target-coding`: Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
+  - `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
   - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen).
-  - `-o`, `--output`: Output format(s) (`csv`, `omop`, or both), default is 'csv'.
 
 - **Publish Phenotype Configuration**
 
diff --git a/examples/config1.yaml b/examples/config1.yaml
index 19fd9c6..09d0e80 100644
--- a/examples/config1.yaml
+++ b/examples/config1.yaml
@@ -4,6 +4,9 @@ phenotype:
     vocabulary_id: "ACMC_Example_1"
     vocabulary_name: "ACMC example 1 phenotype"
     vocabulary_reference: "https://git.soton.ac.uk/meldb/concepts-processing/-/tree/main/examples"
+  map:
+    - "read2"
+    - "read3"
   concept_sets:
     - name: "ABDO_PAIN"
       file:
diff --git a/examples/config2.yaml b/examples/config2.yaml
index 33d6df4..4c6252e 100644
--- a/examples/config2.yaml
+++ b/examples/config2.yaml
@@ -4,6 +4,9 @@ phenotype:
     vocabulary_id: "ACMC_Example_2"
     vocabulary_name: "ACMC example 2 phenotype"
     vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
+  map:
+    - "read2"
+    - "read3"    
   concept_sets:
     - name: "CVD_EVENTS"
       file: 
diff --git a/examples/config3.yaml b/examples/config3.yaml
index 926ab60..764d7d8 100644
--- a/examples/config3.yaml
+++ b/examples/config3.yaml
@@ -4,6 +4,10 @@ phenotype:
     vocabulary_id: "ACMC_Example_3"
     vocabulary_name: "ACMC example 3 phenotype"
     vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
+  map:
+    - "read2"
+    - "read3"  
+    - "snomed"       
   concept_sets:
     - name: "CVD_EVENTS"
       file: 
diff --git a/tests/test_acmc.py b/tests/test_acmc.py
index 6533847..c4cb94e 100644
--- a/tests/test_acmc.py
+++ b/tests/test_acmc.py
@@ -91,7 +91,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
     assert "Phenotype validated successfully" in caplog.text
 
     # map phenotype
-    for code_type in ["read2", "read3", "snomed"]:
+    for code_type in ["read2", "read3"]:
         with caplog.at_level(logging.DEBUG):
             monkeypatch.setattr(
                 sys,
-- 
GitLab