From 47dcd2ebb11d4c5497cf1a9d01e64e7eae3a2ccb Mon Sep 17 00:00:00 2001
From: Jakub Dylag <jjd1c23@soton.ac.uk>
Date: Wed, 23 Apr 2025 11:06:42 +0100
Subject: [PATCH] (feat) flag no metadata in phen map output

---
 acmc/main.py |  8 +++++++-
 acmc/phen.py | 27 +++++++++++++++------------
 docs/cli.md  |  1 +
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/acmc/main.py b/acmc/main.py
index 91a40bc..a831fae 100644
--- a/acmc/main.py
+++ b/acmc/main.py
@@ -58,7 +58,7 @@ def _phen_validate(args: argparse.Namespace):
 
 def _phen_map(args: argparse.Namespace):
     """Handle the `phen map` command."""
-    phen.map(args.phen_dir, args.target_coding, args.not_translate)
+    phen.map(args.phen_dir, args.target_coding, args.not_translate, args.no_metadata)
 
 
 def _phen_export(args: argparse.Namespace):
@@ -223,6 +223,12 @@ def main():
         default=False,
         help="(Optional) Prevent any phenotype translation using NHS TRUD vocabularies.",
     ) 
+    phen_map_parser.add_argument(
+        "--no-metadata",
+        action='store_true',
+        default=False,
+        help="(Optional) Prevent copying of metadata columns to output.",
+    ) 
     phen_map_parser.set_defaults(func=_phen_map)
 
     # phen export
diff --git a/acmc/phen.py b/acmc/phen.py
index 8d6cb82..c745838 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -783,7 +783,7 @@ def write_vocab_version(phen_path: Path):
         )
 
 
-def map(phen_dir: str, target_code_type: str, not_translate:bool):
+def map(phen_dir: str, target_code_type: str, not_translate:bool, no_metadata:bool):
     _logger.info(f"Processing phenotype: {phen_dir}")
 
     # Validate configuration
@@ -807,15 +807,15 @@ def map(phen_dir: str, target_code_type: str, not_translate:bool):
         )
 
     if target_code_type is not None:
-        _map_target_code_type(phen_path, phenotype, target_code_type, not_translate)
+        _map_target_code_type(phen_path, phenotype, target_code_type, not_translate, no_metadata)
     else:
         for t in phenotype["map"]:
-            _map_target_code_type(phen_path, phenotype, t, not_translate)
+            _map_target_code_type(phen_path, phenotype, t, not_translate, no_metadata)
 
     _logger.info(f"Phenotype processed successfully")
 
 
-def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool):
+def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool, no_metadata:bool):
     _logger.debug(f"Target coding format: {target_code_type}")
     concepts_path = phen_path / CONCEPTS_DIR
     # Create output dataframe
@@ -886,7 +886,8 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
                         )
                         trans_out = add_metadata(
                             codes=trans_out,
-                            metadata=concept_set_metadata
+                            metadata=concept_set_metadata,
+                            no_metadata=no_metadata,
                         )
                         out = pd.concat([out, trans_out])
             else:
@@ -899,7 +900,8 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
                 )
                 trans_out = add_metadata(
                     codes=trans_out,
-                    metadata=concept_set_metadata
+                    metadata=concept_set_metadata,
+                    no_metadata=no_metadata,
                 )
                 out = pd.concat([out, trans_out])
 
@@ -998,15 +1000,16 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
 
 # Add metadata dict to each row of Df codes
 def add_metadata(
-    codes: pd.DataFrame, metadata: dict
+    codes: pd.DataFrame, metadata: dict, no_metadata:bool,
 ) -> pd.DataFrame:
     """Add concept set metadata, stored as a dictionary, to each concept row"""
     
-    for meta_name, meta_value in metadata.items():
-        codes[meta_name] = meta_value
-        _logger.debug(
-            f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}"
-        )
+    if not no_metadata:
+        for meta_name, meta_value in metadata.items():
+            codes[meta_name] = meta_value
+            _logger.debug(
+                f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}"
+            )
 
     return codes
     
diff --git a/docs/cli.md b/docs/cli.md
index 0a1cf39..396f529 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -112,6 +112,7 @@ The `phen` command is used phenotype-related operations.
   - `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
   - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen).
   - `--not-translate`: (Optional) Prevent any phenotype translation using NHS TRUD vocabularies. Therefore only concepts in already in the traget coding will be mapped.
+  - `--no-metadata`: (Optional) Prevent copying of metadata columns to output.
 
 
 - **Publish Phenotype Configuration**
-- 
GitLab