From 62c4b649b85f7e4e135f5465e4e8d65fd52791a3 Mon Sep 17 00:00:00 2001
From: Jakub Dylag <jjd1c23@soton.ac.uk>
Date: Thu, 3 Apr 2025 04:01:56 +0100
Subject: [PATCH] (fix) Add metadata dict values for each concept set

---
 acmc/phen.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/acmc/phen.py b/acmc/phen.py
index 3c2232d..647f0f2 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -142,10 +142,11 @@ CONFIG_SCHEMA = {
                                         "type": "dict",
                                         "schema": {"divide_col": {"type": "string"}},
                                     },
+                                    
                                 },
-                                # "metadata": {"type": "dict", "required": True},
                             },
                         },
+                        "metadata": {"type": "dict", "required": False},
                     },
                 },
             },
@@ -820,6 +821,10 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
     # Process each folder in codes section
     for files in phenotype["concept_sets"]:
         concept_set_name = files["name"]
+        if "metadata" in files:
+            concept_set_metadata = files["metadata"]
+        else:
+            concept_set_metadata = {}
         for concept_set in files["files"]:
             _logger.debug(f"--- {concept_set} ---")
 
@@ -874,6 +879,10 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
                             target_code_type=target_code_type,
                             concept_name=concept_set_name,
                         )
+                        trans_out = add_metadata(
+                            codes=trans_out,
+                            metadata=concept_set_metadata
+                        )
                         out = pd.concat([out, trans_out])
             else:
                 source_df = df[source_column_names]
@@ -882,6 +891,10 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
                     target_code_type=target_code_type,
                     concept_name=concept_set_name,
                 )
+                trans_out = add_metadata(
+                    codes=trans_out,
+                    metadata=concept_set_metadata
+                )
                 out = pd.concat([out, trans_out])
 
     if len(code_errors) > 0:
@@ -977,6 +990,21 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
     _logger.info(f"Phenotype processed target code type {target_code_type}")
 
 
+def add_metadata(codes: pd.DataFrame, metadata: dict) -> pd.DataFrame:
+    """Add each concept set metadata entry as a column on every row of codes.
+
+    Modifies codes in place and returns it. List or dict values are repeated
+    per row, since pandas would otherwise length-match or index-align them.
+    """
+    for meta_name, meta_value in metadata.items():
+        scalar = pd.api.types.is_scalar(meta_value)
+        codes[meta_name] = meta_value if scalar else [meta_value] * len(codes)
+        _logger.debug(
+            f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}"
+        )
+    return codes
+
+
 def _generate_version_tag(
     repo: git.Repo, increment: str = DEFAULT_VERSION_INC, use_v_prefix: bool = False
 ) -> str:
-- 
GitLab