From 62c4b649b85f7e4e135f5465e4e8d65fd52791a3 Mon Sep 17 00:00:00 2001 From: Jakub Dylag <jjd1c23@soton.ac.uk> Date: Thu, 3 Apr 2025 04:01:56 +0100 Subject: [PATCH] (fix) Add metadata dict values for each concept set --- acmc/phen.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/acmc/phen.py b/acmc/phen.py index 3c2232d..647f0f2 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -142,10 +142,11 @@ CONFIG_SCHEMA = { "type": "dict", "schema": {"divide_col": {"type": "string"}}, }, + }, - # "metadata": {"type": "dict", "required": True}, }, }, + "metadata": {"type": "dict", "required": False}, }, }, }, @@ -820,6 +821,10 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st # Process each folder in codes section for files in phenotype["concept_sets"]: concept_set_name = files["name"] + if "metadata" in files: + concept_set_metadata = files["metadata"] + else: + concept_set_metadata = {} for concept_set in files["files"]: _logger.debug(f"--- {concept_set} ---") @@ -874,6 +879,10 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st target_code_type=target_code_type, concept_name=concept_set_name, ) + trans_out = add_metadata( + codes=trans_out, + metadata=concept_set_metadata + ) out = pd.concat([out, trans_out]) else: source_df = df[source_column_names] @@ -882,6 +891,10 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st target_code_type=target_code_type, concept_name=concept_set_name, ) + trans_out = add_metadata( + codes=trans_out, + metadata=concept_set_metadata + ) out = pd.concat([out, trans_out]) if len(code_errors) > 0: @@ -977,6 +990,21 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st _logger.info(f"Phenotype processed target code type {target_code_type}") +# Add metadata dict to each row of Df codes +def add_metadata( + codes: pd.DataFrame, metadata: dict +) -> pd.DataFrame: + """Add concept set metadata, stored as a dictionary, to each concept row""" + + for meta_name, meta_value in metadata.items(): + codes[meta_name] = meta_value + _logger.debug( + f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}" + ) + + return codes + + def _generate_version_tag( repo: git.Repo, increment: str = DEFAULT_VERSION_INC, use_v_prefix: bool = False ) -> str: -- GitLab