From 47dcd2ebb11d4c5497cf1a9d01e64e7eae3a2ccb Mon Sep 17 00:00:00 2001 From: Jakub Dylag <jjd1c23@soton.ac.uk> Date: Wed, 23 Apr 2025 11:06:42 +0100 Subject: [PATCH] (feat) flag no metadata in phen map output --- acmc/main.py | 8 +++++++- acmc/phen.py | 27 +++++++++++++++------------ docs/cli.md | 1 + 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/acmc/main.py b/acmc/main.py index 91a40bc..a831fae 100644 --- a/acmc/main.py +++ b/acmc/main.py @@ -58,7 +58,7 @@ def _phen_validate(args: argparse.Namespace): def _phen_map(args: argparse.Namespace): """Handle the `phen map` command.""" - phen.map(args.phen_dir, args.target_coding, args.not_translate) + phen.map(args.phen_dir, args.target_coding, args.not_translate, args.no_metadata) def _phen_export(args: argparse.Namespace): @@ -223,6 +223,12 @@ def main(): default=False, help="(Optional) Prevent any phenotype translation using NHS TRUD vocabularies.", ) + phen_map_parser.add_argument( + "--no-metadata", + action='store_true', + default=False, + help="(Optional) Prevent copying of metadata columns to output.", + ) phen_map_parser.set_defaults(func=_phen_map) # phen export diff --git a/acmc/phen.py b/acmc/phen.py index 8d6cb82..c745838 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -783,7 +783,7 @@ def write_vocab_version(phen_path: Path): ) -def map(phen_dir: str, target_code_type: str, not_translate:bool): +def map(phen_dir: str, target_code_type: str, not_translate:bool, no_metadata:bool): _logger.info(f"Processing phenotype: {phen_dir}") # Validate configuration @@ -807,15 +807,15 @@ def map(phen_dir: str, target_code_type: str, not_translate:bool): ) if target_code_type is not None: - _map_target_code_type(phen_path, phenotype, target_code_type, not_translate) + _map_target_code_type(phen_path, phenotype, target_code_type, not_translate, no_metadata) else: for t in phenotype["map"]: - _map_target_code_type(phen_path, phenotype, t, not_translate) + _map_target_code_type(phen_path, phenotype, t, not_translate, no_metadata) _logger.info(f"Phenotype processed successfully") -def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool): +def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool, no_metadata:bool): _logger.debug(f"Target coding format: {target_code_type}") concepts_path = phen_path / CONCEPTS_DIR # Create output dataframe @@ -886,7 +886,8 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st ) trans_out = add_metadata( codes=trans_out, - metadata=concept_set_metadata + metadata=concept_set_metadata, + no_metadata=no_metadata, ) out = pd.concat([out, trans_out]) else: @@ -899,7 +900,8 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st ) trans_out = add_metadata( codes=trans_out, - metadata=concept_set_metadata + metadata=concept_set_metadata, + no_metadata=no_metadata, ) out = pd.concat([out, trans_out]) @@ -998,15 +1000,16 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st # Add metadata dict to each row of Df codes def add_metadata( - codes: pd.DataFrame, metadata: dict + codes: pd.DataFrame, metadata: dict, no_metadata:bool, ) -> pd.DataFrame: """Add concept set metadata, stored as a dictionary, to each concept row""" - for meta_name, meta_value in metadata.items(): - codes[meta_name] = meta_value - _logger.debug( - f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}" - ) + if not no_metadata: + for meta_name, meta_value in metadata.items(): + codes[meta_name] = meta_value + _logger.debug( + f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}" + ) return codes diff --git a/docs/cli.md b/docs/cli.md index 0a1cf39..396f529 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -112,6 +112,7 @@ The `phen` command is used phenotype-related operations. - `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`). - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). - `--not-translate`: (Optional) Prevent any phenotype translation using NHS TRUD vocabularies. Therefore only concepts in already in the traget coding will be mapped. + - `--no-metadata`: (Optional) Prevent copying of metadata columns to output. - **Publish Phenotype Configuration** -- GitLab