diff --git a/acmc/main.py b/acmc/main.py index 5f796b0087daf8346fb7daef05e2e29bb9e8669a..91a40bcd074594430beb217f85defed2332752a0 100644 --- a/acmc/main.py +++ b/acmc/main.py @@ -58,7 +58,7 @@ def _phen_validate(args: argparse.Namespace): def _phen_map(args: argparse.Namespace): """Handle the `phen map` command.""" - phen.map(args.phen_dir, args.target_coding) + phen.map(args.phen_dir, args.target_coding, args.not_translate) def _phen_export(args: argparse.Namespace): @@ -217,6 +217,12 @@ def main(): choices=parse.SUPPORTED_CODE_TYPES, help=f"Specify the target coding {parse.SUPPORTED_CODE_TYPES}", ) + phen_map_parser.add_argument( + "--not-translate", + action='store_true', + default=False, + help="(Optional) Prevent any phenotype translation using NHS TRUD vocabularies.", + ) phen_map_parser.set_defaults(func=_phen_map) # phen export diff --git a/acmc/phen.py b/acmc/phen.py index 052e1a0f9d0476bd4ebc2f5aec59a1398d4b0623..8d6cb8292e274fc041a074e38eb20a6a70de8059 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -663,7 +663,7 @@ def _preprocess_source_concepts( # Translate Df with multiple codes into single code type Series def translate_codes( - source_df: pd.DataFrame, target_code_type: str, concept_name: str + source_df: pd.DataFrame, target_code_type: str, concept_name: str, not_translate:bool ) -> pd.DataFrame: """Translates each source code type the source coding list into a target type and returns all conversions as a concept set""" @@ -688,7 +688,7 @@ def translate_codes( _logger.debug( f"Target code type {target_code_type} is the same as source code type {len(source_df)}, copying codes rather than translating" ) - else: + elif not not_translate: # get the translation filename using source to target code types filename = f"{source_code_type}_to_{target_code_type}.parquet" map_path = trud.PROCESSED_PATH / filename @@ -783,7 +783,7 @@ def write_vocab_version(phen_path: Path): ) -def map(phen_dir: str, target_code_type: str): +def map(phen_dir: str, 
target_code_type: str, not_translate:bool): _logger.info(f"Processing phenotype: {phen_dir}") # Validate configuration @@ -807,15 +807,15 @@ def map(phen_dir: str, target_code_type: str): ) if target_code_type is not None: - _map_target_code_type(phen_path, phenotype, target_code_type) + _map_target_code_type(phen_path, phenotype, target_code_type, not_translate) else: for t in phenotype["map"]: - _map_target_code_type(phen_path, phenotype, t) + _map_target_code_type(phen_path, phenotype, t, not_translate) _logger.info(f"Phenotype processed successfully") -def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str): +def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool): _logger.debug(f"Target coding format: {target_code_type}") concepts_path = phen_path / CONCEPTS_DIR # Create output dataframe @@ -882,6 +882,7 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st source_df, target_code_type=target_code_type, concept_name=concept_set_name, + not_translate=not_translate, ) trans_out = add_metadata( codes=trans_out, @@ -894,6 +895,7 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st source_df, target_code_type=target_code_type, concept_name=concept_set_name, + not_translate=not_translate, ) trans_out = add_metadata( codes=trans_out, diff --git a/docs/cli.md b/docs/cli.md index 4b756dcf5c094bdb2825dc614db8947c9be179cd..0a1cf39ea52a1cf9a916358bec3f4ecedcee3d78 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -111,6 +111,8 @@ The `phen` command is used phenotype-related operations. - `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`). - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). + - `--not-translate`: (Optional) Prevent any phenotype translation using NHS TRUD vocabularies. 
Therefore, only concepts already in the target coding will be mapped. + - **Publish Phenotype Configuration**