Skip to content
Snippets Groups Projects
Commit c5bbb8ac authored by Jakub Dylag's avatar Jakub Dylag
Browse files

(feat) flag do not translate in phen map

parent 230240d1
No related branches found
No related tags found
No related merge requests found
......@@ -58,7 +58,7 @@ def _phen_validate(args: argparse.Namespace):
def _phen_map(args: argparse.Namespace):
"""Handle the `phen map` command."""
phen.map(args.phen_dir, args.target_coding)
phen.map(args.phen_dir, args.target_coding, args.not_translate)
def _phen_export(args: argparse.Namespace):
......@@ -217,6 +217,12 @@ def main():
choices=parse.SUPPORTED_CODE_TYPES,
help=f"Specify the target coding {parse.SUPPORTED_CODE_TYPES}",
)
phen_map_parser.add_argument(
"--not-translate",
action='store_true',
default=False,
help="(Optional) Prevent any phenotype translation using NHS TRUD vocabularies.",
)
phen_map_parser.set_defaults(func=_phen_map)
# phen export
......
......@@ -663,7 +663,7 @@ def _preprocess_source_concepts(
# Translate Df with multiple codes into single code type Series
def translate_codes(
source_df: pd.DataFrame, target_code_type: str, concept_name: str
source_df: pd.DataFrame, target_code_type: str, concept_name: str, not_translate:bool
) -> pd.DataFrame:
"""Translates each source code type the source coding list into a target type and returns all conversions as a concept set"""
......@@ -688,7 +688,7 @@ def translate_codes(
_logger.debug(
f"Target code type {target_code_type} is the same as source code type {len(source_df)}, copying codes rather than translating"
)
else:
elif not not_translate:
# get the translation filename using source to target code types
filename = f"{source_code_type}_to_{target_code_type}.parquet"
map_path = trud.PROCESSED_PATH / filename
......@@ -783,7 +783,7 @@ def write_vocab_version(phen_path: Path):
)
def map(phen_dir: str, target_code_type: str):
def map(phen_dir: str, target_code_type: str, not_translate:bool):
_logger.info(f"Processing phenotype: {phen_dir}")
# Validate configuration
......@@ -807,15 +807,15 @@ def map(phen_dir: str, target_code_type: str):
)
if target_code_type is not None:
_map_target_code_type(phen_path, phenotype, target_code_type)
_map_target_code_type(phen_path, phenotype, target_code_type, not_translate)
else:
for t in phenotype["map"]:
_map_target_code_type(phen_path, phenotype, t)
_map_target_code_type(phen_path, phenotype, t, not_translate)
_logger.info(f"Phenotype processed successfully")
def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str):
def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool):
_logger.debug(f"Target coding format: {target_code_type}")
concepts_path = phen_path / CONCEPTS_DIR
# Create output dataframe
......@@ -882,6 +882,7 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
source_df,
target_code_type=target_code_type,
concept_name=concept_set_name,
not_translate=not_translate,
)
trans_out = add_metadata(
codes=trans_out,
......@@ -894,6 +895,7 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
source_df,
target_code_type=target_code_type,
concept_name=concept_set_name,
not_translate=not_translate,
)
trans_out = add_metadata(
codes=trans_out,
......
......@@ -111,6 +111,8 @@ The `phen` command is used phenotype-related operations.
- `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
- `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen).
- `--not-translate`: (Optional) Prevent any phenotype translation using NHS TRUD vocabularies. Therefore, only concepts that are already in the target coding will be mapped.
- **Publish Phenotype Configuration**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment