diff --git a/acmc/main.py b/acmc/main.py index d1591b58908e5ec74b1bf90e25c7d1e6e697bd59..edb185a0b57f4bdbb0ead6c8c2a6740e7302e417 100644 --- a/acmc/main.py +++ b/acmc/main.py @@ -37,8 +37,7 @@ def phen_validate(args): def phen_map(args): """Handle the `phen map` command.""" phen.map(args.phen_dir, - args.target_coding, - args.translate) + args.target_coding) def phen_publish(args): """Handle the `phen publish` command.""" @@ -114,8 +113,6 @@ def main(): phen_map_parser = phen_subparsers.add_parser("map", help="Process phen mapping") phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory") phen_map_parser.add_argument("-t", "--target-coding", required=True, choices=['read2', 'read3', 'icd10', 'snomed', 'opcs4'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)") - # phen map flags - phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Translate code types") phen_map_parser.set_defaults(func=phen_map) # phen publish diff --git a/acmc/phen.py b/acmc/phen.py index 4ac53e222c20f34ceddcd6878e8f30826289db11..532ecab6f62ac259e2c4624d2bbfc217e4907507 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -367,7 +367,7 @@ def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df): return out # Perform QA Checks on columns individually and append to df -def preprocess(df, file, target_code_type=None, codes_file=None, translate=True,): +def preprocess(df, file, target_code_type=None, codes_file=None): """ Parses each column individually - Order and length will not be preserved! """ out = pd.DataFrame([]) # create output df to append to @@ -406,7 +406,7 @@ def preprocess(df, file, target_code_type=None, codes_file=None, translate=True, return out, meta_columns # Translate Df with multiple codes into single code type Series -def convert_codes(df, target_code_type): +def translate_codes(df, target_code_type): codes = pd.Series([], dtype=str) # Convert codes to target type @@ -440,7 +440,7 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[]): # seperate out meta_columns metadata_df = df[meta_columns] df = df.drop(columns=meta_columns) - codes = convert_codes(df, target_code_type) + codes = translate_codes(df, target_code_type) codes = codes.dropna() # delete NaNs # Append to out df @@ -464,10 +464,9 @@ def sql_row_exist(conn, table, column, value): return exists -def map(phen_dir, target_code_type, translate=True): +def map(phen_dir, target_code_type): logger.info(f"Processing phenotype: {phen_dir}") logger.debug(f"Target coding format: {target_code_type}") - logger.debug(f"Translating: {translate}") # Validate configuration validate(phen_dir) @@ -505,8 +504,7 @@ def map(phen_dir, target_code_type, translate=True): df, meta_columns = preprocess(df, file, codes_file=str(codes_file_path.resolve()), - target_code_type=target_code_type, - translate=translate) + target_code_type=target_code_type) # partition table by categorical column if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0): @@ -533,7 +531,7 @@ def map(phen_dir, target_code_type, translate=True): concepts=file["concept_set_categories"][cat], meta_columns=meta_columns,) - # test if there's any output from processing + # Check there is output from processing if len(out.index) == 0: raise Exception(f"No output after map processing, check configuration {str(config_path.resolve())}") @@ -552,10 +550,7 @@ def map(phen_dir, target_code_type, translate=True): out = out.merge(concept_sets_df, how="left", on="CONCEPT_SET") # merge with output # Save output to map directory - if translate: - output_filename = target_code_type + '.csv' - else: - output_filename = target_code_type + '_no_translate.csv' + output_filename = target_code_type + '.csv' map_path = phen_path / MAP_DIR / output_filename diff --git a/tests/test_acmc.py b/tests/test_acmc.py index ee92004ce03c3518e17bb71c30dee6b30eb14c3b..874d4d5cd8cf9a71c74625b254ece4301dc6cc40 100644 --- a/tests/test_acmc.py +++ b/tests/test_acmc.py @@ -68,7 +68,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog): # map phenotype with caplog.at_level(logging.DEBUG): - monkeypatch.setattr(sys, "argv", ["main.py", "phen", "map", "-d", str(phen_path.resolve()), "-t", "read3", "-tr"]) + monkeypatch.setattr(sys, "argv", ["main.py", "phen", "map", "-d", str(phen_path.resolve()), "-t", "read3"]) main.main() assert "Phenotype processed successfully" in caplog.text