Skip to content
Snippets Groups Projects
Commit e7c14011 authored by mjbonifa's avatar mjbonifa
Browse files

removed the translate flag from the phen code, not necessary and made the...

removed the translate flag from the phen code; it was not necessary and made the logic a little confusing to understand
parent 3c02f68f
No related branches found
No related tags found
No related merge requests found
......@@ -37,8 +37,7 @@ def phen_validate(args):
def phen_map(args):
"""Handle the `phen map` command."""
phen.map(args.phen_dir,
args.target_coding,
args.translate)
args.target_coding)
def phen_publish(args):
"""Handle the `phen publish` command."""
......@@ -114,8 +113,6 @@ def main():
phen_map_parser = phen_subparsers.add_parser("map", help="Process phen mapping")
phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_map_parser.add_argument("-t", "--target-coding", required=True, choices=['read2', 'read3', 'icd10', 'snomed', 'opcs4'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)")
# phen map flags
phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Translate code types")
phen_map_parser.set_defaults(func=phen_map)
# phen publish
......
......@@ -367,7 +367,7 @@ def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df):
return out
# Perform QA Checks on columns individually and append to df
def preprocess(df, file, target_code_type=None, codes_file=None, translate=True,):
def preprocess(df, file, target_code_type=None, codes_file=None):
""" Parses each column individually - Order and length will not be preserved! """
out = pd.DataFrame([]) # create output df to append to
......@@ -406,7 +406,7 @@ def preprocess(df, file, target_code_type=None, codes_file=None, translate=True,
return out, meta_columns
# Translate Df with multiple codes into single code type Series
def convert_codes(df, target_code_type):
def translate_codes(df, target_code_type):
codes = pd.Series([], dtype=str)
# Convert codes to target type
......@@ -440,7 +440,7 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[]):
# seperate out meta_columns
metadata_df = df[meta_columns]
df = df.drop(columns=meta_columns)
codes = convert_codes(df, target_code_type)
codes = translate_codes(df, target_code_type)
codes = codes.dropna() # delete NaNs
# Append to out df
......@@ -464,10 +464,9 @@ def sql_row_exist(conn, table, column, value):
return exists
def map(phen_dir, target_code_type, translate=True):
def map(phen_dir, target_code_type):
logger.info(f"Processing phenotype: {phen_dir}")
logger.debug(f"Target coding format: {target_code_type}")
logger.debug(f"Translating: {translate}")
# Validate configuration
validate(phen_dir)
......@@ -505,8 +504,7 @@ def map(phen_dir, target_code_type, translate=True):
df, meta_columns = preprocess(df,
file,
codes_file=str(codes_file_path.resolve()),
target_code_type=target_code_type,
translate=translate)
target_code_type=target_code_type)
# partition table by categorical column
if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0):
......@@ -533,7 +531,7 @@ def map(phen_dir, target_code_type, translate=True):
concepts=file["concept_set_categories"][cat],
meta_columns=meta_columns,)
# test if there's any output from processing
# Check there is output from processing
if len(out.index) == 0:
raise Exception(f"No output after map processing, check configuration {str(config_path.resolve())}")
......@@ -552,10 +550,7 @@ def map(phen_dir, target_code_type, translate=True):
out = out.merge(concept_sets_df, how="left", on="CONCEPT_SET") # merge with output
# Save output to map directory
if translate:
output_filename = target_code_type + '.csv'
else:
output_filename = target_code_type + '_no_translate.csv'
output_filename = target_code_type + '.csv'
map_path = phen_path / MAP_DIR / output_filename
......
......@@ -68,7 +68,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog):
# map phenotype
with caplog.at_level(logging.DEBUG):
monkeypatch.setattr(sys, "argv", ["main.py", "phen", "map", "-d", str(phen_path.resolve()), "-t", "read3", "-tr"])
monkeypatch.setattr(sys, "argv", ["main.py", "phen", "map", "-d", str(phen_path.resolve()), "-t", "read3"])
main.main()
assert "Phenotype processed successfully" in caplog.text
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment