Skip to content
Snippets Groups Projects
Commit 19918ce3 authored by mjbonifa's avatar mjbonifa
Browse files

standardised code_type import in phen

parent 8c8ce07c
Branches
Tags
No related merge requests found
......@@ -14,8 +14,7 @@ from pathlib import Path
from urllib.parse import urlparse, urlunparse
# acmc imports
from acmc import trud, omop
from acmc.parse import code_types
from acmc import trud, omop, parse
from acmc.omop import publish_concept_sets, setup
# setup logging
......@@ -258,7 +257,7 @@ def validate(phen_dir):
# check columns specified are a supported medical coding type
for column in file['columns']:
if column not in code_types and column != 'metadata':
if column not in parse.code_types and column != 'metadata':
validation_errors.append(f"Column type {column} for file {concept_code_file_path} is not supported")
# check the actions are supported
......@@ -328,6 +327,28 @@ def process_actions(df, file):
return df
def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None, errors_path=None):
    """Record codes that failed validation and return the valid subset.

    Invalid codes (where ``mask`` is False) are appended to an error-log CSV
    with columns CONCEPT, VOCABULARY, SOURCE and CAUSE; valid codes (where
    ``mask`` is True) are returned unchanged.

    Args:
        codes: pandas Series of candidate medical codes.
        mask: boolean Series aligned with ``codes``; True marks a valid code.
        code_type: coding vocabulary name, written to the VOCABULARY column.
        file_path: originating file, written to the SOURCE column.
        cause: human-readable failure reason, written to the CAUSE column.
        errors_path: CSV file to append to; defaults to the module-level
            ``log_errors_path`` for backward compatibility.

    Returns:
        The subset of ``codes`` where ``mask`` is True.
    """
    if errors_path is None:
        # fall back to the module-level error log location
        errors_path = log_errors_path

    # Build one row per invalid code; scalar values broadcast to every row.
    errors = pd.DataFrame([])
    errors["CONCEPT"] = codes[~mask].astype(str)
    errors["VOCABULARY"] = code_type
    errors["SOURCE"] = file_path
    errors["CAUSE"] = cause

    # Append to an existing error log rather than overwriting it, then write
    # once — the original duplicated the to_csv call in both branches and
    # left debug print() statements behind.
    if os.path.exists(errors_path):
        errors = pd.concat([pd.read_csv(errors_path), errors])
    errors.to_csv(errors_path, index=False)

    return codes[mask]
def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df):
# preprocess codes
......@@ -338,6 +359,7 @@ def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df):
if len(errors) > 0:
raise Exception(f"Code validation failed with {len(errors)} errors")
# add metadata columns
out = pd.concat([out, pd.DataFrame({output_col: codes}).join(metadata_df)], ignore_index=True)
......@@ -362,14 +384,14 @@ def preprocess(df, file, target_code_type=None, codes_file=None, translate=True,
out = preprocess_code(out=out,
codes=df[file[columns][target_code_type]].dropna(),
codes_file=codes_file,
checker=code_types[target_code_type](file_path),
checker=parse.code_types[target_code_type](),
output_col=target_code_type,
metadata_df=df[meta_columns])
else:
logger.warning(f"No {target_code_type} Codes to process")
else:
# QA for every code type in df run preprocess_code()
for k, v in code_types.items():
for k, v in parse.code_types.items():
if k in file['columns']:
logger.info(f"Processing {k} Codes...")
out = preprocess_code(out=out,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment