Skip to content
Snippets Groups Projects
Commit 19918ce3 authored by mjbonifa's avatar mjbonifa
Browse files

standardised code_type import in phen

parent 8c8ce07c
No related branches found
No related tags found
No related merge requests found
...@@ -14,8 +14,7 @@ from pathlib import Path ...@@ -14,8 +14,7 @@ from pathlib import Path
from urllib.parse import urlparse, urlunparse from urllib.parse import urlparse, urlunparse
# acmc imports # acmc imports
from acmc import trud, omop from acmc import trud, omop, parse
from acmc.parse import code_types
from acmc.omop import publish_concept_sets, setup from acmc.omop import publish_concept_sets, setup
# setup logging # setup logging
...@@ -258,7 +257,7 @@ def validate(phen_dir): ...@@ -258,7 +257,7 @@ def validate(phen_dir):
# check columns specified are a supported medical coding type # check columns specified are a supported medical coding type
for column in file['columns']: for column in file['columns']:
if column not in code_types and column != 'metadata': if column not in parse.code_types and column != 'metadata':
validation_errors.append(f"Column type {column} for file {concept_code_file_path} is not supported") validation_errors.append(f"Column type {column} for file {concept_code_file_path} is not supported")
# check the actions are supported # check the actions are supported
...@@ -328,6 +327,28 @@ def process_actions(df, file): ...@@ -328,6 +327,28 @@ def process_actions(df, file):
return df return df
def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None):
    """Record codes that failed validation in the error-log CSV and return the valid ones.

    Args:
        codes: pandas Series of codes that were checked.
        mask: boolean Series aligned with ``codes``; True marks a valid code.
        code_type: coding vocabulary the codes belong to (written to the
            VOCABULARY column) — presumably a key of ``parse.code_types``;
            TODO confirm against callers.
        file_path: source concept file the codes came from (SOURCE column).
        cause: short description of why the codes are invalid (CAUSE column).

    Returns:
        The subset of ``codes`` where ``mask`` is True.
    """
    # Invalid codes are stringified so the CSV round-trips consistently.
    errors = pd.DataFrame(
        {
            "CONCEPT": codes[~mask].astype(str),
            "VOCABULARY": code_type,
            "SOURCE": file_path,
            "CAUSE": cause,
        }
    )

    # Append to any rows logged by earlier validation passes; write once
    # instead of duplicating to_csv in both branches.
    if os.path.exists(log_errors_path):
        errors = pd.concat([pd.read_csv(log_errors_path), errors])
    errors.to_csv(log_errors_path, index=False)

    return codes[mask]
def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df): def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df):
# preprocess codes # preprocess codes
...@@ -338,6 +359,7 @@ def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df): ...@@ -338,6 +359,7 @@ def preprocess_code(out, codes, codes_file, checker, output_col, metadata_df):
if len(errors) > 0: if len(errors) > 0:
raise Exception(f"Code validation failed with {len(errors)} errors") raise Exception(f"Code validation failed with {len(errors)} errors")
# add metadata columns # add metadata columns
out = pd.concat([out, pd.DataFrame({output_col: codes}).join(metadata_df)], ignore_index=True) out = pd.concat([out, pd.DataFrame({output_col: codes}).join(metadata_df)], ignore_index=True)
...@@ -362,14 +384,14 @@ def preprocess(df, file, target_code_type=None, codes_file=None, translate=True, ...@@ -362,14 +384,14 @@ def preprocess(df, file, target_code_type=None, codes_file=None, translate=True,
out = preprocess_code(out=out, out = preprocess_code(out=out,
codes=df[file[columns][target_code_type]].dropna(), codes=df[file[columns][target_code_type]].dropna(),
codes_file=codes_file, codes_file=codes_file,
checker=code_types[target_code_type](file_path), checker=parse.code_types[target_code_type](),
output_col=target_code_type, output_col=target_code_type,
metadata_df=df[meta_columns]) metadata_df=df[meta_columns])
else: else:
logger.warning(f"No {target_code_type} Codes to process") logger.warning(f"No {target_code_type} Codes to process")
else: else:
# QA for every code type in df run preprocess_code() # QA for every code type in df run preprocess_code()
for k, v in code_types.items(): for k, v in parse.code_types.items():
if k in file['columns']: if k in file['columns']:
logger.info(f"Processing {k} Codes...") logger.info(f"Processing {k} Codes...")
out = preprocess_code(out=out, out = preprocess_code(out=out,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment