Skip to content
Snippets Groups Projects
Commit 8c0be40f authored by mjbonifa's avatar mjbonifa
Browse files

Added `phen validate` command to validate the phenotype configuration in a specified directory

parent c156c2d0
No related branches found
No related tags found
No related merge requests found
...@@ -26,6 +26,10 @@ def phen_init(args): ...@@ -26,6 +26,10 @@ def phen_init(args):
"""Handle the `phen init` command.""" """Handle the `phen init` command."""
phen.init(args.phen_dir) phen.init(args.phen_dir)
def phen_validate(args):
    """Run `phen validate` for the directory given on the command line.

    Args:
        args: Parsed command-line arguments; only ``args.phen_dir`` is read.
    """
    target_dir = args.phen_dir
    phen.validate(target_dir)
def phen_map(args): def phen_map(args):
"""Handle the `phen map` command.""" """Handle the `phen map` command."""
phen.map(args.phen_dir, phen.map(args.phen_dir,
...@@ -74,6 +78,11 @@ def main(): ...@@ -74,6 +78,11 @@ def main():
phen_init_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory") phen_init_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
phen_init_parser.set_defaults(func=phen_init) phen_init_parser.set_defaults(func=phen_init)
# phen validate
phen_validate_parser = phen_subparsers.add_parser("validate", help="Validate phenotype configuration")
# Call resolve() and convert to str: argparse only applies `type` to string
# defaults, so passing the bound method itself would reach the handler as-is.
phen_validate_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_validate_parser.set_defaults(func=phen_validate)
# phen map # phen map
phen_map_parser = phen_subparsers.add_parser("map", help="Process phen configuration file") phen_map_parser = phen_subparsers.add_parser("map", help="Process phen configuration file")
phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory") phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
......
...@@ -97,6 +97,69 @@ def init(phen_dir): ...@@ -97,6 +97,69 @@ def init(phen_dir):
print(f"Phenotype initialised") print(f"Phenotype initialised")
def validate(phen_dir):
    """Validate the phenotype configuration held in ``phen_dir``.

    Checks that the phenotype directory, its configuration file and its source
    codes directory exist, loads the JSON configuration, and then verifies that
    every folder, code file, column type, concept set and action referenced by
    the configuration is present or supported.

    Args:
        phen_dir: Path to the phenotype directory (anything ``Path`` accepts).

    Raises:
        NotADirectoryError: ``phen_dir`` is not a directory.
        FileNotFoundError: The configuration file or the source codes
            directory does not exist.
        Exception: The configuration file is not a ``.json`` file, or at least
            one validation check failed (the collected errors are printed
            before raising).
    """
    print(f"Validating phenotype configuration {phen_dir}")
    phen_path = Path(phen_dir)
    if not phen_path.is_dir():
        raise NotADirectoryError(f"Error: '{phen_path}' is not a directory")

    config_path = phen_path / CONFIG_FILE
    if not config_path.is_file():
        raise FileNotFoundError(f"Error: phen configuration file '{config_path}' does not exist.")

    codes_path = phen_path / CODES_DIR
    if not codes_path.is_dir():
        # Report the path that was actually checked; the previous message
        # formatted an undefined name and raised NameError instead.
        raise FileNotFoundError(f"Error: source codes directory {codes_path} does not exist.")

    # Load configuration file
    if config_path.suffix != ".json":
        raise Exception(f"Unsupported configuration filetype: {str(config_path.resolve())}")
    # Context manager so the file handle is closed even if parsing fails.
    with open(config_path, "rb") as config_file:
        mapping = json.load(config_file)

    concept_sets = mapping["concept_sets"]
    concept_codes = mapping["codes"]

    validation_errors = []
    concept_set_names = [item['concept_set_name'] for item in concept_sets['concept_set']]

    for item in concept_codes:
        # check concept codes path is a directory
        concept_code_dir_path = codes_path / item['folder']
        if not concept_code_dir_path.is_dir():
            validation_errors.append(f"Folder directory {str(concept_code_dir_path.resolve())} is not a directory")

        for file in item["files"]:
            # check concept code file exists
            concept_code_file_path = concept_code_dir_path / file['file']
            if not concept_code_file_path.exists():
                validation_errors.append(f"Coding file {str(concept_code_file_path.resolve())} does not exist")

            # check columns specified are a supported medical coding type
            for column in file['columns']:
                if column not in code_types and column != 'metadata':
                    validation_errors.append(f"Column type {column} for file {concept_code_file_path} is not supported")

            # check concept_set defined for the mapping
            for concept_set_mapping in file['concept_set']:
                if concept_set_mapping not in concept_set_names:
                    validation_errors.append(f"Concept set name {concept_set_mapping} for file {concept_code_file_path} does not exist in concept set list")

            # check the actions are supported
            if 'actions' in file:
                for action in file['actions']:
                    if action not in COL_ACTIONS:
                        validation_errors.append(f"Action {action} is not supported")

    if validation_errors:
        print(validation_errors)
        raise Exception(f"Configuration file {str(config_path.resolve())} failed validation")

    print("Phenotype configuration validated successfully")
def read_table_file(path, excel_sheet=None): def read_table_file(path, excel_sheet=None):
""" """
Load Code List File Load Code List File
...@@ -115,7 +178,6 @@ def read_table_file(path, excel_sheet=None): ...@@ -115,7 +178,6 @@ def read_table_file(path, excel_sheet=None):
return df return df
def preprocess_code(out, codes, checker, output_col, df_meta, verify=True): def preprocess_code(out, codes, checker, output_col, df_meta, verify=True):
codes = codes.astype(str) # convert to string codes = codes.astype(str) # convert to string
codes = codes.str.strip() # remove excess spaces codes = codes.str.strip() # remove excess spaces
...@@ -226,46 +288,6 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[], translate=Tru ...@@ -226,46 +288,6 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[], translate=Tru
out = pd.concat([out, codes]) out = pd.concat([out, codes])
return out return out
def validate_config(codes_path, mapping):
    """Collect every problem found in a loaded phenotype configuration.

    Args:
        codes_path: Path object for the source codes directory; folder names
            from the configuration are joined onto it.
        mapping: The loaded configuration, read via its ``"concept_sets"`` and
            ``"codes"`` entries.

    Returns:
        A list of error message strings, in the order the problems were
        encountered. The list is empty when nothing is wrong.
    """
    concept_sets = mapping["concept_sets"]
    folders = mapping["codes"]
    known_names = [entry['concept_set_name'] for entry in concept_sets['concept_set']]

    problems = []
    for folder in folders:
        # The folder named in the configuration must exist under codes_path.
        folder_path = codes_path / folder['folder']
        if not folder_path.is_dir():
            problems.append(f"Folder directory {str(folder_path.resolve())} is not a directory")

        for entry in folder["files"]:
            # Each listed code file must exist inside its folder.
            file_path = folder_path / entry['file']
            if not file_path.exists():
                problems.append(f"Coding file {str(file_path.resolve())} does not exist")

            # Columns must be a supported code type or the 'metadata' column.
            problems.extend(
                f"Column type {column} for file {file_path} is not supported"
                for column in entry['columns']
                if column not in code_types and column != 'metadata'
            )

            # Every concept set a file maps to must be declared in the configuration.
            problems.extend(
                f"Concept set name {name} for file {file_path} does not exist in concept set list"
                for name in entry['concept_set']
                if name not in known_names
            )

            # Actions are optional; when present each must be a known column action.
            if 'actions' in entry:
                problems.extend(
                    f"Action {action} is not supported"
                    for action in entry['actions']
                    if action not in COL_ACTIONS
                )

    return problems
def sql_row_exist(conn, table, column, value): def sql_row_exist(conn, table, column, value):
# Execute and check if a result exists # Execute and check if a result exists
cur = conn.cursor() cur = conn.cursor()
...@@ -290,28 +312,14 @@ def map(phen_dir, ...@@ -290,28 +312,14 @@ def map(phen_dir,
else: else:
print("Not verifying codes.") print("Not verifying codes.")
phen_path = Path(phen_dir) # Validate the configuration
if not phen_path.is_dir(): validate(phen_dir)
raise NotADirectoryError(f"Error: '{phen_path}' is not a directory")
phen_path = Path(phen_dir)
config_path = phen_path / CONFIG_FILE config_path = phen_path / CONFIG_FILE
if not config_path.is_file():
raise FileNotFoundError(f"Error: phen configuration file '{config_path}' does not exist.")
codes_path = phen_path / CODES_DIR codes_path = phen_path / CODES_DIR
if not codes_path.is_dir():
raise FileNotFoundError(f"Error: source codes directory {source_codes_dir} does not exist.")
# Load configuration File
if config_path.suffix == ".json":
mapping = json.load(open(config_path, "rb")) mapping = json.load(open(config_path, "rb"))
validation_errors = validate_config(codes_path, mapping)
if len(validation_errors) > 0:
print(validation_errors)
raise Exception(f"Configuration file {str(config_path.resolve())} failed validation")
else:
raise Exception(f"Unsupported configuration filetype: {str(config_path.resolve())}")
summary_config = mapping["concept_sets"] summary_config = mapping["concept_sets"]
folders = mapping["codes"] folders = mapping["codes"]
out = pd.DataFrame([]) # Create Output dataframe to append to out = pd.DataFrame([]) # Create Output dataframe to append to
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment