Commit 75615a67 authored by mjbonifa

fixed state bug between running tests and running from the command line

parent ab787128
@@ -15,7 +15,7 @@ from urllib.parse import urlparse, urlunparse
 # acmc imports
 from acmc import trud, omop
-from acmc.parse import Read2, Read3, Icd10, Snomed, Opcs4, Atc, code_types, vocab_types
+from acmc.parse import code_types
 from acmc.omop import publish_concept_sets, setup

 # setup logging
@@ -33,7 +33,6 @@ CONCEPT_SET_DIR = 'concept-set'
 DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR]
 CONFIG_FILE = 'config.json'
-ERROR_FILE = 'errors.csv'
 REPORT_FILE = 'report.md'
 DEFAULT_GIT_BRANCH = 'main'
@@ -362,10 +361,8 @@ def convert_codes(df, target, translate):
     # Append target column (if exists) - doesn't need conversion
     if target in df.columns:
-        logger.debug("Has", len(df), target, "in file")
+        logger.debug(f"Has {len(df)} {target} in file")
         codes = pd.concat([codes, df[target]])
-    # else:
-    #     logger.debug("No",target,"in file")

     if translate:
         # Convert codes to target type
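The corrected call above matters because logger.debug(msg, *args) treats extra positional arguments as %-style formatting arguments; since "Has" contains no placeholders, logging raises an internal formatting error and the values never reach the log. A small self-contained demonstration of the difference:

import logging

logging.basicConfig(level=logging.DEBUG, format="%(message)s")
logger = logging.getLogger("demo")

n_rows, target = 42, "icd10"

# Old form: extra arguments are interpreted as %-format args, but the
# message has no placeholders, so logging prints an internal
# "--- Logging error ---" traceback on stderr and the message is lost.
logger.debug("Has", n_rows, target, "in file")

# New form: the f-string builds the full message before logging.
logger.debug(f"Has {n_rows} {target} in file")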
@@ -439,8 +436,6 @@ def map(phen_dir, target_code_type, translate=True):
     # Process each folder in codes section
     for folder in codes:
-        logger.debug(folder["description"])
-        if "files" in folder:
         for file in folder["files"]:
             logger.debug(f"--- {file["file"]} ---")
             codes_file_path = codes_path / folder["folder"] / file["file"]
@@ -453,7 +448,7 @@ def map(phen_dir, target_code_type, translate=True):
     # Perform Structural Changes to file before preprocessing
     # split column with multiple code types
-    logger.debug("Processing actions")
+    logger.debug("Processing file structural actions")
     if ("actions" in file and "split_col" in file["actions"] and "codes_col" in file["actions"]):
         split_col = file["actions"]["split_col"]
         codes_col = file["actions"]["codes_col"]
@@ -465,7 +460,7 @@ def map(phen_dir, target_code_type, translate=True):
     df = pd.concat([df, oh], axis=1)  # merge in new columns

     # Preprocessing & Validation Checks
-    if "columns" in file:
+    logger.debug("Processing and validating code formats")
     meta_columns = []  # meta columns to keep with codes
     if "actions" in file and "divide_col" in file["actions"]:
         meta_columns += [file["actions"]["divide_col"]]
@@ -478,8 +473,6 @@ def map(phen_dir, target_code_type, translate=True):
         codes_file=str(codes_file_path.resolve()),
         target_code_type=target_code_type,
         translate=translate)
-    else:
-        raise Exception("No column format provided")

     # partition table by categorical column
     if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0):
@@ -488,9 +481,8 @@ def map(phen_dir, target_code_type, translate=True):
         df = df.groupby(divide_col)

     # Map to Concept/Phenotype
-    if len(df) == 0:
-        pass
-    elif ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):
+    if len(df.index) != 0:
+        if ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):
             out = map_file(df,
                            target_code_type, out,
                            concepts=file["concept_set"],
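When divide_col is set, df is rebound to a DataFrameGroupBy, which is why the branch above still guards on isinstance(df, pd.core.frame.DataFrame) before mapping the table as a whole. A minimal sketch of that distinction (the column names here are illustrative, not the project's real schema):

import pandas as pd

# Hypothetical example data standing in for a parsed codes file
df = pd.DataFrame({"code": ["A01", "B02"], "category": ["x", "y"]})

grouped = df.groupby("category")

# A groupby result is no longer a plain DataFrame, so the isinstance
# guard routes grouped tables to the per-category mapping path instead
print(isinstance(df, pd.core.frame.DataFrame))       # True
print(isinstance(grouped, pd.core.frame.DataFrame))  # False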
@@ -508,12 +500,9 @@ def map(phen_dir, target_code_type, translate=True):
         concepts=file["concept_set_categories"][cat],
         meta_columns=meta_columns,)
-    else:
-        logger.warning("Folder is empty")

     # test if there's any output from processing
-    if len(out) <= 0:
-        raise Exception("Processing has not produced any output")
+    if len(out.index) == 0:
+        raise Exception(f"Map processing produced no output; check that the configuration file {str(config_path.resolve())} is not empty")

     # Final processing
     out = out.reset_index(drop=True)
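A side note on the new emptiness check: for a plain DataFrame, len(out), len(out.index), and out.empty all agree, with out.empty being the most idiomatic. A quick self-contained illustration (column names are assumed for the example):

import pandas as pd

out = pd.DataFrame(columns=["CONCEPT", "CONCEPT_SET", "CODE"])

# Equivalent emptiness checks on a DataFrame
print(len(out) == 0)        # True
print(len(out.index) == 0)  # True
print(out.empty)            # True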
@@ -565,14 +554,6 @@ def map(phen_dir, target_code_type, translate=True):
     shutil.copy(trud.VERSION_PATH, phen_path / trud.VERSION_FILE)
     shutil.copy(omop.VERSION_PATH, phen_path / omop.VERSION_FILE)

-    # write errors to a file
-    error_path = phen_path / ERROR_FILE
-    if error_path.exists():
-        error_df = pd.read_csv(error_path)
-        error_df = error_df.drop_duplicates()  # remove duplicates from error file
-        error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
-        error_df.to_csv(error_path, index=False)

     logger.debug(f"Saved concept_sets to {str(concept_set_path.resolve())}")
     logger.info(f"Phenotype processed successfully")
@@ -29,14 +29,6 @@ def logger():
     stream_handler = logging.StreamHandler(sys.stdout)
     logger.addHandler(stream_handler)

-def test_phen_init_local_default(tmp_dir, monkeypatch, caplog):
-    with caplog.at_level(logging.DEBUG):
-        monkeypatch.setattr(sys, "argv", ["main.py", "phen", "init"])
-        # Mock input() to return "yes" to the question about reinitialising the directory
-        monkeypatch.setattr("builtins.input", lambda _: "y")
-        main.main()
-        assert "Phenotype initialised successfully" in caplog.text

 def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog):
     with caplog.at_level(logging.DEBUG):
         phen_path = tmp_dir / "phen"
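The remaining test still drives the CLI through main.main() with a patched sys.argv. The tmp_dir fixture it requests is defined elsewhere in the test suite; a minimal sketch of what such a fixture could look like, built on pytest's built-in tmp_path (the chdir behaviour is an assumption for illustration, not the project's actual fixture):

import pytest

@pytest.fixture
def tmp_dir(tmp_path, monkeypatch):
    # Hypothetical sketch: give each test a fresh working directory so
    # state written by one `phen init` run cannot leak into the next,
    # which is the kind of test/CLI state mismatch this commit fixes.
    monkeypatch.chdir(tmp_path)
    return tmp_path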