Skip to content
Snippets Groups Projects
Commit 75615a67 authored by mjbonifa's avatar mjbonifa
Browse files

fixed state bug between tests and running from the command line

parent ab787128
No related branches found
No related tags found
No related merge requests found
......@@ -15,7 +15,7 @@ from urllib.parse import urlparse, urlunparse
# acmc imports
from acmc import trud, omop
from acmc.parse import Read2, Read3, Icd10, Snomed, Opcs4, Atc, code_types, vocab_types
from acmc.parse import code_types
from acmc.omop import publish_concept_sets, setup
# setup logging
......@@ -33,7 +33,6 @@ CONCEPT_SET_DIR = 'concept-set'
DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR]
CONFIG_FILE = 'config.json'
ERROR_FILE = 'errors.csv'
REPORT_FILE = 'report.md'
DEFAULT_GIT_BRANCH = 'main'
......@@ -362,10 +361,8 @@ def convert_codes(df, target, translate):
# Append target column (if exists) - doesn't need conversion
if target in df.columns:
logger.debug("Has", len(df), target, "in file")
logger.debug(f"Has {len(df)} {target} in file")
codes = pd.concat([codes, df[target]])
# else:
# logger.debug("No",target,"in file")
if translate:
# Convert codes to target type
......@@ -439,8 +436,6 @@ def map(phen_dir, target_code_type, translate=True):
# Process each folder in codes section
for folder in codes:
logger.debug(folder["description"])
if "files" in folder:
for file in folder["files"]:
logger.debug(f"--- {file["file"]} ---")
codes_file_path = codes_path / folder["folder"] / file["file"]
......@@ -453,7 +448,7 @@ def map(phen_dir, target_code_type, translate=True):
# Perform Structural Changes to file before preprocessing
# split column with multiple code types
logger.debug("Processing actions")
logger.debug("Processing file structural actions")
if ("actions" in file and "split_col" in file["actions"] and "codes_col" in file["actions"]):
split_col = file["actions"]["split_col"]
codes_col = file["actions"]["codes_col"]
......@@ -465,7 +460,7 @@ def map(phen_dir, target_code_type, translate=True):
df = pd.concat([df, oh], axis=1) # merge in new columns
# Preprocessing & Validation Checks
if "columns" in file:
logger.debug("Processing and validating code formats")
meta_columns = [] # meta columns to keep with codes
if "actions" in file and "divide_col" in file["actions"]:
meta_columns += [file["actions"]["divide_col"]]
......@@ -478,8 +473,6 @@ def map(phen_dir, target_code_type, translate=True):
codes_file=str(codes_file_path.resolve()),
target_code_type=target_code_type,
translate=translate)
else:
raise Exception("No column format provided")
# partition table by categorical column
if ("actions" in file and "divide_col" in file["actions"] and len(df) > 0):
......@@ -488,9 +481,8 @@ def map(phen_dir, target_code_type, translate=True):
df = df.groupby(divide_col)
# Map to Concept/Phenotype
if len(df) == 0:
pass
elif ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):
if len(df.index) != 0:
if ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):
out = map_file(df,
target_code_type, out,
concepts=file["concept_set"],
......@@ -508,12 +500,9 @@ def map(phen_dir, target_code_type, translate=True):
concepts=file["concept_set_categories"][cat],
meta_columns=meta_columns,)
else:
logger.warning("Folder is empty")
# test if there's any output from processing
if len(out) <= 0:
raise Exception("Processing has not produced any output")
if len(out.index) == 0:
raise Exception("The output after map processing has no output, check configuration file {str(config_path.resolve())} is not empty")
# Final processing
out = out.reset_index(drop=True)
......@@ -565,14 +554,6 @@ def map(phen_dir, target_code_type, translate=True):
shutil.copy(trud.VERSION_PATH, phen_path / trud.VERSION_FILE)
shutil.copy(omop.VERSION_PATH, phen_path / omop.VERSION_FILE)
# write errors to a file
error_path = phen_path / ERROR_FILE
if error_path.exists():
error_df = pd.read_csv(error_path)
error_df = error_df.drop_duplicates() # Remove Duplicates from Error file
error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
error_df.to_csv(error_path, index=False)
logger.debug(f"Saved concept_sets to {str(concept_set_path.resolve())}")
logger.info(f"Phenotype processed successfully")
......
......@@ -29,14 +29,6 @@ def logger():
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
def test_phen_init_local_default(tmp_dir, monkeypatch, caplog):
    """Run `phen init` via the CLI entry point and verify the success log line.

    Simulates `main.py phen init` with a patched argv, auto-confirms the
    "reinitialise directory?" prompt, then checks the captured log output.
    """
    with caplog.at_level(logging.DEBUG):
        # Auto-answer "y" to the prompt asking whether to reinitialise
        # an existing phenotype directory.
        monkeypatch.setattr("builtins.input", lambda prompt: "y")
        # Pretend the user typed `main.py phen init` on the command line.
        monkeypatch.setattr(sys, "argv", ["main.py", "phen", "init"])
        main.main()
        assert "Phenotype initialised successfully" in caplog.text
def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog):
with caplog.at_level(logging.DEBUG):
phen_path = tmp_dir / "phen"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment