Skip to content
Snippets Groups Projects
Commit 32cf82d6 authored by mjbonifa's avatar mjbonifa
Browse files

Merge branch '22-write-extended-example-using-all-config-elements' into 'dev'

test: added new config3.yaml that includes hanlon. had to refactor phen map to...

Closes #22

See merge request meldb/concepts-processing!8
parents 55bf62f0 d42cf541
No related branches found
No related tags found
No related merge requests found
......@@ -336,10 +336,12 @@ def validate(phen_dir):
)
# check concept_set defined for the mapping
logger.debug(f"file {file}")
for concept_set_mapping in file["concept_set"]:
# store the concept set names found for later set operations
if concept_set_mapping not in concept_set_mapping_names:
concept_set_mapping_names.append(concept_set_mapping)
logger.debug(f"mapping {concept_set_mapping}")
if concept_set_mapping['name'] not in concept_set_mapping_names:
concept_set_mapping_names.append(concept_set_mapping['name'])
else:
validation_errors.append(
f"Missing required elements {required_keys} in codes {item}"
......@@ -489,7 +491,7 @@ def translate_codes(df, target_code_type):
# Append file's codes to output Df with concept
def map_file(df, target_code_type, out, concepts, meta_columns=[]):
def map_file(df, target_code_type, out, concept_names, meta_columns=[]):
# separate out meta_columns
metadata_df = df[meta_columns]
df = df.drop(columns=meta_columns)
......@@ -502,7 +504,7 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[]):
if len(codes) > 0:
codes = pd.DataFrame({"CONCEPT": codes})
codes = codes.join(metadata_df)
for concept in concepts:
for concept in concept_names:
codes["CONCEPT_SET"] = np.repeat(concept.strip(), len(codes))
out = pd.concat([out, codes])
else:
......@@ -628,52 +630,44 @@ def map(phen_dir, target_code_type):
# partition table by categorical column
if "actions" in file and "divide_col" in file["actions"] and len(df) > 0:
divide_col = file["actions"]["divide_col"]
logger.debug(
"Action: Dividing Table by",
divide_col,
"column into: ",
df[divide_col].unique(),
)
logger.debug(f"Action: Dividing Table by {divide_col} column into: {df[divide_col].unique()}")
df = df.groupby(divide_col)
# Map to Concept/Phenotype
# TODO: This code needs refactoring, as the handling of concept_set_categories should probably happen elsewhere
if len(df.index) != 0:
if ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):
logger.debug(f"instance of df before if: {type(df)}")
if isinstance(df, pd.core.frame.DataFrame):
concept_names = [concept['name'] for concept in file["concept_set"]]
out = map_file(
df,
target_code_type,
out,
concepts=file["concept_set"],
concept_names=concept_names,
meta_columns=meta_columns,
)
elif ("concept_set_categories" in file) and isinstance(
df, pd.core.groupby.generic.DataFrameGroupBy
):
elif isinstance(df, pd.core.groupby.generic.DataFrameGroupBy):
meta_columns.remove(divide_col) # delete categorical column
for cat, grp in df:
if (
cat in file["concept_set_categories"].keys()
): # check if category is mapped
for concept_set in file['concept_set']:
# NOTE: if a concept_set entry has no "category" key, the lookup below will fail (presumably a KeyError) - TODO handle this case
if cat == concept_set["category"]:
grp = grp.drop(
columns=[divide_col]
) # delete categorical column
logger.debug("Category:", cat)
logger.debug(f"Mapping category: {cat}")
concept_names = [concept_set["name"]]
out = map_file(
grp,
target_code_type,
out,
concepts=file["concept_set_categories"][cat],
concept_names=concept_names,
meta_columns=meta_columns,
)
else:
raise AttributeError(
f"File {file} has either no concept_set or conceot_set_categories or the instance of dataframe objectives associated is incorrect, concept_set must be a DataFrame, conceot_set_categories must be pd.core.groupby.generic.DataFrameGroupBy"
)
else:
logger.warning(
f"File {file} has no output after preprocessing in config {str(config_path.resolve())}"
)
logger.debug(f"instance of df: {type(df)}")
# raise AttributeError(
# f"File {file} has either no concept_set or conceot_set_categories or the instance of dataframe objectives associated is incorrect, concept_set must be a DataFrame, conceot_set_categories must be pd.core.groupby.generic.DataFrameGroupBy"
# )
if len(code_errors) > 0:
logger.error(f"The map processing has {len(code_errors)} errors")
......
This diff is collapsed.
MMCode,Condition,
1,,
2,Hypertension,
3,Depression,
4,Painful_condition,
5,Asthma,
6,Coronary Heart disease,
7,Treated_dyspepsia,
8,Diabetes,
9,Thyroid_disease,
10,Rheumatoid_arthritis_Inflammatory_arthropathies_and_connective_tissue_disorders,
11,Deafness,
12,COPD,
13,Anxiety,
14,Irritable_bowel_syndrome,
15,Cancer,
16,Alcohol_problem,
17,Other_psychoactive_substance_misuse,
18,Treated_constipation,
19,Stroke_or_TIA,
20,Chronic_kidney_disease,
21,Diverticular_disease,Need to add
22,Atrial_fibrillation,
23,Peripheral_vascular_disease,
24,Heart_failure,
25,Prostate_disorders,
26,Glaucoma,
27,Epilepsy_(Currently_treated),
28,Dementia,
29,Schizophrenia_or_bipolar_disorder,
30,Psoriasis_or_eczema,
31,Inflammatory_bowel_disease,
32,Migraine,
33,Blindness_and_low_vision,
34,Chronic_sinusitis,
35,Learning_disability,
36,Anorexia_or_bulimia,
37,Bronchiectasis,
38,Parkinson's_disease,
39,Multiple_sclerosis,
40,Viral_hepatitis,
41,Chronic_liver_disease,
This diff is collapsed.
This diff is collapsed.
......@@ -6,7 +6,6 @@ concept_sets:
vocabulary_reference: "https://git.soton.ac.uk/meldb/concepts-processing/-/tree/main/examples"
concept_set:
- concept_set_name: "ABDO_PAIN"
concept_set_status: "AGREED"
metadata: {}
codes:
......@@ -19,5 +18,6 @@ codes:
metadata:
- "description"
concept_set:
- "ABDO_PAIN"
- name: "ABDO_PAIN"
......@@ -6,10 +6,8 @@ concept_sets:
vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
concept_set:
- concept_set_name: "CVD_EVENTS"
concept_set_status: "AGREED"
metadata: {}
- concept_set_name: "DID_NOT_ATTEND"
concept_set_status: "AGREED"
metadata: {}
codes:
......@@ -21,11 +19,11 @@ codes:
icd10: "code"
metadata: []
concept_set:
- "CVD_EVENTS"
- name: "CVD_EVENTS"
- file: "Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
metadata: []
concept_set:
- "DID_NOT_ATTEND"
- name: "DID_NOT_ATTEND"
concept_sets:
version: "v1.0.4"
omop:
vocabulary_id: "ACMC_Example"
vocabulary_name: "ACMC example phenotype"
vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
concept_set:
- concept_set_name: "CVD_EVENTS"
metadata: {}
- concept_set_name: "DID_NOT_ATTEND"
metadata: {}
- concept_set_name: "HYPERTENSION"
metadata: {}
- concept_set_name: "DEPRESSION"
metadata: {}
codes:
- folder: "clinical-codes-org"
description: "Downloaded 16/11/23"
files:
- file: "Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
metadata: []
concept_set:
- name: "CVD_EVENTS"
- file: "Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
metadata: []
concept_set:
- name: "DID_NOT_ATTEND"
- folder: hanlon
description: Hanlon Paper Code Lists
files:
- file: Read_codes_for_diagnoses.csv
columns:
read2: Read Code
actions:
divide_col: MMCode
concept_set:
- name: HYPERTENSION
category: "2"
- name: DEPRESSION
category: "3"
......@@ -51,6 +51,7 @@ def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog):
[
("config1.yaml"), # config.yaml test case
("config2.yaml"), # config.yaml test case
("config3.yaml"), # config.yaml test case
],
)
def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment