Skip to content
Snippets Groups Projects
Commit 32cf82d6 authored by mjbonifa
Browse files

Merge branch '22-write-extended-example-using-all-config-elements' into 'dev'

test: added new config3.yaml that includes hanlon. had to refactor phen map to...

Closes #22

See merge request meldb/concepts-processing!8
parents 55bf62f0 d42cf541
No related branches found
No related tags found
No related merge requests found
...@@ -336,10 +336,12 @@ def validate(phen_dir): ...@@ -336,10 +336,12 @@ def validate(phen_dir):
) )
# check concept_set defined for the mapping # check concept_set defined for the mapping
logger.debug(f"file {file}")
for concept_set_mapping in file["concept_set"]: for concept_set_mapping in file["concept_set"]:
# store the concept set names found for later set operations # store the concept set names found for later set operations
if concept_set_mapping not in concept_set_mapping_names: logger.debug(f"mapping {concept_set_mapping}")
concept_set_mapping_names.append(concept_set_mapping) if concept_set_mapping['name'] not in concept_set_mapping_names:
concept_set_mapping_names.append(concept_set_mapping['name'])
else: else:
validation_errors.append( validation_errors.append(
f"Missing required elements {required_keys} in codes {item}" f"Missing required elements {required_keys} in codes {item}"
...@@ -489,7 +491,7 @@ def translate_codes(df, target_code_type): ...@@ -489,7 +491,7 @@ def translate_codes(df, target_code_type):
# Append file's codes to output Df with concept # Append file's codes to output Df with concept
def map_file(df, target_code_type, out, concepts, meta_columns=[]): def map_file(df, target_code_type, out, concept_names, meta_columns=[]):
# seperate out meta_columns # seperate out meta_columns
metadata_df = df[meta_columns] metadata_df = df[meta_columns]
df = df.drop(columns=meta_columns) df = df.drop(columns=meta_columns)
...@@ -502,7 +504,7 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[]): ...@@ -502,7 +504,7 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[]):
if len(codes) > 0: if len(codes) > 0:
codes = pd.DataFrame({"CONCEPT": codes}) codes = pd.DataFrame({"CONCEPT": codes})
codes = codes.join(metadata_df) codes = codes.join(metadata_df)
for concept in concepts: for concept in concept_names:
codes["CONCEPT_SET"] = np.repeat(concept.strip(), len(codes)) codes["CONCEPT_SET"] = np.repeat(concept.strip(), len(codes))
out = pd.concat([out, codes]) out = pd.concat([out, codes])
else: else:
...@@ -628,52 +630,44 @@ def map(phen_dir, target_code_type): ...@@ -628,52 +630,44 @@ def map(phen_dir, target_code_type):
# partition table by categorical column # partition table by categorical column
if "actions" in file and "divide_col" in file["actions"] and len(df) > 0: if "actions" in file and "divide_col" in file["actions"] and len(df) > 0:
divide_col = file["actions"]["divide_col"] divide_col = file["actions"]["divide_col"]
logger.debug( logger.debug(f"Action: Dividing Table by {divide_col} column into: {df[divide_col].unique()}")
"Action: Dividing Table by",
divide_col,
"column into: ",
df[divide_col].unique(),
)
df = df.groupby(divide_col) df = df.groupby(divide_col)
# Map to Concept/Phenotype # Map to Concept/Phenotype
# TODO: This code needs refactory as it seems handling of the concept_set_categories should happen at another place # TODO: This code needs refactory as it seems handling of the concept_set_categories should happen at another place
if len(df.index) != 0: logger.debug(f"instance of df before if: {type(df)}")
if ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame): if isinstance(df, pd.core.frame.DataFrame):
out = map_file( concept_names = [concept['name'] for concept in file["concept_set"]]
df, out = map_file(
target_code_type, df,
out, target_code_type,
concepts=file["concept_set"], out,
meta_columns=meta_columns, concept_names=concept_names,
) meta_columns=meta_columns,
elif ("concept_set_categories" in file) and isinstance( )
df, pd.core.groupby.generic.DataFrameGroupBy elif isinstance(df, pd.core.groupby.generic.DataFrameGroupBy):
): meta_columns.remove(divide_col) # delete categorical column
meta_columns.remove(divide_col) # delete categorical column for cat, grp in df:
for cat, grp in df: for concept_set in file['concept_set']:
if ( # what if there's no category, there's going to be an error
cat in file["concept_set_categories"].keys() if cat == concept_set["category"]:
): # check if category is mapped
grp = grp.drop( grp = grp.drop(
columns=[divide_col] columns=[divide_col]
) # delete categorical column ) # delete categorical column
logger.debug("Category:", cat) logger.debug(f"Mapping category: {cat}")
concept_names = [concept_set["name"]]
out = map_file( out = map_file(
grp, grp,
target_code_type, target_code_type,
out, out,
concepts=file["concept_set_categories"][cat], concept_names=concept_names,
meta_columns=meta_columns, meta_columns=meta_columns,
) )
else: else:
raise AttributeError( logger.debug(f"instance of df: {type(df)}")
f"File {file} has either no concept_set or conceot_set_categories or the instance of dataframe objectives associated is incorrect, concept_set must be a DataFrame, conceot_set_categories must be pd.core.groupby.generic.DataFrameGroupBy" # raise AttributeError(
) # f"File {file} has either no concept_set or conceot_set_categories or the instance of dataframe objectives associated is incorrect, concept_set must be a DataFrame, conceot_set_categories must be pd.core.groupby.generic.DataFrameGroupBy"
else: # )
logger.warning(
f"File {file} has no output after preprocessing in config {str(config_path.resolve())}"
)
if len(code_errors) > 0: if len(code_errors) > 0:
logger.error(f"The map processing has {len(code_errors)} errors") logger.error(f"The map processing has {len(code_errors)} errors")
......
This diff is collapsed.
MMCode,Condition,
1,,
2,Hypertension,
3,Depression,
4,Painful_condition,
5,Asthma,
6,Coronary Heart disease,
7,Treated_dyspepsia,
8,Diabetes,
9,Thyroid_disease,
10,Rheumatoid_arthritis_Inflammatory_arthropathies_and_connective_tissue_disorders,
11,Deafness,
12,COPD,
13,Anxiety,
14,Irritable_bowel_syndrome,
15,Cancer,
16,Alcohol_problem,
17,Other_psychoactive_substance_misuse,
18,Treated_constipation,
19,Stroke_or_TIA,
20,Chronic_kidney_disease,
21,Diverticular_disease,Need to add
22,Atrial_fibrillation,
23,Peripheral_vascular_disease,
24,Heart_failure,
25,Prostate_disorders,
26,Glaucoma,
27,Epilepsy_(Currently_treated),
28,Dementia,
29,Schizophrenia_or_bipolar_disorder,
30,Psoriasis_or_eczema,
31,Inflammatory_bowel_disease,
32,Migraine,
33,Blindness_and_low_vision,
34,Chronic_sinusitis,
35,Learning_disability,
36,Anorexia_or_bulimia,
37,Bronchiectasis,
38,Parkinson's_disease,
39,Multiple_sclerosis,
40,Viral_hepatitis,
41,Chronic_liver_disease,
This diff is collapsed.
This diff is collapsed.
...@@ -6,7 +6,6 @@ concept_sets: ...@@ -6,7 +6,6 @@ concept_sets:
vocabulary_reference: "https://git.soton.ac.uk/meldb/concepts-processing/-/tree/main/examples" vocabulary_reference: "https://git.soton.ac.uk/meldb/concepts-processing/-/tree/main/examples"
concept_set: concept_set:
- concept_set_name: "ABDO_PAIN" - concept_set_name: "ABDO_PAIN"
concept_set_status: "AGREED"
metadata: {} metadata: {}
codes: codes:
...@@ -19,5 +18,6 @@ codes: ...@@ -19,5 +18,6 @@ codes:
metadata: metadata:
- "description" - "description"
concept_set: concept_set:
- "ABDO_PAIN" - name: "ABDO_PAIN"
...@@ -6,10 +6,8 @@ concept_sets: ...@@ -6,10 +6,8 @@ concept_sets:
vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example" vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
concept_set: concept_set:
- concept_set_name: "CVD_EVENTS" - concept_set_name: "CVD_EVENTS"
concept_set_status: "AGREED"
metadata: {} metadata: {}
- concept_set_name: "DID_NOT_ATTEND" - concept_set_name: "DID_NOT_ATTEND"
concept_set_status: "AGREED"
metadata: {} metadata: {}
codes: codes:
...@@ -21,11 +19,11 @@ codes: ...@@ -21,11 +19,11 @@ codes:
icd10: "code" icd10: "code"
metadata: [] metadata: []
concept_set: concept_set:
- "CVD_EVENTS" - name: "CVD_EVENTS"
- file: "Non-attendance codes/res201-did-not-attend-appointment.csv" - file: "Non-attendance codes/res201-did-not-attend-appointment.csv"
columns: columns:
read2: "code" read2: "code"
metadata: [] metadata: []
concept_set: concept_set:
- "DID_NOT_ATTEND" - name: "DID_NOT_ATTEND"
concept_sets:
version: "v1.0.4"
omop:
vocabulary_id: "ACMC_Example"
vocabulary_name: "ACMC example phenotype"
vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
concept_set:
- concept_set_name: "CVD_EVENTS"
metadata: {}
- concept_set_name: "DID_NOT_ATTEND"
metadata: {}
- concept_set_name: "HYPERTENSION"
metadata: {}
- concept_set_name: "DEPRESSION"
metadata: {}
codes:
- folder: "clinical-codes-org"
description: "Downloaded 16/11/23"
files:
- file: "Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
metadata: []
concept_set:
- name: "CVD_EVENTS"
- file: "Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
metadata: []
concept_set:
- name: "DID_NOT_ATTEND"
- folder: hanlon
description: Hanlon Paper Code Lists
files:
- file: Read_codes_for_diagnoses.csv
columns:
read2: Read Code
actions:
divide_col: MMCode
concept_set:
- name: HYPERTENSION
category: "2"
- name: DEPRESSION
category: "3"
...@@ -51,6 +51,7 @@ def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog): ...@@ -51,6 +51,7 @@ def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog):
[ [
("config1.yaml"), # config.yaml test case ("config1.yaml"), # config.yaml test case
("config2.yaml"), # config.yaml test case ("config2.yaml"), # config.yaml test case
("config3.yaml"), # config.yaml test case
], ],
) )
def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file): def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment