From 28f44a19540e968f0da1e2f237f8c9021feb5f95 Mon Sep 17 00:00:00 2001
From: Jakub Dylag <jjd1c23@soton.ac.uk>
Date: Fri, 24 Jan 2025 13:49:21 +0000
Subject: [PATCH] rename JSON "meldb_phenotypes" -> "concept_set"

---
 README.md |  8 ++++----
 main.py   | 10 +++++-----
 report.py |  8 ++++----
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index aa00cae..0eab45d 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,7 @@ The JSON configuration file specifies how input codes are grouped into **concept
 
 ### Folder and File Definitions
 
-The `"codes"` section defines the location and description of all input files required for processing. Each `"folder"` is defined as an object of within the `"codes"` list. Similarily all files are objects within the `"files"` list.
+The `"codes"` section defines the location and description of all input medical coding lists required for processing. Each `"folder"` is defined as an object within the `"codes"` list. Similarly, all files are objects within the `"files"` list.
 
 - **`folder`**: Specifies the directory containing the input files.  
 - **`description`**: Provides a brief summary of the content or purpose of the files, often including additional context such as the date the data was downloaded.  
@@ -163,9 +163,9 @@ The `"concept_sets"` object defines the structure and rules for grouping input c
   - **`concept_set_status`**: Maps to the column indicating the status of the concept set. Only concept sets the **"AGREED"** status will be outputted! 
   - **`metadata`**: A list of additional columns in the CSV file that should be copied to the output for descriptive or contextual purposes.
 
-The `"codes"` object specifies the source files containing input codes and assigns them to the corresponding concept sets through the `"meldb_phenotypes"` field. 
+The `"codes"` object specifies the source files containing input codes and assigns them to the corresponding concept sets through the `"concept_set"` field. 
 
- - **`meldb_phenotypes`**: Lists the concept sets to which all codes within this file will be assigned.
+ - **`concept_set`**: Lists the concept sets to which all codes within this file will be assigned.
 
 ```json
 {
@@ -185,7 +185,7 @@ The `"codes"` object specifies the source files containing input codes and assig
 			"files": [
 				{
 					"file": "WP02_SAIL_WILK_matched_drug_codes_with_categories.xlsx",
-					"meldb_phenotypes": ["ALL_MEDICATIONS"]
+					"concept_set": ["ALL_MEDICATIONS"]
 				}
 			]
 		}
diff --git a/main.py b/main.py
index f1c082a..81832ed 100644
--- a/main.py
+++ b/main.py
@@ -226,21 +226,21 @@ def run_all(mapping_file, target_code_type,
 				if len(df) == 0:
 					pass
 					# out = df
-				elif ("meldb_phenotypes" in file) and isinstance(df, pd.core.frame.DataFrame):
+				elif ("concept_set" in file) and isinstance(df, pd.core.frame.DataFrame):
 					out = map_file(df,
 								   target_code_type,
 								   out, 
-								   concepts=file["meldb_phenotypes"],
+								   concepts=file["concept_set"],
 								   meta_columns=meta_columns,
 								   no_translate=no_translate)
-				elif ("meldb_phenotypes_categories" in file) and isinstance(df, pd.core.groupby.generic.DataFrameGroupBy):
+				elif ("concept_set_categories" in file) and isinstance(df, pd.core.groupby.generic.DataFrameGroupBy):
 					meta_columns.remove(divide_col) #delete categorical column
 					for cat, grp in df:		
-						if cat in file["meldb_phenotypes_categories"].keys(): #check if category is mapped
+						if cat in file["concept_set_categories"].keys(): #check if category is mapped
 							grp = grp.drop(columns=[divide_col]) #delete categorical column
 							print("Category:", cat)
 							out = map_file(grp, target_code_type, out, 
-										   concepts = file["meldb_phenotypes_categories"][cat],
+										   concepts = file["concept_set_categories"][cat],
 										   meta_columns=meta_columns)
 					
 		else:
diff --git a/report.py b/report.py
index c36a298..d4069c3 100644
--- a/report.py
+++ b/report.py
@@ -11,11 +11,11 @@ def get_json_files(folders):
         if "files" in folder:
             for file in folder["files"]:
                 file_path = folder["folder"]+"/"+file["file"]
-                if "meldb_phenotypes" in file:
-                    for concept in file["meldb_phenotypes"]:
+                if "concept_set" in file:
+                    for concept in file["concept_set"]:
                         out.append({"json_concept":concept, "filepath":file_path, "json_code_types":list(file["columns"].keys())})
-                elif "meldb_phenotypes_categories" in file:
-                    for code, concept in file["meldb_phenotypes_categories"].items():
+                elif "concept_set_categories" in file:
+                    for code, concept in file["concept_set_categories"].items():
                         out.append({"json_concept":concept[0], "filepath":file_path, "json_code_types":list(file["columns"].keys())})
                 else:
                     out.append({"json_concept":None, "filepath":file_path})
-- 
GitLab