diff --git a/main.py b/main.py
index 81832ed393b8d42e30bdd2285c3110b3d783202d..ab32f81b2327f73e395b386fa009d18867216bc2 100644
--- a/main.py
+++ b/main.py
@@ -255,23 +255,14 @@ def run_all(mapping_file, target_code_type,
 	out = out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
 	out = out.sort_values(by=["CONCEPT_SET", "CONCEPT"])
 	
-	#Merge with Concept Types in Summary Excel File
-	if "excel_sheet" in summary_config:
-		summary_df = read_table_file(summary_config["file"], excel_sheet=summary_config["excel_sheet"])
-	else:
-		summary_df = read_table_file(summary_config["file"])
-	summary_cols_all = [] #get all column names
-	for v in summary_config["columns"].values(): #TODO: put in seperate function - get all columns in JSON file object
-		if type(v) == str:
-			summary_cols_all.append(v)
-		else:
-			summary_cols_all += v
-
-	output_version = summary_config["version"]
-	summary_df = summary_df[summary_cols_all] #select all relevant columns 
-	summary_df = summary_df.rename(columns={summary_config["columns"]["concept_set_name"]: "CONCEPT_SET"})
+	#Add Concept Set Definitions metadata
+	summary_df = pd.DataFrame(summary_config["concept_set"]) #transform to dataframe
+	if "metadata" in summary_df.columns:
+		summary_df = summary_df.join(pd.json_normalize(summary_df["metadata"])) #metadata to columns
+		summary_df = summary_df.drop(columns=["metadata"])
+	summary_df = summary_df.rename(columns={"concept_set_name":"CONCEPT_SET"})
 	summary_df = summary_df.drop_duplicates() #remove duplicates
-	out = out.merge(summary_df, how="left", on='CONCEPT_SET')
+	out = out.merge(summary_df, how="left", on='CONCEPT_SET') #merge with output
 	
 	# Save Output File
 	print(bcolors.HEADER, "---"*5, "OUTPUT", "---"*5, bcolors.ENDC)
diff --git a/process_codes_WP.ipynb b/process_codes_WP.ipynb
index 8be13222375ebc345db334976473ced2d27994df..68e26be260fd3eece0d460dc54adf53d712bb8cc 100644
--- a/process_codes_WP.ipynb
+++ b/process_codes_WP.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "8c8f4cdf-04a5-4762-895e-6555781a1f05",
    "metadata": {},
    "outputs": [],
@@ -113,163 +113,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": null,
    "id": "f155b635-b459-4aff-81b2-e065fc223858",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0    False\n",
-       "dtype: bool"
-      ]
-     },
-     "execution_count": 64,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": []
   },
   {
    "cell_type": "code",
-   "execution_count": 94,
+   "execution_count": null,
    "id": "d040eda5-4028-4047-834c-7315e307e415",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>icd10_code</th>\n",
-       "      <th>icd10_alt_code</th>\n",
-       "      <th>description</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>A00</td>\n",
-       "      <td>A00</td>\n",
-       "      <td>Cholera</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>A00.0</td>\n",
-       "      <td>A000</td>\n",
-       "      <td>Cholera due to Vibrio cholerae 01, biovar chol...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>A00.1</td>\n",
-       "      <td>A001</td>\n",
-       "      <td>Cholera due to Vibrio cholerae 01, biovar eltor</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>A00.9</td>\n",
-       "      <td>A009</td>\n",
-       "      <td>Cholera, unspecified</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>A01</td>\n",
-       "      <td>A01</td>\n",
-       "      <td>Typhoid and paratyphoid fevers</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17929</th>\n",
-       "      <td>U84.3</td>\n",
-       "      <td>U843</td>\n",
-       "      <td>Resistance to tuberculostatic drug(s)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17930</th>\n",
-       "      <td>U84.7</td>\n",
-       "      <td>U847</td>\n",
-       "      <td>Resistance to multiple antimicrobial drugs</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17931</th>\n",
-       "      <td>U84.8</td>\n",
-       "      <td>U848</td>\n",
-       "      <td>Resistance to other specified antimicrobial drug</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17932</th>\n",
-       "      <td>U84.9</td>\n",
-       "      <td>U849</td>\n",
-       "      <td>Resistance to unspecified antimicrobial drugs</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17933</th>\n",
-       "      <td>U85</td>\n",
-       "      <td>U85X</td>\n",
-       "      <td>Resistance to antineoplastic drugs</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>17934 rows × 3 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      icd10_code icd10_alt_code  \\\n",
-       "0            A00            A00   \n",
-       "1          A00.0           A000   \n",
-       "2          A00.1           A001   \n",
-       "3          A00.9           A009   \n",
-       "4            A01            A01   \n",
-       "...          ...            ...   \n",
-       "17929      U84.3           U843   \n",
-       "17930      U84.7           U847   \n",
-       "17931      U84.8           U848   \n",
-       "17932      U84.9           U849   \n",
-       "17933        U85           U85X   \n",
-       "\n",
-       "                                             description  \n",
-       "0                                                Cholera  \n",
-       "1      Cholera due to Vibrio cholerae 01, biovar chol...  \n",
-       "2        Cholera due to Vibrio cholerae 01, biovar eltor  \n",
-       "3                                   Cholera, unspecified  \n",
-       "4                         Typhoid and paratyphoid fevers  \n",
-       "...                                                  ...  \n",
-       "17929              Resistance to tuberculostatic drug(s)  \n",
-       "17930         Resistance to multiple antimicrobial drugs  \n",
-       "17931   Resistance to other specified antimicrobial drug  \n",
-       "17932      Resistance to unspecified antimicrobial drugs  \n",
-       "17933                 Resistance to antineoplastic drugs  \n",
-       "\n",
-       "[17934 rows x 3 columns]"
-      ]
-     },
-     "execution_count": 94,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "df = pd.read_parquet(\"maps/processed/icd10_code.parquet\")\n",
     "df\n"
@@ -277,35 +132,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 113,
+   "execution_count": null,
    "id": "e0228ac9-8852-4818-b7f0-98429ca5229c",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0     True\n",
-      "1    False\n",
-      "dtype: bool\n",
-      "0    False\n",
-      "1    False\n",
-      "dtype: bool\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0     True\n",
-       "1    False\n",
-       "dtype: bool"
-      ]
-     },
-     "execution_count": 113,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "code = [\"A00.0\", \"*00.0\"]\n",
     "code = pd.Series(code)\n",
@@ -332,29 +162,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 119,
+   "execution_count": null,
+   "id": "85dc197b-451e-4fa9-a53b-e6770c132123",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "\n",
+    "path_json = \"../concepts/PHEN_assign_v3.json\"\n",
+    "\n",
+    "#Load JSON Concept Definitions\n",
+    "mapping = json.load(open(path_json,'rb'))\n",
+    "summary_config = mapping[\"concept_sets\"][\"concept_set\"]\n",
+    "summary_df = pd.DataFrame(summary_config) #change to dataframe\n",
+    "\n",
+    "summary_df = summary_df.join(pd.json_normalize(summary_df[\"metadata\"])) #metadata to columns\n",
+    "summary_df = summary_df.drop(columns=[\"metadata\"])\n",
+    "summary_df = summary_df.rename(columns={\"concept_set_name\":\"CONCEPT_SET\"})\n",
+    "summary_df = summary_df.drop_duplicates() #remove duplicates\n",
+    " \n",
+    "summary_df\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "id": "4c9b6b3f-08aa-4f61-b9b2-44a24b5d00a0",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ALL FILES 878 878\n",
-      "JSON CONCEPTS 436 397\n",
-      "EXCEL CONCEPTS 440 397\n",
-      "1755 878\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/opt/conda/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py:329: UserWarning: Data Validation extension is not supported and will be removed\n",
-      "  warn(msg)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import json\n",
     "import os\n",
@@ -438,111 +274,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 120,
+   "execution_count": null,
    "id": "f8e70c33-c869-46f8-953e-f6b52992cfbb",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'JSON MISSING'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>filepath</th>\n",
-       "      <th>json_concept</th>\n",
-       "      <th>json_code_types</th>\n",
-       "      <th>excel_concept</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "Empty DataFrame\n",
-       "Columns: [filepath, json_concept, json_code_types, excel_concept]\n",
-       "Index: []"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'EXCEL MISSING'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>filepath</th>\n",
-       "      <th>json_concept</th>\n",
-       "      <th>json_code_types</th>\n",
-       "      <th>excel_concept</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "Empty DataFrame\n",
-       "Columns: [filepath, json_concept, json_code_types, excel_concept]\n",
-       "Index: []"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "display(\"JSON MISSING\", outs[outs[\"json_concept\"].isna() & outs[\"excel_concept\"].notna()])\n",
     "display(\"EXCEL MISSING\", outs[outs[\"json_concept\"].notna() & outs[\"excel_concept\"].isna()])"
@@ -550,7 +285,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 118,
+   "execution_count": null,
    "id": "9d84465f-f064-4df2-b0e4-2dfb217aea21",
    "metadata": {},
    "outputs": [],
@@ -567,21 +302,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "7f7fc771-e406-42c7-8a09-16a20b5298f5",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "65307"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "total_length = 0\n",
     "for file in all_files[\"filepath\"]:\n",
@@ -620,6 +344,7 @@
    "cell_type": "markdown",
    "id": "357bb84c-90c2-4b5f-95c0-443191783a7f",
    "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
     "tags": []
    },
    "source": [
@@ -628,48 +353,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "7d3f9cb7-be86-4f1f-92f6-991094eb7bb7",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--------- output/V2_2_2_MELD_concepts_readv2.csv ---------\n",
-      "MELDB missing concepts  0\n",
-      "Chars present: ['.' '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F' 'G'\n",
-      " 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y'\n",
-      " 'Z' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q'\n",
-      " 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']\n",
-      "--------- output/V2_2_2_MELD_snomed_no_translate.csv ---------\n",
-      "MELDB missing concepts  0\n",
-      "--------- output/V2_2_2_MELD_icd10_no_translate.csv ---------\n",
-      "MELDB missing concepts  0\n",
-      "Chars present: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H'\n",
-      " 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'T' 'W' 'X' 'Y' 'Z']\n",
-      "--------- output/V2_2_2_MELD_atc_no_translate.csv ---------\n",
-      "MELDB missing concepts  0\n",
-      "Chars present: ['0' '1' '2' '3' '6' 'A' 'F' 'N' 'X']\n",
-      "--------- output/V2_2_2_MELD_errors.csv ---------\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "CODE_TYPE\n",
-       "snomed_code    1261\n",
-       "read2_code      464\n",
-       "read3_code       80\n",
-       "icd10_code        1\n",
-       "Name: count, dtype: int64"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "version = \"V2_2_2\"\n",
     "output_files = [f\"output/{version}_MELD_concepts_readv2.csv\",\n",
@@ -700,141 +387,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 209,
+   "execution_count": null,
    "id": "08e0ecc1-9271-48c3-9c5b-094800072906",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "## Compare Concepts V2_1_4 to V2_2_3\n",
-      "output/V2_1_4_MELD_concepts_readv2.csv output/V2_2_3_MELD_concepts_readv2.csv\n",
-      "- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS', 'RENAL_TRANSPLANT_DIALYSIS']\n",
-      "- Added Concepts []\n",
-      "- Changed Concepts \n",
-      "\t - ANXIETY -7.0\n",
-      "\t - ARRHYTHMIA -1.0\n",
-      "\t - ASTHMA -1.0\n",
-      "\t - AUTISM_AND_ADHD -4.0\n",
-      "\t - BIPOLAR_DISORDER -1.0\n",
-      "\t - BLINDNESS_AND_LOW_VISION -3.0\n",
-      "\t - COELIAC_DISEASE -1.0\n",
-      "\t - CORONARY_HEART_DISEASE -8.0\n",
-      "\t - DEAFNESS -33.0\n",
-      "\t - DEMENTIA_ALZHEIMER -2.0\n",
-      "\t - DEPRESSION -5.0\n",
-      "\t - DIABETES_T1 -1.0\n",
-      "\t - DIABETES_T2 -1.0\n",
-      "\t - DIALYSIS -14.0\n",
-      "\t - DIVERTICULAR_DISEASE -11.0\n",
-      "\t - DRUG_ALCOHOL_MISUSE -3.0\n",
-      "\t - EATING_DISORDERS -2.0\n",
-      "\t - EPILEPSY -1.0\n",
-      "\t - FATIGUE -27.0\n",
-      "\t - HEADACHE -48.0\n",
-      "\t - HF -3.0\n",
-      "\t - INCONTINENCE -21.0\n",
-      "\t - LEARNING_DISABILITY -3.0\n",
-      "\t - MSK_PAIN -36.0\n",
-      "\t - MULTIPLE_SCLEROSIS -1.0\n",
-      "\t - PALLIATIVE_CARE -8.0\n",
-      "\t - PLASMACELL -1.0\n",
-      "\t - PTSD -1.0\n",
-      "\t - SCHIZOPHRENIA -1.0\n",
-      "\t - SELF_HARM -37.0\n",
-      "\t - SLEEP_PROBLEMS -74.0\n",
-      "\t - STRESS -31.0\n",
-      "\t - SYSTEMIC_LUPUS_ERYTHEMATOSUS -2.0\n",
-      "\n",
-      "output/V2_1_4_MELD_snomed_no_translate.csv output/V2_2_3_MELD_snomed_no_translate.csv\n",
-      "- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS', 'RENAL_TRANSPLANT_DIALYSIS']\n",
-      "- Added Concepts []\n",
-      "- Changed Concepts \n",
-      "\t - ANAEMIA -2.0\n",
-      "\t - ANEURYSM -3.0\n",
-      "\t - ANXIETY -7.0\n",
-      "\t - ARRHYTHMIA -25.0\n",
-      "\t - ASTHMA -34.0\n",
-      "\t - ATOPIC_ECZEMA -6.0\n",
-      "\t - AUTISM_AND_ADHD -2.0\n",
-      "\t - BIPOLAR_DISORDER -3.0\n",
-      "\t - BLINDNESS_AND_LOW_VISION -4.0\n",
-      "\t - BREAST_CANCER -2.0\n",
-      "\t - BRONCHIECSTASIS -1.0\n",
-      "\t - CHRONIC_BACK_PAIN -1.0\n",
-      "\t - CHRONIC_FATIGUE_SYNDROME -3.0\n",
-      "\t - CHRONIC_LIVER_DISEASE -14.0\n",
-      "\t - CHRONIC_PAIN -2.0\n",
-      "\t - CKD_STAGE3_5 -3.0\n",
-      "\t - COELIAC_DISEASE -6.0\n",
-      "\t - COLON_CANCER -6.0\n",
-      "\t - CONGENITAL_DIS_CHROMOSOMAL_ABNORMALITIES -1.0\n",
-      "\t - COPD -31.0\n",
-      "\t - CORONARY_HEART_DISEASE -21.0\n",
-      "\t - CYSTIC_FIBROSIS -24.0\n",
-      "\t - DEAFNESS -15.0\n",
-      "\t - DEMENTIA_ALZHEIMER -111.0\n",
-      "\t - DEPRESSION -34.0\n",
-      "\t - DIABETES_T2 -2.0\n",
-      "\t - DIABETIC_RETINOPATHY -13.0\n",
-      "\t - DIALYSIS -1.0\n",
-      "\t - DIVERTICULAR_DISEASE -4.0\n",
-      "\t - DRUG_ALCOHOL_MISUSE -310.0\n",
-      "\t - EATING_DISORDERS -4.0\n",
-      "\t - ENDOMETRIOSIS -1.0\n",
-      "\t - EPILEPSY -11.0\n",
-      "\t - GLAUCOMA -3.0\n",
-      "\t - GOUT -4.0\n",
-      "\t - HEART_VALVE_DISORDERS -6.0\n",
-      "\t - HF -4.0\n",
-      "\t - HIVAIDS -18.0\n",
-      "\t - HYPERTENSION -11.0\n",
-      "\t - HYPERTHYROIDISM -1.0\n",
-      "\t - HYPOTHYROIDISM -8.0\n",
-      "\t - IBD -2.0\n",
-      "\t - ILD -2.0\n",
-      "\t - LEARNING_DISABILITY -40.0\n",
-      "\t - LEUKAEMIA -1.0\n",
-      "\t - LYMPHOMA -2.0\n",
-      "\t - MENIERES_DISEASE -1.0\n",
-      "\t - METASTATIC_CANCER -3.0\n",
-      "\t - MOBILITY_PROBLEMS -45.0\n",
-      "\t - MULTIPLE_SCLEROSIS -13.0\n",
-      "\t - OBESITY -63.0\n",
-      "\t - OSTEOARTHRITIS -3.0\n",
-      "\t - OSTEOPOROSIS -4.0\n",
-      "\t - PARALYSIS -3.0\n",
-      "\t - PARKINSONS -2.0\n",
-      "\t - PLASMACELL -1.0\n",
-      "\t - PROSTATE_CANCER -2.0\n",
-      "\t - PROSTATE_DISORDERS -2.0\n",
-      "\t - PSORIASIS -3.0\n",
-      "\t - PTSD -38.0\n",
-      "\t - RENAL_TRANSPLANT -1.0\n",
-      "\t - RHEUMATOID_ARTHRITIS -8.0\n",
-      "\t - SCHIZOPHRENIA -85.0\n",
-      "\t - SKIN_CANCER -4.0\n",
-      "\t - STROKE -4.0\n",
-      "\t - SYSTEMIC_LUPUS_ERYTHEMATOSUS -1.0\n",
-      "\t - TIA -1.0\n",
-      "\t - VIRAL_HEPATITIS -9.0\n",
-      "\t - VTD -5.0\n",
-      "\n",
-      "output/V2_1_4_MELD_icd10_no_translate.csv output/V2_2_3_MELD_icd10_no_translate.csv\n",
-      "- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS']\n",
-      "- Added Concepts []\n",
-      "- Changed Concepts \n",
-      "\t - CVD_EVENTS -1.0\n",
-      "\n",
-      "output/V2_1_4_MELD_atc_no_translate.csv output/V2_2_3_MELD_atc_no_translate.csv\n",
-      "- Removed Concepts []\n",
-      "- Added Concepts []\n",
-      "- Changed Concepts \n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "def get_output_files(version):\n",
     "    output_files = [f\"output/{version}_MELD_concepts_readv2.csv\",\n",
@@ -884,6 +440,33 @@
     "    "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc60c137-5a85-4155-af6b-6796f8c05980",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import glob\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.read_csv(\"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/PHEN_summary_working.csv\")\n",
+    "df = df.set_index(\"#\")\n",
+    "\n",
+    "for vocab in [\"atc\", \"icd10\", \"readv2\", \"snomed\"]:\n",
+    "    df[vocab.upper()] = \"\"\n",
+    "\n",
+    "    for file in glob.glob(f\"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/{vocab}/*.csv\"):\n",
+    "        concept_set = os.path.basename(file)[:-4]\n",
+    "        row_index = df[df[\"CONCEPT NAME \"] == concept_set].index[0]\n",
+    "\n",
+    "        df.loc[row_index, vocab.upper()] = \"YES\"\n",
+    "\n",
+    "df = df.drop(columns=[\"READv2_CODE\", \"ICD10_CODE\"])\n",
+    "df.to_csv(\"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/PHEN_summary_working_labelled.csv\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "e5c4291f-847b-4c82-976e-bd5b3a7b6bcc",
@@ -1095,7 +678,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 170,
+   "execution_count": null,
    "id": "a968ffb1-4337-456b-8d20-419888b4044f",
    "metadata": {},
    "outputs": [],
@@ -1112,7 +695,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 184,
+   "execution_count": null,
    "id": "c70b1ce2-0f41-4d02-ad17-6fc44bc3c6bf",
    "metadata": {},
    "outputs": [],
@@ -1125,127 +708,558 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 194,
+   "execution_count": null,
    "id": "d5d34237-02d4-4dea-8c20-5adaf337f6b5",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>bnf_code</th>\n",
-       "      <th>snomed_code</th>\n",
-       "      <th>read2_code</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "Empty DataFrame\n",
-       "Columns: [bnf_code, snomed_code, read2_code]\n",
-       "Index: []"
-      ]
-     },
-     "execution_count": 194,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "df1.merge(df2, how='inner', on='snomed_code')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 180,
-   "id": "d0cbadfe-ef55-40a8-a0f1-a9fc69d7456b",
+   "execution_count": null,
+   "id": "b3166cf0-e4a5-43e0-aeac-78827427422e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    ".astype(str).dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c0a766f9-7959-4a10-b58f-cd946a878b60",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"../concepts/PHEN_summary_working.csv\")\n",
+    "cols = list(df.columns)\n",
+    "cols.remove('CONCEPT NAME ')\n",
+    "cols.remove('AGREED')\n",
+    "df = df.applymap(lambda x: str(x) if isinstance(x, (int, float)) else x) #convert numeric values to str\n",
+    "\n",
+    "df_copy = df.rename(columns={\n",
+    "    \"CONCEPT NAME \":\"concept_set_name\",\n",
+    "    \"AGREED\":\"concept_set_status\"\n",
+    "})\n",
+    "df_copy[\"concept_set_status\"] = df_copy[\"concept_set_status\"].replace(\"USE\", \"AGREED\")\n",
+    "df_copy = df_copy[[\"concept_set_name\", \"concept_set_status\"]]\n",
+    "outs = df_copy.to_dict(orient='records')\n",
+    "\n",
+    "for i, out in enumerate(outs):\n",
+    "    out[\"metadata\"] = dict(df[cols].iloc[i])\n",
+    "    \n",
+    "json.dumps(outs)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8a204a95-dc4c-4183-9ea7-f5c5e95e9087",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>bnf_code</th>\n",
-       "      <th>snomed_code</th>\n",
-       "      <th>read2_code</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "Empty DataFrame\n",
-       "Columns: [bnf_code, snomed_code, read2_code]\n",
-       "Index: []"
-      ]
-     },
-     "execution_count": 180,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": []
   },
   {
    "cell_type": "code",
-   "execution_count": 164,
-   "id": "b3166cf0-e4a5-43e0-aeac-78827427422e",
+   "execution_count": null,
+   "id": "5ce1ab58-50b4-4c22-b72b-c698de6830f7",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "BNF Code       object\n",
-       "SNOMED Code    object\n",
-       "dtype: object"
-      ]
-     },
-     "execution_count": 164,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    ".astype(str).dtypes"
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1ea81c6-d1db-408f-9d3a-b96f44efe21f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5eb544a3-9dd1-41e8-88c2-a808646c6112",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### OMOP Database"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c9e58e62-9e44-4d0c-9d8d-35c175c07e6c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sqlite3\n",
+    "import csv\n",
+    "import pandas as pd\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f67c9a1-373f-4799-8a85-72767662d912",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0ecdf69-ee90-42c1-ad25-d8357b603d1b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#IMPORT OMOP VOCABS\n",
+    "conn = sqlite3.connect(\"codes/omop_54.sqlite\") # change to 'sqlite:///your_filename.db'\n",
+    "folder_path = \"codes/vocabulary_download_v5_{9424944c-2b76-4127-8f05-f535e0f15e2a}_1731661390540\"\n",
+    "\n",
+    "# Check if the folder exists\n",
+    "if not os.path.isdir(folder_path):\n",
+    "    raise Exception(f\"Error: The folder '{folder_path}' does not exist.\")  \n",
+    "\n",
+    "# Iterate through files in the folder\n",
+    "for filename in os.listdir(folder_path):\n",
+    "    if filename.endswith(\".csv\"):  # Check if the file is a CSV\n",
+    "        file_path = os.path.join(folder_path, filename)\n",
+    "        try:\n",
+    "            print(f\"Reading file: {file_path}\")\n",
+    "            # Read the CSV file with the specified delimiter\n",
+    "            df = pd.read_csv(file_path, delimiter=\"\\t\", low_memory=False)\n",
+    "            table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file\n",
+    "            \n",
+    "            #Export Table to sqlite db\n",
+    "            df.to_sql(table_name, conn, if_exists='replace', index=False)\n",
+    "            \n",
+    "        except Exception as e:\n",
+    "             raise Exception(f\"Error reading file {file_path}: {e}\")\n",
+    "\n",
+    "conn.commit()\n",
+    "conn.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9cafd0c-a3bd-408b-bca8-b0de2acde1cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a SQL connection to our SQLite database\n",
+    "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n",
+    "cur = conn.cursor()\n",
+    "\n",
+    "#Print ALL Columns in Table\n",
+    "# table=\"CONCEPT_SET\"\n",
+    "# cur.execute(f\"PRAGMA table_info({table});\")\n",
+    "# print(pd.DataFrame(cur.fetchall()))\n",
+    "\n",
+    "#Check whether a named table exists\n",
+    "# cur.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name=? ;\", (\"VOCABULARY\",))\n",
+    "# print(cur.fetchone())\n",
+    "            \n",
+    "cur.execute(\"SELECT vocabulary_id FROM VOCABULARY WHERE vocabulary_id=? ;\", (\"MELDB\",))\n",
+    "print(cur.fetchone())\n",
+    "\n",
+    "    \n",
+    "    \n",
+    "#Print WHOLE TABLE\n",
+    "# cur.execute('SELECT * FROM CONCEPT;')\n",
+    "# cur.execute('SELECT * FROM CONCEPT WHERE standard_concept = \"C\";')\n",
+    "# cur.execute('SELECT * FROM CONCEPT WHERE concept_code = \"119768002\" LIMIT 1;')\n",
+    "# cur.execute('SELECT * FROM CONCEPT WHERE concept_code IN (\"119768002\", \"5905001\");')\n",
+    "# cur.execute('SELECT DISTINCT VOCABULARY_ID FROM CONCEPT;')\n",
+    "# df = pd.DataFrame(cur.fetchall())\n",
+    "# print(list(df[0]))\n",
+    "# display(df)\n",
+    "# for row in :\n",
+    "    # print(row)\n",
+    "\n",
+    "\n",
+    "\n",
+    "#Get Header of Table\n",
+    "# table=\"CONCEPT_CLASS\"\n",
+    "# cur.execute(f\"SELECT * FROM {table} LIMIT 3;\")\n",
+    "# print(cur.fetchall())\n",
+    "\n",
+    "#create meldb VOCABULARY\n",
+    "# meldb_version='v3.2.10'\n",
+    "# meldb_description = 'Multidisciplinary Ecosystem to study Lifecourse Determinants and Prevention of Early-onset Burdensome Multimorbidity'\n",
+    "# meldb_reference = 'https://www.it-innovation.soton.ac.uk/projects/meldb'\n",
+    "# df_test = pd.DataFrame([{\n",
+    "#     \"vocabulary_id\": 'MELDB',\n",
+    "#     \"vocabulary_name\": meldb_description,\n",
+    "#     \"vocabulary_reference\": meldb_reference,\n",
+    "#     \"vocabulary_version\": meldb_version,\n",
+    "#     # \"vocabulary_concept_id\": 0,\n",
+    "# }])\n",
+    "# df_test.to_sql(\"VOCABULARY\", conn, if_exists='append', index=False)\n",
+    "\n",
+    "\n",
+    "# cur.execute(\"\"\"\n",
+    "# CREATE TABLE CONCEPT_SET (\n",
+    "#     concept_set_id INTEGER PRIMARY KEY AUTOINCREMENT, -- Unique identifier for each concept set\n",
+    "#     atlas_id INTEGER,                                -- Unique identifier generated by ATLAS\n",
+    "#     concept_set_name TEXT,                           -- Optional name for the concept set\n",
+    "#     concept_set_description TEXT,                    -- Optional description for the concept set\n",
+    "#     vocabulary_id TEXT NOT NULL,                     -- Foreign key to VOCABULARY table\n",
+    "#     FOREIGN KEY (vocabulary_id) REFERENCES VOCABULARY(vocabulary_id)\n",
+    "# );\"\"\")\n",
+    "# cur.execute(\"DROP TABLE CONCEPT_SET;\")\n",
+    "\n",
+    "# cur.execute(\"\"\"\n",
+    "# CREATE TABLE CONCEPT_SET_ITEM (\n",
+    "#     concept_set_item_id INTEGER PRIMARY KEY AUTOINCREMENT, -- Unique identifier for each mapping\n",
+    "#     concept_set_id INTEGER NOT NULL,                      -- Foreign key to CONCEPT_SET table\n",
+    "#     concept_id INTEGER NOT NULL,                          -- Foreign key to CONCEPT table\n",
+    "#     FOREIGN KEY (concept_set_id) REFERENCES CONCEPT_SET(concept_set_id),\n",
+    "#     FOREIGN KEY (concept_id) REFERENCES CONCEPT(concept_id)\n",
+    "# );\"\"\")\n",
+    "# cur.execute(\"DROP TABLE CONCEPT_SET_ITEM;\")\n",
+    "\n",
+    "# Be sure to close the connection\n",
+    "conn.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d03b75f3-902f-42d7-b52f-dac7e79ecb11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Load one concept CSV into a scratch table named 'test' in the OMOP SQLite database\n",
+    "file_path = \"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/HEART_VALVE_DISORDERS.csv\"\n",
+    "df = pd.read_csv(file_path, low_memory=False)\n",
+    "df = df.set_index(\"code\") #the index is written by to_sql as the first column\n",
+    "\n",
+    "#Open the database only once the CSV has loaded, and always release it again\n",
+    "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n",
+    "try:\n",
+    "    df.to_sql(name='test', con=conn, if_exists='replace') #overwrites an existing 'test' table\n",
+    "    conn.commit()\n",
+    "finally:\n",
+    "    conn.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d96c3511-3831-400e-ba40-0a36abcc60d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#DISPLAY SQL TABLE\n",
+    "table=\"CONCEPT_SET_ITEM\"\n",
+    "\n",
+    "# Create a SQL connection to our SQLite database\n",
+    "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n",
+    "try:\n",
+    "    cur = conn.cursor()\n",
+    "\n",
+    "    #Print ALL Columns in Table (each PRAGMA row is: cid, name, type, notnull, dflt_value, pk)\n",
+    "    cur.execute(f\"PRAGMA table_info({table});\")\n",
+    "    df_cols = pd.DataFrame(cur.fetchall())\n",
+    "    print(df_cols)\n",
+    "    if df_cols.empty:\n",
+    "        #table_info yields no rows for an unknown table; fail with a readable message\n",
+    "        raise ValueError(f\"Table {table} does not exist in the database\")\n",
+    "    df_cols = df_cols[1] #keep only the column names\n",
+    "\n",
+    "    #Print TABLE\n",
+    "    cur.execute(f\"SELECT * FROM {table};\")\n",
+    "    #naming the columns up front keeps the header even when the table has no rows\n",
+    "    df = pd.DataFrame(cur.fetchall(), columns=list(df_cols))\n",
+    "    display(df)\n",
+    "finally:\n",
+    "    #release the database file even when a query fails\n",
+    "    conn.close()\n",
+    "\n",
+    "\n",
+    "# a+s = 13364 \n",
+    "# a+s+i = 13591\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "42d49a00-9646-4ba4-afb6-12297289b7a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sql_row_exist(conn, table, column, value):\n",
+    "\t\"\"\"Return True when at least one row of `table` has `column` equal to `value`.\n",
+    "\n",
+    "\tThe value is bound as a query parameter; `table` and `column` are\n",
+    "\tinterpolated into the SQL text as-is.\n",
+    "\t\"\"\"\n",
+    "\tlookup = f\"SELECT 1 FROM {table} WHERE {column} = ? LIMIT 1;\"\n",
+    "\tfirst_match = conn.cursor().execute(lookup, (value,)).fetchone()\n",
+    "\treturn first_match is not None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7b51bcd-6ee1-4023-8d36-7f419ce4120d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#EXPORT MELDB CSV OUTPUT\n",
+    "#Registers every concept set of the MELDB output CSV in CONCEPT_SET and links its codes\n",
+    "#to OMOP concepts through CONCEPT_SET_ITEM\n",
+    "\n",
+    "vocab_output = \"MELDB\"\n",
+    "vocab_type = \"SNOMED\"\n",
+    "file_path = \"/home/jjd1c23/ssd/meldb/jjd1c23/phenotype/output/V3_2_10_MELD_snomed_no_translate.csv\"\n",
+    "# file_path = \"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/HEART_VALVE_DISORDERS.csv\"\n",
+    "batch_size = 500 #codes bound per query, kept well below SQLite's bound-parameter limit\n",
+    "\n",
+    "# Read the CSV file\n",
+    "out = pd.read_csv(file_path, low_memory=False)\n",
+    "print(out.columns)\n",
+    "\n",
+    "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n",
+    "try:\n",
+    "    cur = conn.cursor()\n",
+    "\n",
+    "    for concept_set_name, grp in out.groupby(\"MELDB_concept\"):\n",
+    "        #Get Concept_set_Id, creating the Concept_Set first when this vocabulary lacks it.\n",
+    "        #The same name + vocabulary filter decides both existence and the id lookup.\n",
+    "        query = \"SELECT concept_set_id FROM CONCEPT_SET WHERE concept_set_name = ? AND vocabulary_id = ?;\"\n",
+    "        cur.execute(query, (concept_set_name, vocab_output))\n",
+    "        row = cur.fetchone()\n",
+    "        if row is None:\n",
+    "            #bound parameters keep names containing quotes from breaking the statement\n",
+    "            cur.execute(\"INSERT INTO CONCEPT_SET (concept_set_name, vocabulary_id) VALUES (?, ?);\", (concept_set_name, vocab_output))\n",
+    "            cur.execute(query, (concept_set_name, vocab_output))\n",
+    "            row = cur.fetchone()\n",
+    "        else:\n",
+    "            print(\"concept_set\", concept_set_name, \"already exists\")\n",
+    "            #TODO: ask to remove old concept_set?\n",
+    "        concept_set_id = row[0]\n",
+    "\n",
+    "        #Get corresponding Concept_id (OMOP) for each Concept_code (e.g. SNOMED)\n",
+    "        concept_codes = list(dict.fromkeys(grp[\"code\"].astype(str))) #unique codes, original order\n",
+    "        concept_ids = []\n",
+    "        for start in range(0, len(concept_codes), batch_size):\n",
+    "            batch = concept_codes[start:start + batch_size]\n",
+    "            placeholders = \", \".join(\"?\" * len(batch))\n",
+    "            query = f\"SELECT concept_id FROM CONCEPT WHERE vocabulary_id = ? AND concept_code IN ({placeholders});\"\n",
+    "            cur.execute(query, (vocab_type, *batch))\n",
+    "            concept_ids.extend(cur.fetchall())\n",
+    "        df_out = pd.DataFrame(concept_ids, columns=[\"concept_id\"])\n",
+    "\n",
+    "        #compare against the unique codes so repeated CSV rows do not raise a false alarm\n",
+    "        if len(concept_codes) != len(df_out):\n",
+    "            print(\"ERROR: Some\", vocab_type, \"Codes do not exist in OMOP Database\")\n",
+    "\n",
+    "        #Create Concept_set_item\n",
+    "        df_out[\"concept_set_id\"] = concept_set_id\n",
+    "        df_out.to_sql(\"CONCEPT_SET_ITEM\", conn, if_exists='append', index=False)\n",
+    "\n",
+    "        display(df_out)\n",
+    "\n",
+    "    conn.commit()\n",
+    "finally:\n",
+    "    #release the database file even when a concept set fails to export\n",
+    "    conn.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "85007741-e34c-4112-a63c-9fb302b76958",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Preview the quoted, comma-separated code list built from `grp` (left over from the export loop)\n",
+    "\"'{}'\".format(\"', '\".join(grp[\"code\"].astype(str)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "423e7c21-f3bd-439d-9dcb-c17cc2cc6854",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "### ATLAS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6b45e4d-c7d2-42e7-9b4a-0e9c1c86d34b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Create ATLAS Concept Set\n",
+    "\n",
+    "def atlas_create_concept(name, description=\"\", items=None):\n",
+    "    \"\"\"POST a new concept set to the ATLAS WebAPI and return its id.\n",
+    "\n",
+    "    Reads the notebook-level `url` and `headers` variables for the request\n",
+    "    target and headers, so both must be defined before calling.\n",
+    "\n",
+    "    Args:\n",
+    "        name: Name of the concept set to create.\n",
+    "        description: Optional description text.\n",
+    "        items: List placed under expression.items; None means no items.\n",
+    "\n",
+    "    Returns:\n",
+    "        The \"id\" field of the JSON response on HTTP 200/201, otherwise None.\n",
+    "    \"\"\"\n",
+    "    data={\n",
+    "      \"id\": 0,\n",
+    "      \"name\": name,\n",
+    "      \"description\": description,\n",
+    "      \"expression\": {\n",
+    "          #a fresh list per call; a mutable default would be shared between calls\n",
+    "          \"items\": [] if items is None else items\n",
+    "      }\n",
+    "    }\n",
+    "\n",
+    "    try:\n",
+    "        # Sending the POST request\n",
+    "        response = requests.post(url, json=data, headers=headers)\n",
+    "\n",
+    "        # Check the response status\n",
+    "        if response.status_code in (200, 201):\n",
+    "            created = response.json()  # Assuming the response is JSON\n",
+    "            print(\"POST request successful:\")\n",
+    "            print(created)\n",
+    "            # the id lives in the decoded body; the Response object is not subscriptable\n",
+    "            return created[\"id\"]\n",
+    "\n",
+    "        print(f\"POST request failed. HTTP Status Code: {response.status_code}\")\n",
+    "        print(\"Response content:\")\n",
+    "        print(response.text)\n",
+    "        return None\n",
+    "\n",
+    "    except requests.exceptions.RequestException as e:\n",
+    "        print(f\"An error occurred: {e}\")\n",
+    "        return None\n",
+    "\n",
+    "# Heart Test 1 - 1885487\n",
+    "# Heart Test 2 - 1885488\n",
+    "# Heart Valve Disorders - 1885449\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45497623-1da0-4f74-b21e-da8811c89b04",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_omop_concepts(cur, codes, vocab_id):\n",
+    "    \"\"\"Fetch the CONCEPT rows whose concept_code is in `codes` for one vocabulary.\n",
+    "\n",
+    "    Args:\n",
+    "        cur: Open sqlite3 cursor on the OMOP database.\n",
+    "        codes: Iterable of source codes; each one is converted with str().\n",
+    "        vocab_id: Value matched against the VOCABULARY_ID column.\n",
+    "\n",
+    "    Returns:\n",
+    "        DataFrame with one row per matching concept and positional (0..n)\n",
+    "        column labels; empty when nothing matches.\n",
+    "    \"\"\"\n",
+    "    #str() mirrors the previous text formatting and lets numpy integers be bound;\n",
+    "    #de-duplicating stops a code from being returned once per batch\n",
+    "    codes = list(dict.fromkeys(str(c) for c in codes))\n",
+    "    batch_size = 500 #kept well below SQLite's bound-parameter limit\n",
+    "\n",
+    "    #Execute SQL with bound parameters instead of pasting the codes into the statement\n",
+    "    rows = []\n",
+    "    for start in range(0, len(codes), batch_size):\n",
+    "        batch = codes[start:start + batch_size]\n",
+    "        placeholders = \", \".join(\"?\" * len(batch))\n",
+    "        cur.execute(\n",
+    "            f\"SELECT * FROM CONCEPT WHERE concept_code IN ({placeholders}) AND VOCABULARY_ID = ?;\",\n",
+    "            (*batch, vocab_id),\n",
+    "        )\n",
+    "        rows.extend(cur.fetchall())\n",
+    "    df = pd.DataFrame(rows) #convert to pandas df\n",
+    "\n",
+    "    #an empty frame has no column 0, so guard the summary\n",
+    "    concept_ids = [] if df.empty else list(df[0])\n",
+    "    print(\"Identified\", len(concept_ids) ,\"OMOP Concepts:\", concept_ids)\n",
+    "\n",
+    "    return df\n",
+    "    \n",
+    "def omop_concepts_to_atlas_json(df):\n",
+    "    \"\"\"Build the list of ATLAS concept-set items for a frame of CONCEPT rows.\n",
+    "\n",
+    "    Row values are read through the integer keys 0-8: 0 -> CONCEPT_ID,\n",
+    "    1 -> CONCEPT_NAME, 2 -> DOMAIN_ID, 3 -> VOCABULARY_ID,\n",
+    "    4 -> CONCEPT_CLASS_ID, 6 -> CONCEPT_CODE, 7/8 -> valid start/end date.\n",
+    "    Every item is emitted as standard ('S'), valid ('V') and with all three\n",
+    "    flags set to False, whatever the row contains.\n",
+    "    \"\"\"\n",
+    "    def as_item(row):\n",
+    "        #template for atlas api\n",
+    "        concept = dict(\n",
+    "            CONCEPT_ID=row[0],\n",
+    "            CONCEPT_NAME=row[1],\n",
+    "            STANDARD_CONCEPT='S',\n",
+    "            STANDARD_CONCEPT_CAPTION='Standard',\n",
+    "            INVALID_REASON='V',\n",
+    "            INVALID_REASON_CAPTION='Valid',\n",
+    "            CONCEPT_CODE=row[6],\n",
+    "            DOMAIN_ID=row[2],\n",
+    "            VOCABULARY_ID=row[3],\n",
+    "            CONCEPT_CLASS_ID=row[4],\n",
+    "            VALID_START_DATE=int(row[7]),\n",
+    "            VALID_END_DATE=int(row[8]),\n",
+    "        )\n",
+    "        return {\n",
+    "            \"concept\": concept,\n",
+    "            'isExcluded': False,\n",
+    "            'includeDescendants': False,\n",
+    "            'includeMapped': False,\n",
+    "        }\n",
+    "\n",
+    "    return [as_item(record) for _, record in df.iterrows()]\n",
+    "\n",
+    "vocab_id=\"SNOMED\" #SNOMED, ATC, ICD10CM, ICD9CM, Read\n",
+    "csv_output = \"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/ANGER.csv\"\n",
+    "\n",
+    "#Load CSV Output File\n",
+    "df_in = pd.read_csv(csv_output)\n",
+    "print(len(df_in))\n",
+    "\n",
+    "#Look up the OMOP concepts for the CSV codes; the database file is released even if the lookup fails\n",
+    "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n",
+    "try:\n",
+    "    cur = conn.cursor()\n",
+    "    # df = get_omop_concepts(cur, [\"119768002\", \"5905001\"], \"SNOMED\")\n",
+    "    df = get_omop_concepts(cur, list(df_in[\"code\"]), vocab_id)\n",
+    "finally:\n",
+    "    conn.close()\n",
+    "\n",
+    "#Convert the rows into ATLAS items (needs no database access)\n",
+    "json = omop_concepts_to_atlas_json(df)\n",
+    "# display(json)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ea759907-c085-472a-82e2-07b6b19e2c8f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#ATLAS GET CONCEPT SET\n",
+    "import requests\n",
+    "\n",
+    "def request_get(url, timeout=30):\n",
+    "    \"\"\"Send an HTTP GET to `url` and return the decoded JSON body.\n",
+    "\n",
+    "    Args:\n",
+    "        url: Address to request.\n",
+    "        timeout: Seconds to wait for the server; without a timeout a stalled\n",
+    "            server would block the notebook indefinitely.\n",
+    "\n",
+    "    Returns:\n",
+    "        The parsed JSON on HTTP 200; None on any other status code or when\n",
+    "        the request itself fails.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        # Sending the GET request\n",
+    "        response = requests.get(url, timeout=timeout)\n",
+    "\n",
+    "        # Check if the response status code is 200 (OK)\n",
+    "        if response.status_code == 200:\n",
+    "            print(\"Response data:\")\n",
+    "            return response.json()  # Assuming the response is in JSON format\n",
+    "\n",
+    "        print(f\"Failed to fetch data. HTTP Status Code: {response.status_code}\")\n",
+    "        print(\"Response content:\")\n",
+    "        print(response.text)\n",
+    "        return None\n",
+    "\n",
+    "    except requests.exceptions.RequestException as e:\n",
+    "        print(f\"An error occurred: {e}\")\n",
+    "        return None\n",
+    "\n",
+    "\n",
+    "#GET SET INFO\n",
+    "set_id = \"1885449\"\n",
+    "url = f\"https://atlas-demo.ohdsi.org/WebAPI/conceptset/{set_id}\"\n",
+    "request_get(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a70e636-6051-4930-bf1b-30d093fd0552",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#GET SET ITEMS (Concepts)\n",
+    "set_id = \"1885449\"\n",
+    "url = f\"https://atlas-demo.ohdsi.org/WebAPI/conceptset/{set_id}/expression/ATLASPROD\"\n",
+    "response = request_get(url)\n",
+    "display(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96bfcd9c-27e8-4be4-a680-7553d908790e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#ATLAS CREATE CONCEPT SET\n"
    ]
   }
  ],