diff --git a/main.py b/main.py index 81832ed393b8d42e30bdd2285c3110b3d783202d..ab32f81b2327f73e395b386fa009d18867216bc2 100644 --- a/main.py +++ b/main.py @@ -255,23 +255,14 @@ def run_all(mapping_file, target_code_type, out = out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"]) out = out.sort_values(by=["CONCEPT_SET", "CONCEPT"]) - #Merge with Concept Types in Summary Excel File - if "excel_sheet" in summary_config: - summary_df = read_table_file(summary_config["file"], excel_sheet=summary_config["excel_sheet"]) - else: - summary_df = read_table_file(summary_config["file"]) - summary_cols_all = [] #get all column names - for v in summary_config["columns"].values(): #TODO: put in seperate function - get all columns in JSON file object - if type(v) == str: - summary_cols_all.append(v) - else: - summary_cols_all += v - - output_version = summary_config["version"] - summary_df = summary_df[summary_cols_all] #select all relevant columns - summary_df = summary_df.rename(columns={summary_config["columns"]["concept_set_name"]: "CONCEPT_SET"}) + #Add Concept Set Defintions metadata + summary_df = pd.DataFrame(summary_config["concept_set"]) #transform to dataframe + if "metadata" in summary_df.columns: + summary_df = summary_df.join(pd.json_normalize(summary_df["metadata"])) #metadata to columns + summary_df = summary_df.drop(columns=["metadata"]) + summary_df = summary_df.rename(columns={"concept_set_name":"CONCEPT_SET"}) summary_df = summary_df.drop_duplicates() #remove duplicates - out = out.merge(summary_df, how="left", on='CONCEPT_SET') + out = out.merge(summary_df, how="left", on='CONCEPT_SET') #merge with output # Save Output File print(bcolors.HEADER, "---"*5, "OUTPUT", "---"*5, bcolors.ENDC) diff --git a/process_codes_WP.ipynb b/process_codes_WP.ipynb index 8be13222375ebc345db334976473ced2d27994df..68e26be260fd3eece0d460dc54adf53d712bb8cc 100644 --- a/process_codes_WP.ipynb +++ b/process_codes_WP.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8c8f4cdf-04a5-4762-895e-6555781a1f05", "metadata": {}, "outputs": [], @@ -113,163 +113,18 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": null, "id": "f155b635-b459-4aff-81b2-e065fc223858", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 False\n", - "dtype: bool" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [] }, { "cell_type": "code", - "execution_count": 94, + "execution_count": null, "id": "d040eda5-4028-4047-834c-7315e307e415", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>icd10_code</th>\n", - " <th>icd10_alt_code</th>\n", - " <th>description</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>A00</td>\n", - " <td>A00</td>\n", - " <td>Cholera</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>A00.0</td>\n", - " <td>A000</td>\n", - " <td>Cholera due to Vibrio cholerae 01, biovar chol...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>A00.1</td>\n", - " <td>A001</td>\n", - " <td>Cholera due to Vibrio cholerae 01, biovar eltor</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>A00.9</td>\n", - " <td>A009</td>\n", - " <td>Cholera, unspecified</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>A01</td>\n", - " <td>A01</td>\n", - " <td>Typhoid and paratyphoid fevers</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17929</th>\n", - " <td>U84.3</td>\n", - " <td>U843</td>\n", - " <td>Resistance to tuberculostatic drug(s)</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17930</th>\n", - " <td>U84.7</td>\n", - " <td>U847</td>\n", - " <td>Resistance to multiple antimicrobial drugs</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17931</th>\n", - " <td>U84.8</td>\n", - " <td>U848</td>\n", - " <td>Resistance to other specified antimicrobial drug</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17932</th>\n", - " <td>U84.9</td>\n", - " <td>U849</td>\n", - " <td>Resistance to unspecified antimicrobial drugs</td>\n", - " </tr>\n", - " <tr>\n", - " <th>17933</th>\n", - " <td>U85</td>\n", - " <td>U85X</td>\n", - " <td>Resistance to antineoplastic drugs</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>17934 rows × 3 columns</p>\n", - "</div>" - ], - "text/plain": [ - " icd10_code icd10_alt_code \\\n", - "0 A00 A00 \n", - "1 A00.0 A000 \n", - "2 A00.1 A001 \n", - "3 A00.9 A009 \n", - "4 A01 A01 \n", - "... ... ... \n", - "17929 U84.3 U843 \n", - "17930 U84.7 U847 \n", - "17931 U84.8 U848 \n", - "17932 U84.9 U849 \n", - "17933 U85 U85X \n", - "\n", - " description \n", - "0 Cholera \n", - "1 Cholera due to Vibrio cholerae 01, biovar chol... \n", - "2 Cholera due to Vibrio cholerae 01, biovar eltor \n", - "3 Cholera, unspecified \n", - "4 Typhoid and paratyphoid fevers \n", - "... ... \n", - "17929 Resistance to tuberculostatic drug(s) \n", - "17930 Resistance to multiple antimicrobial drugs \n", - "17931 Resistance to other specified antimicrobial drug \n", - "17932 Resistance to unspecified antimicrobial drugs \n", - "17933 Resistance to antineoplastic drugs \n", - "\n", - "[17934 rows x 3 columns]" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df = pd.read_parquet(\"maps/processed/icd10_code.parquet\")\n", "df\n" @@ -277,35 +132,10 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": null, "id": "e0228ac9-8852-4818-b7f0-98429ca5229c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0 True\n", - "1 False\n", - "dtype: bool\n", - "0 False\n", - "1 False\n", - "dtype: bool\n" - ] - }, - { - "data": { - "text/plain": [ - "0 True\n", - "1 False\n", - "dtype: bool" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "code = [\"A00.0\", \"*00.0\"]\n", "code = pd.Series(code)\n", @@ -332,29 +162,35 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": null, + "id": "85dc197b-451e-4fa9-a53b-e6770c132123", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "\n", + "path_json = \"../concepts/PHEN_assign_v3.json\"\n", + "\n", + "#Load JSON Concept Definitions\n", + "mapping = json.load(open(path_json,'rb'))\n", + "summary_config = mapping[\"concept_sets\"][\"concept_set\"]\n", + "summary_df = pd.DataFrame(summary_config) #change to dataframe\n", + "\n", + "summary_df = summary_df.join(pd.json_normalize(summary_df[\"metadata\"])) #metadata to columns\n", + "summary_df = summary_df.drop(columns=[\"metadata\"])\n", + "summary_df = summary_df.rename(columns={\"concept_set_name\":\"CONCEPT_SET\"})\n", + "summary_df = summary_df.drop_duplicates() #remove duplicates\n", + " \n", + "summary_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "4c9b6b3f-08aa-4f61-b9b2-44a24b5d00a0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ALL FILES 878 878\n", - "JSON CONCEPTS 436 397\n", - "EXCEL CONCEPTS 440 397\n", - "1755 878\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py:329: UserWarning: Data Validation extension is not supported and will be removed\n", - " warn(msg)\n" - ] - } - ], + "outputs": [], "source": [ "import json\n", "import os\n", @@ -438,111 +274,10 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": null, "id": "f8e70c33-c869-46f8-953e-f6b52992cfbb", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'JSON MISSING'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>filepath</th>\n", - " <th>json_concept</th>\n", - " <th>json_code_types</th>\n", - " <th>excel_concept</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [filepath, json_concept, json_code_types, excel_concept]\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'EXCEL MISSING'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>filepath</th>\n", - " <th>json_concept</th>\n", - " <th>json_code_types</th>\n", - " <th>excel_concept</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [filepath, json_concept, json_code_types, excel_concept]\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(\"JSON MISSING\", outs[outs[\"json_concept\"].isna() & outs[\"excel_concept\"].notna()])\n", "display(\"EXCEL MISSING\", outs[outs[\"json_concept\"].notna() & outs[\"excel_concept\"].isna()])" @@ -550,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": null, "id": "9d84465f-f064-4df2-b0e4-2dfb217aea21", "metadata": {}, "outputs": [], @@ -567,21 +302,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "7f7fc771-e406-42c7-8a09-16a20b5298f5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "65307" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "total_length = 0\n", "for file in all_files[\"filepath\"]:\n", @@ -620,6 +344,7 @@ "cell_type": "markdown", "id": "357bb84c-90c2-4b5f-95c0-443191783a7f", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -628,48 +353,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "7d3f9cb7-be86-4f1f-92f6-991094eb7bb7", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------- output/V2_2_2_MELD_concepts_readv2.csv ---------\n", - "MELDB missing concepts 0\n", - "Chars present: ['.' '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F' 'G'\n", - " 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y'\n", - " 'Z' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q'\n", - " 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']\n", - "--------- output/V2_2_2_MELD_snomed_no_translate.csv ---------\n", - "MELDB missing concepts 0\n", - "--------- output/V2_2_2_MELD_icd10_no_translate.csv ---------\n", - "MELDB missing concepts 0\n", - "Chars present: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H'\n", - " 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'T' 'W' 'X' 'Y' 'Z']\n", - "--------- output/V2_2_2_MELD_atc_no_translate.csv ---------\n", - "MELDB missing concepts 0\n", - "Chars present: ['0' '1' '2' '3' '6' 'A' 'F' 'N' 'X']\n", - "--------- output/V2_2_2_MELD_errors.csv ---------\n" - ] - }, - { - "data": { - "text/plain": [ - "CODE_TYPE\n", - "snomed_code 1261\n", - "read2_code 464\n", - "read3_code 80\n", - "icd10_code 1\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "version = \"V2_2_2\"\n", "output_files = [f\"output/{version}_MELD_concepts_readv2.csv\",\n", @@ -700,141 +387,10 @@ }, { "cell_type": "code", - "execution_count": 209, + "execution_count": null, "id": "08e0ecc1-9271-48c3-9c5b-094800072906", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "## Compare Concepts V2_1_4 to V2_2_3\n", - "output/V2_1_4_MELD_concepts_readv2.csv output/V2_2_3_MELD_concepts_readv2.csv\n", - "- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS', 'RENAL_TRANSPLANT_DIALYSIS']\n", - "- Added Concepts []\n", - "- Changed Concepts \n", - "\t - ANXIETY -7.0\n", - "\t - ARRHYTHMIA -1.0\n", - "\t - ASTHMA -1.0\n", - "\t - AUTISM_AND_ADHD -4.0\n", - "\t - BIPOLAR_DISORDER -1.0\n", - "\t - BLINDNESS_AND_LOW_VISION -3.0\n", - "\t - COELIAC_DISEASE -1.0\n", - "\t - CORONARY_HEART_DISEASE -8.0\n", - "\t - DEAFNESS -33.0\n", - "\t - DEMENTIA_ALZHEIMER -2.0\n", - "\t - DEPRESSION -5.0\n", - "\t - DIABETES_T1 -1.0\n", - "\t - DIABETES_T2 -1.0\n", - "\t - DIALYSIS -14.0\n", - "\t - DIVERTICULAR_DISEASE -11.0\n", - "\t - DRUG_ALCOHOL_MISUSE -3.0\n", - "\t - EATING_DISORDERS -2.0\n", - "\t - EPILEPSY -1.0\n", - "\t - FATIGUE -27.0\n", - "\t - HEADACHE -48.0\n", - "\t - HF -3.0\n", - "\t - INCONTINENCE -21.0\n", - "\t - LEARNING_DISABILITY -3.0\n", - "\t - MSK_PAIN -36.0\n", - "\t - MULTIPLE_SCLEROSIS -1.0\n", - "\t - PALLIATIVE_CARE -8.0\n", - "\t - PLASMACELL -1.0\n", - "\t - PTSD -1.0\n", - "\t - SCHIZOPHRENIA -1.0\n", - "\t - SELF_HARM -37.0\n", - "\t - SLEEP_PROBLEMS -74.0\n", - "\t - STRESS -31.0\n", - "\t - SYSTEMIC_LUPUS_ERYTHEMATOSUS -2.0\n", - "\n", - "output/V2_1_4_MELD_snomed_no_translate.csv output/V2_2_3_MELD_snomed_no_translate.csv\n", - "- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS', 'RENAL_TRANSPLANT_DIALYSIS']\n", - "- Added Concepts []\n", - "- Changed Concepts \n", - "\t - ANAEMIA -2.0\n", - "\t - ANEURYSM -3.0\n", - "\t - ANXIETY -7.0\n", - "\t - ARRHYTHMIA -25.0\n", - "\t - ASTHMA -34.0\n", - "\t - ATOPIC_ECZEMA -6.0\n", - "\t - AUTISM_AND_ADHD -2.0\n", - "\t - BIPOLAR_DISORDER -3.0\n", - "\t - BLINDNESS_AND_LOW_VISION -4.0\n", - "\t - BREAST_CANCER -2.0\n", - "\t - BRONCHIECSTASIS -1.0\n", - "\t - CHRONIC_BACK_PAIN -1.0\n", - "\t - CHRONIC_FATIGUE_SYNDROME -3.0\n", - "\t - CHRONIC_LIVER_DISEASE -14.0\n", - "\t - CHRONIC_PAIN -2.0\n", - "\t - CKD_STAGE3_5 -3.0\n", - "\t - COELIAC_DISEASE -6.0\n", - "\t - COLON_CANCER -6.0\n", - "\t - CONGENITAL_DIS_CHROMOSOMAL_ABNORMALITIES -1.0\n", - "\t - COPD -31.0\n", - "\t - CORONARY_HEART_DISEASE -21.0\n", - "\t - CYSTIC_FIBROSIS -24.0\n", - "\t - DEAFNESS -15.0\n", - "\t - DEMENTIA_ALZHEIMER -111.0\n", - "\t - DEPRESSION -34.0\n", - "\t - DIABETES_T2 -2.0\n", - "\t - DIABETIC_RETINOPATHY -13.0\n", - "\t - DIALYSIS -1.0\n", - "\t - DIVERTICULAR_DISEASE -4.0\n", - "\t - DRUG_ALCOHOL_MISUSE -310.0\n", - "\t - EATING_DISORDERS -4.0\n", - "\t - ENDOMETRIOSIS -1.0\n", - "\t - EPILEPSY -11.0\n", - "\t - GLAUCOMA -3.0\n", - "\t - GOUT -4.0\n", - "\t - HEART_VALVE_DISORDERS -6.0\n", - "\t - HF -4.0\n", - "\t - HIVAIDS -18.0\n", - "\t - HYPERTENSION -11.0\n", - "\t - HYPERTHYROIDISM -1.0\n", - "\t - HYPOTHYROIDISM -8.0\n", - "\t - IBD -2.0\n", - "\t - ILD -2.0\n", - "\t - LEARNING_DISABILITY -40.0\n", - "\t - LEUKAEMIA -1.0\n", - "\t - LYMPHOMA -2.0\n", - "\t - MENIERES_DISEASE -1.0\n", - "\t - METASTATIC_CANCER -3.0\n", - "\t - MOBILITY_PROBLEMS -45.0\n", - "\t - MULTIPLE_SCLEROSIS -13.0\n", - "\t - OBESITY -63.0\n", - "\t - OSTEOARTHRITIS -3.0\n", - "\t - OSTEOPOROSIS -4.0\n", - "\t - PARALYSIS -3.0\n", - "\t - PARKINSONS -2.0\n", - "\t - PLASMACELL -1.0\n", - "\t - PROSTATE_CANCER -2.0\n", - "\t - PROSTATE_DISORDERS -2.0\n", - "\t - PSORIASIS -3.0\n", - "\t - PTSD -38.0\n", - "\t - RENAL_TRANSPLANT -1.0\n", - "\t - RHEUMATOID_ARTHRITIS -8.0\n", - "\t - SCHIZOPHRENIA -85.0\n", - "\t - SKIN_CANCER -4.0\n", - "\t - STROKE -4.0\n", - "\t - SYSTEMIC_LUPUS_ERYTHEMATOSUS -1.0\n", - "\t - TIA -1.0\n", - "\t - VIRAL_HEPATITIS -9.0\n", - "\t - VTD -5.0\n", - "\n", - "output/V2_1_4_MELD_icd10_no_translate.csv output/V2_2_3_MELD_icd10_no_translate.csv\n", - "- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS']\n", - "- Added Concepts []\n", - "- Changed Concepts \n", - "\t - CVD_EVENTS -1.0\n", - "\n", - "output/V2_1_4_MELD_atc_no_translate.csv output/V2_2_3_MELD_atc_no_translate.csv\n", - "- Removed Concepts []\n", - "- Added Concepts []\n", - "- Changed Concepts \n", - "\n" - ] - } - ], + "outputs": [], "source": [ "def get_output_files(version):\n", " output_files = [f\"output/{version}_MELD_concepts_readv2.csv\",\n", @@ -884,6 +440,33 @@ " " ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc60c137-5a85-4155-af6b-6796f8c05980", + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/PHEN_summary_working.csv\")\n", + "df = df.set_index(\"#\")\n", + "\n", + "for vocab in [\"atc\", \"icd10\", \"readv2\", \"snomed\"]:\n", + " df[vocab.upper()] = \"\"\n", + "\n", + " for file in glob.glob(f\"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/{vocab}/*.csv\"):\n", + " concept_set = os.path.basename(file)[:-4]\n", + " row_index = df[df[\"CONCEPT NAME \"] == concept_set].index[0]\n", + "\n", + " df.loc[row_index, vocab.upper()] = \"YES\"\n", + "\n", + "df = df.drop(columns=[\"READv2_CODE\", \"ICD10_CODE\"])\n", + "df.to_csv(\"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/PHEN_summary_working_labelled.csv\")" + ] + }, { "cell_type": "markdown", "id": "e5c4291f-847b-4c82-976e-bd5b3a7b6bcc", @@ -1095,7 +678,7 @@ }, { "cell_type": "code", - "execution_count": 170, + "execution_count": null, "id": "a968ffb1-4337-456b-8d20-419888b4044f", "metadata": {}, "outputs": [], @@ -1112,7 +695,7 @@ }, { "cell_type": "code", - "execution_count": 184, + "execution_count": null, "id": "c70b1ce2-0f41-4d02-ad17-6fc44bc3c6bf", "metadata": {}, "outputs": [], @@ -1125,127 +708,558 @@ }, { "cell_type": "code", - "execution_count": 194, + "execution_count": null, "id": "d5d34237-02d4-4dea-8c20-5adaf337f6b5", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>bnf_code</th>\n", - " <th>snomed_code</th>\n", - " <th>read2_code</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [bnf_code, snomed_code, read2_code]\n", - "Index: []" - ] - }, - "execution_count": 194, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df1.merge(df2, how='inner', on='snomed_code')" ] }, { "cell_type": "code", - "execution_count": 180, - "id": "d0cbadfe-ef55-40a8-a0f1-a9fc69d7456b", + "execution_count": null, + "id": "b3166cf0-e4a5-43e0-aeac-78827427422e", + "metadata": {}, + "outputs": [], + "source": [ + ".astype(str).dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0a766f9-7959-4a10-b58f-cd946a878b60", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"../concepts/PHEN_summary_working.csv\")\n", + "cols = list(df.columns)\n", + "cols.remove('CONCEPT NAME ')\n", + "cols.remove('AGREED')\n", + "df = df.applymap(lambda x: str(x) if isinstance(x, (int, float)) else x) #change to int\n", + "\n", + "df_copy = df.rename(columns={\n", + " \"CONCEPT NAME \":\"concept_set_name\",\n", + " \"AGREED\":\"concept_set_status\"\n", + "})\n", + "df_copy[\"concept_set_status\"] = df_copy[\"concept_set_status\"].replace(\"USE\", \"AGREED\")\n", + "df_copy = df_copy[[\"concept_set_name\", \"concept_set_status\"]]\n", + "outs = df_copy.to_dict(orient='records')\n", + "\n", + "for i, out in enumerate(outs):\n", + " out[\"metadata\"] = dict(df[cols].iloc[i])\n", + " \n", + "json.dumps(outs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a204a95-dc4c-4183-9ea7-f5c5e95e9087", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>bnf_code</th>\n", - " <th>snomed_code</th>\n", - " <th>read2_code</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [bnf_code, snomed_code, read2_code]\n", - "Index: []" - ] - }, - "execution_count": 180, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [] }, { "cell_type": "code", - "execution_count": 164, - "id": "b3166cf0-e4a5-43e0-aeac-78827427422e", + "execution_count": null, + "id": "5ce1ab58-50b4-4c22-b72b-c698de6830f7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "BNF Code object\n", - "SNOMED Code object\n", - "dtype: object" - ] - }, - "execution_count": 164, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - ".astype(str).dtypes" + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1ea81c6-d1db-408f-9d3a-b96f44efe21f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "5eb544a3-9dd1-41e8-88c2-a808646c6112", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### OMOP Database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9e58e62-9e44-4d0c-9d8d-35c175c07e6c", + "metadata": {}, + "outputs": [], + "source": [ + "import sqlite3\n", + "import csv\n", + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f67c9a1-373f-4799-8a85-72767662d912", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0ecdf69-ee90-42c1-ad25-d8357b603d1b", + "metadata": {}, + "outputs": [], + "source": [ + "#IMPORT OMOP VOCABS\n", + "conn = sqlite3.connect(\"codes/omop_54.sqlite\") # change to 'sqlite:///your_filename.db'\n", + "folder_path = \"codes/vocabulary_download_v5_{9424944c-2b76-4127-8f05-f535e0f15e2a}_1731661390540\"\n", + "\n", + "# Check if the folder exists\n", + "if not os.path.isdir(folder_path):\n", + " raise Exception(f\"Error: The folder '{folder_path}' does not exist.\") \n", + "\n", + "# Iterate through files in the folder\n", + "for filename in os.listdir(folder_path):\n", + " if filename.endswith(\".csv\"): # Check if the file is a CSV\n", + " file_path = os.path.join(folder_path, filename)\n", + " try:\n", + " print(f\"Reading file: {file_path}\")\n", + " # Read the CSV file with the specified delimiter\n", + " df = pd.read_csv(file_path, delimiter=\"\\t\", low_memory=False)\n", + " table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file\n", + " \n", + " #Export Table to sqlite db\n", + " df.to_sql(table_name, conn, if_exists='replace', index=False)\n", + " \n", + " except Exception as e:\n", + " raise Exception(f\"Error reading file {file_path}: {e}\")\n", + "\n", + "conn.commit()\n", + "conn.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9cafd0c-a3bd-408b-bca8-b0de2acde1cd", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a SQL connection to our SQLite database\n", + "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n", + "cur = conn.cursor()\n", + "\n", + "#Print ALL Columns in Table\n", + "# table=\"CONCEPT_SET\"\n", + "# cur.execute(f\"PRAGMA table_info({table});\")\n", + "# print(pd.DataFrame(cur.fetchall()))\n", + "\n", + "#Print ALL TABLE NAMES\n", + "# cur.execute(\"SELECT name FROM sqlite_master WHERE type='table' AND name=? ;\", (\"VOCABULARY\",))\n", + "# print(cur.fetchone())\n", + " \n", + "cur.execute(\"SELECT vocabulary_id FROM VOCABULARY WHERE vocabulary_id=? ;\", (\"MELDB\",))\n", + "print(cur.fetchone())\n", + "\n", + " \n", + " \n", + "#Print WHOLE TABLE\n", + "# cur.execute('SELECT * FROM CONCEPT;')\n", + "# cur.execute('SELECT * FROM CONCEPT WHERE standard_concept = \"C\";')\n", + "# cur.execute('SELECT * FROM CONCEPT WHERE concept_code = \"119768002\" LIMIT 1;')\n", + "# cur.execute('SELECT * FROM CONCEPT WHERE concept_code IN (\"119768002\", \"5905001\");')\n", + "# cur.execute('SELECT DISTINCT VOCABULARY_ID FROM CONCEPT;')\n", + "# df = pd.DataFrame(cur.fetchall())\n", + "# print(list(df[0]))\n", + "# display(df)\n", + "# for row in :\n", + " # print(row)\n", + "\n", + "\n", + "\n", + "#Get Header of Table\n", + "# table=\"CONCEPT_CLASS\"\n", + "# cur.execute(f\"SELECT * FROM {table} LIMIT 3;\")\n", + "# print(cur.fetchall())\n", + "\n", + "#create meldb VOCABULARY\n", + "# meldb_version='v3.2.10'\n", + "# meldb_description = 'Multidisciplinary Ecosystem to study Lifecourse Determinants and Prevention of Early-onset Burdensome Multimorbidity'\n", + "# meldb_reference = 'https://www.it-innovation.soton.ac.uk/projects/meldb'\n", + "# df_test = pd.DataFrame([{\n", + "# \"vocabulary_id\": 'MELDB',\n", + "# \"vocabulary_name\": meldb_description,\n", + "# \"vocabulary_reference\": meldb_reference,\n", + "# \"vocabulary_version\": meldb_version,\n", + "# # \"vocabulary_concept_id\": 0,\n", + "# }])\n", + "# df_test.to_sql(\"VOCABULARY\", conn, if_exists='append', index=False)\n", + "\n", + "\n", + "# cur.execute(\"\"\"\n", + "# CREATE TABLE CONCEPT_SET (\n", + "# concept_set_id INTEGER PRIMARY KEY AUTOINCREMENT, -- Unique identifier for each concept set\n", + "# atlas_id INTEGER, -- Unique identifier generated by ATLAS\n", + "# concept_set_name TEXT, -- Optional name for the concept set\n", + "# concept_set_description TEXT, -- Optional description for the concept set\n", + "# vocabulary_id TEXT NOT NULL, -- Foreign key to VOCABULARY table\n", + "# FOREIGN KEY (vocabulary_id) REFERENCES VOCABULARY(vocabulary_id)\n", + "# );\"\"\")\n", + "# cur.execute(\"DROP TABLE CONCEPT_SET;\")\n", + "\n", + "# cur.execute(\"\"\"\n", + "# CREATE TABLE CONCEPT_SET_ITEM (\n", + "# concept_set_item_id INTEGER PRIMARY KEY AUTOINCREMENT, -- Unique identifier for each mapping\n", + "# concept_set_id INTEGER NOT NULL, -- Foreign key to CONCEPT_SET table\n", + "# concept_id INTEGER NOT NULL, -- Foreign key to CONCEPT table\n", + "# FOREIGN KEY (concept_set_id) REFERENCES CONCEPT_SET(concept_set_id),\n", + "# FOREIGN KEY (concept_id) REFERENCES CONCEPT(concept_id)\n", + "# );\"\"\")\n", + "# cur.execute(\"DROP TABLE CONCEPT_SET_ITEM;\")\n", + "\n", + "# Be sure to close the connection\n", + "conn.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d03b75f3-902f-42d7-b52f-dac7e79ecb11", + "metadata": {}, + "outputs": [], + "source": [ + "conn = sqlite3.connect(\"codes/omop_54.sqlite\") # change to 'sqlite:///your_filename.db'\n", + "cur = conn.cursor()\n", + "\n", + "file_path = \"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/HEART_VALVE_DISORDERS.csv\"\n", + "df = pd.read_csv(file_path, low_memory=False)\n", + "df = df.set_index(\"code\")\n", + "\n", + "df.to_sql(name='test', con=conn, if_exists='replace')\n", + "\n", + "conn.commit()\n", + "conn.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d96c3511-3831-400e-ba40-0a36abcc60d3", + "metadata": {}, + "outputs": [], + "source": [ + "#DISPLAY SQL TABLE\n", + "table=\"CONCEPT_SET_ITEM\"\n", + "\n", + "# Create a SQL connection to our SQLite database\n", + "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n", + "cur = conn.cursor()\n", + "\n", + "#Print ALL Columns in Table\n", + "cur.execute(f\"PRAGMA table_info({table});\")\n", + "df_cols = pd.DataFrame(cur.fetchall())\n", + "print(df_cols)\n", + "df_cols = df_cols[1]\n", + "\n", + "#Print TABLE\n", + "cur.execute(f\"SELECT * FROM {table};\")\n", + "df = pd.DataFrame(cur.fetchall())\n", + "df = df.rename(columns={i:s for i, s in enumerate(df_cols)})\n", + "display(df)\n", + "\n", + "conn.close()\n", + "\n", + "\n", + "# a+s = 13364 \n", + "# a+s+i = 13591\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42d49a00-9646-4ba4-afb6-12297289b7a7", + "metadata": {}, + "outputs": [], + "source": [ + "def sql_row_exist(conn, table, column, value):\n", + "\t# Execute and check if a result exists\n", + "\tcur = conn.cursor()\n", + "\tquery = f\"SELECT 1 FROM {table} WHERE {column} = ? LIMIT 1;\"\n", + "\tcur.execute(query, (value,))\n", + "\texists = cur.fetchone() is not None\n", + "\t\n", + "\treturn exists" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7b51bcd-6ee1-4023-8d36-7f419ce4120d", + "metadata": {}, + "outputs": [], + "source": [ + "#EXPORT MELDB CSV OUTPUT\n", + "\n", + "conn = sqlite3.connect(\"codes/omop_54.sqlite\") # change to 'sqlite:///your_filename.db'\n", + "cur = conn.cursor()\n", + "\n", + "vocab_output = \"MELDB\"\n", + "vocab_type = \"SNOMED\"\n", + "file_path = \"/home/jjd1c23/ssd/meldb/jjd1c23/phenotype/output/V3_2_10_MELD_snomed_no_translate.csv\"\n", + "# file_path = \"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/HEART_VALVE_DISORDERS.csv\"\n", + "\n", + "# Read the CSV file with the specified delimiter\n", + "out = pd.read_csv(file_path, low_memory=False)\n", + "print(df.columns)\n", + "\n", + "for concept_set_name, grp in out.groupby(\"MELDB_concept\"):\n", + " # display(concept_set_name, grp[[\"code\", \"MELDB_concept\"]])\n", + " \n", + " #Create Concept_Set\n", + " if not sql_row_exist(conn, \"CONCEPT_SET\", \"concept_set_name\", concept_set_name):\n", + " cur.execute(f\"INSERT INTO CONCEPT_SET (concept_set_name, vocabulary_id) VALUES ('{concept_set_name}', 'MELDB');\")\n", + " else:\n", + " print(\"concept_set\", concept_set_name, \"already exists\")\n", + " #TODO: ask to remove old concept_set?\n", + " \n", + " #Get Concept_set_Id\n", + " query = \"SELECT concept_set_id FROM CONCEPT_SET WHERE concept_set_name = ? AND vocabulary_id = ?;\"\n", + " cur.execute(query, (concept_set_name, vocab_output, )) \n", + " concept_set_id = cur.fetchone()[0]\n", + " \n", + " #Get corresponing Concept_id (OMOP) for each Concept_code (e.g. SNOMED)\n", + " concept_codes = \"'\"+\"', '\".join(list(grp[\"code\"].astype(str)))+\"'\"\n", + " query = f\"SELECT concept_id FROM CONCEPT WHERE vocabulary_id = ? AND concept_code IN ({concept_codes});\"\n", + " print(query)\n", + " cur.execute(query, (vocab_type, ))\n", + " df_out = pd.DataFrame(cur.fetchall(), columns=[\"concept_id\"])\n", + " \n", + " if not len(grp) == len(df_out):\n", + " print(\"ERROR: Some\", vocab_type, \"Codes do not exist in OMOP Database\")\n", + " \n", + " #Create Concept_set_item\n", + " df_out[\"concept_set_id\"] = concept_set_id\n", + " df_out.to_sql(\"CONCEPT_SET_ITEM\", conn, if_exists='append', index=False)\n", + " \n", + " display(df_out)\n", + " \n", + " \n", + " \n", + " # break\n", + " \n", + " \n", + "\n", + "# #Create New CONCEPT_SET\n", + "# table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file\n", + "# cur.execute(f\"INSERT INTO CONCEPT_SET (concept_class_name) VALUES ('{table_name}');\")\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "\n", + "conn.commit()\n", + "conn.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85007741-e34c-4112-a63c-9fb302b76958", + "metadata": {}, + "outputs": [], + "source": [ + "\"'\"+\"', '\".join(list(grp[\"code\"].astype(str)))+\"'\"" + ] + }, + { + "cell_type": "markdown", + "id": "423e7c21-f3bd-439d-9dcb-c17cc2cc6854", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "### ATLAS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6b45e4d-c7d2-42e7-9b4a-0e9c1c86d34b", + "metadata": {}, + "outputs": [], + "source": [ + "#Create ATLAS Concept Set\n", + "\n", + "def atlas_create_concept(name, description=\"\", items=[]):\n", + " data={\n", + " \"id\": 0,\n", + " \"name\": name,\n", + " \"description\": description,\n", + " \"expression\": {\n", + " \"items\":items \n", + " }\n", + " }\n", + "\n", + " try:\n", + " # Sending the POST request\n", + " response = requests.post(url, json=data, headers=headers)\n", + "\n", + " # Check the response status\n", + " if response.status_code == 200 or response.status_code == 201:\n", + " print(\"POST request successful:\")\n", + " print(response.json()) # Assuming the response is JSON\n", + " return response[\"id\"]\n", + " else:\n", + " print(f\"POST request failed. HTTP Status Code: {response.status_code}\")\n", + " print(\"Response content:\")\n", + " print(response.text)\n", + " return None\n", + "\n", + " except requests.exceptions.RequestException as e:\n", + " print(f\"An error occurred: {e}\")\n", + "\n", + "# Heart Test 1 - 1885487\n", + "# Heart Test 2 - 1885488\n", + "# Heart Valve Disorders - 1885449\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45497623-1da0-4f74-b21e-da8811c89b04", + "metadata": {}, + "outputs": [], + "source": [ + "def get_omop_concepts(cur, codes, vocab_id): \n", + " #Create List for SQL\n", + " mask = \"\"\n", + " for c in codes:\n", + " mask+=f'\"{c}\", '\n", + " mask = mask[:-2] #remove last comma\n", + " \n", + " #Execute SQL\n", + " cur.execute(f'SELECT * FROM CONCEPT WHERE concept_code IN ({mask}) AND VOCABULARY_ID = \"{vocab_id}\";')\n", + " df = pd.DataFrame(cur.fetchall()) #convert to pandas df\n", + " \n", + " print(\"Identified\", len(df[0]) ,\"OMOP Concepts:\", list(df[0]))\n", + " \n", + " return df\n", + " \n", + "def omop_concepts_to_atlas_json(df):\n", + " json = []\n", + " for i, row in df.iterrows():\n", + " #template for atlas api\n", + " out = { \n", + " \"concept\": {\n", + " 'CONCEPT_ID': row[0],\n", + " 'CONCEPT_NAME': row[1],\n", + " 'STANDARD_CONCEPT': 'S',\n", + " 'STANDARD_CONCEPT_CAPTION': 'Standard',\n", + " 'INVALID_REASON': 'V',\n", + " 'INVALID_REASON_CAPTION': 'Valid',\n", + " 'CONCEPT_CODE': row[6],\n", + " 'DOMAIN_ID': row[2],\n", + " 'VOCABULARY_ID': row[3],\n", + " 'CONCEPT_CLASS_ID': row[4],\n", + " 'VALID_START_DATE': int(row[7]),\n", + " 'VALID_END_DATE': int(row[8])\n", + " },\n", + " 'isExcluded': False,\n", + " 'includeDescendants': False,\n", + " 'includeMapped': False\n", + " }\n", + " json.append(out)\n", + " return json \n", + "\n", + "conn = sqlite3.connect(\"codes/omop_54.sqlite\")\n", + "cur = conn.cursor()\n", + "\n", + "vocab_id=\"SNOMED\" #SNOMED, ATC, ICD10CM, ICD9CM, Read\n", + "csv_output = \"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/ANGER.csv\"\n", + "\n", + "#Load CSV Output File\n", + "df_in = pd.read_csv(csv_output)\n", + "print(len(df_in))\n", + "\n", + "# df = get_omop_concepts(cur, [\"119768002\", \"5905001\"], \"SNOMED\")\n", + "df = get_omop_concepts(cur, list(df_in[\"code\"]), vocab_id)\n", + "json = omop_concepts_to_atlas_json(df)\n", + "# display(json)\n", + "\n", + "conn.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea759907-c085-472a-82e2-07b6b19e2c8f", + "metadata": {}, + "outputs": [], + "source": [ + "#ATLAS GET CONCEPT SET\n", + "import requests\n", + "\n", + "def request_get(url):\n", + " try:\n", + " # Sending the GET request\n", + " response = requests.get(url)\n", + "\n", + " # Check if the response status code is 200 (OK)\n", + " if response.status_code == 200:\n", + " print(\"Response data:\")\n", + " # print(response.json()) # Assuming the response is in JSON format\n", + " return response.json()\n", + " else:\n", + " print(f\"Failed to fetch data. HTTP Status Code: {response.status_code}\")\n", + " print(\"Response content:\")\n", + " print(response.text)\n", + " return None\n", + "\n", + " except requests.exceptions.RequestException as e:\n", + " print(f\"An error occurred: {e}\")\n", + "\n", + "\n", + "#GET SET INFO\n", + "set_id = \"1885449\"\n", + "url = f\"https://atlas-demo.ohdsi.org/WebAPI/conceptset/{set_id}\"\n", + "request_get(url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a70e636-6051-4930-bf1b-30d093fd0552", + "metadata": {}, + "outputs": [], + "source": [ + "#GET SET ITEMS (Concepts)\n", + "set_id = \"1885449\"\n", + "url = f\"https://atlas-demo.ohdsi.org/WebAPI/conceptset/{set_id}/expression/ATLASPROD\"\n", + "response = request_get(url)\n", + "display(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96bfcd9c-27e8-4be4-a680-7553d908790e", + "metadata": {}, + "outputs": [], + "source": [ + "#ATLAS CREATE CONCEPT SET\n" ] } ],