Commit 8f8b5119 authored by Jakub Dylag

move summary excel into json config

parent 28f44a19
@@ -255,23 +255,14 @@ def run_all(mapping_file, target_code_type,
     out = out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
     out = out.sort_values(by=["CONCEPT_SET", "CONCEPT"])
-    #Merge with Concept Types in Summary Excel File
-    if "excel_sheet" in summary_config:
-        summary_df = read_table_file(summary_config["file"], excel_sheet=summary_config["excel_sheet"])
-    else:
-        summary_df = read_table_file(summary_config["file"])
-    summary_cols_all = [] #get all column names
-    for v in summary_config["columns"].values(): #TODO: put in separate function - get all columns in JSON file object
-        if type(v) == str:
-            summary_cols_all.append(v)
-        else:
-            summary_cols_all += v
-    output_version = summary_config["version"]
-    summary_df = summary_df[summary_cols_all] #select all relevant columns
-    summary_df = summary_df.rename(columns={summary_config["columns"]["concept_set_name"]: "CONCEPT_SET"})
+    #Add Concept Set Definitions metadata
+    summary_df = pd.DataFrame(summary_config["concept_set"]) #transform to dataframe
+    if "metadata" in summary_df.columns:
+        summary_df = summary_df.join(pd.json_normalize(summary_df["metadata"])) #metadata to columns
+        summary_df = summary_df.drop(columns=["metadata"])
+    summary_df = summary_df.rename(columns={"concept_set_name":"CONCEPT_SET"})
     summary_df = summary_df.drop_duplicates() #remove duplicates
-    out = out.merge(summary_df, how="left", on='CONCEPT_SET')
+    out = out.merge(summary_df, how="left", on='CONCEPT_SET') #merge with output
     # Save Output File
     print(bcolors.HEADER, "---"*5, "OUTPUT", "---"*5, bcolors.ENDC)
...
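For context, here is a minimal sketch of the `concept_sets` block the new code path consumes. The field names (`concept_set`, `concept_set_name`, `metadata`) come from the diff; the concrete concept names and metadata values are invented for illustration.

``` python
import pandas as pd

# Hypothetical excerpt of the JSON config consumed by the new code path;
# the concept names and metadata values are invented for illustration.
summary_config = {
    "concept_set": [
        {"concept_set_name": "ANXIETY",
         "metadata": {"DESCRIPTION": "Anxiety disorders"}},
        {"concept_set_name": "ASTHMA",
         "metadata": {"DESCRIPTION": "Asthma"}},
    ]
}

summary_df = pd.DataFrame(summary_config["concept_set"])
summary_df = summary_df.join(pd.json_normalize(summary_df["metadata"])) #metadata to columns
summary_df = summary_df.drop(columns=["metadata"])
summary_df = summary_df.rename(columns={"concept_set_name": "CONCEPT_SET"})
print(summary_df)
```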
%% Cell type:code id:8c8f4cdf-04a5-4762-895e-6555781a1f05 tags:
``` python
import pandas as pd
import numpy as np
import json
```
%% Cell type:markdown id:c5786d78-7dc2-4f02-ad21-cee95e473823 tags:
### Generate JSON from the "ho" sheet
%% Cell type:code id:0292dc90-e31a-4724-8536-d0b55533aaef tags:
``` python
#List v4 to json
import os #needed below for os.path.isfile
df = pd.read_excel("PHEN_code_lists_sources_V4.xlsx", sheet_name="ho", dtype=str)
# df = df.sort_values(by="mapped_condition")
def json_file_template(file, cons, types, metadata):
    concepts = ""
    for concept in cons:
        concepts += f'"{concept}", '
    concepts = concepts[:-2] #remove last ,
    type_str = ""
    for k, v in types.items():
        type_str += f'"{k}":"{v}", '
    type_str = type_str[:-2]
    meta_str = '"metadata":['
    for v in metadata:
        meta_str += f'"{v}", '
    meta_str = meta_str[:-2]
    meta_str = meta_str + "]"
    return '''
    {
        \"file\":\"'''+file+'''",
        \"columns\":{
            '''+type_str+''',
            '''+meta_str+'''
        },
        \"meldb_phenotypes\":['''+concepts+''']
    },'''
out = '"files":['
folder = "codes/GitHub_TG_repository/"
for file, grp in df.groupby("mapped_condition"):
    file = file.replace("%20", " ")
    for ext in ["_CPRD_GOLD.csv", "_CPRD_AURUM.csv", "_IMRD.csv"]:
        path = file+"/"+file+ext
        if os.path.isfile(folder+path):
            out += json_file_template(path, grp["meldb_condition"],
                                      types={
                                          "read2_code":"READ_CODE",
                                          "snomed_code":"SNOMED_CT_CODE",
                                          # "med_code":"MEDICAL_CODE_ID",
                                      },
                                      metadata=["DESCRIPTION"]
                                     )
        else:
            print("NOT FILE", folder+path)
    for ext in ["_ICD10.csv"]:
        path = file+"/"+file+ext
        if os.path.isfile(folder+path):
            out += json_file_template(path, grp["meldb_condition"],
                                      types={
                                          "icd10_code":"READ_CODE",
                                          "snomed_code":"SNOMED_CT_CODE",
                                          # "icd10_code":"MEDICAL_CODE_ID",
                                      },
                                      metadata=["DESCRIPTION"]
                                     )
        else:
            print("NOT FILE", folder+path)
    # out += json_file_template(file+"/"+file+"_CPRD_AURUM.csv", grp["meldb_condition"])
    # out += json_file_template(file+"/"+file+"_ICD10.csv", grp["meldb_condition"])
    # out += json_file_template(file+"/"+file+"_IMRD.csv", grp["meldb_condition"])
    # out += f' "{file}/{file}_CPRD_GOLD.csv":[{conds}],\n'
    # out += f' "{file}/{file}_CPRD_AURUM.csv":[{conds}],\n'
    # out += f' "{file}/{file}_ICD10.csv":[{conds}],\n'
    # out += f' "{file}/{file}_IMRD.csv":[{conds}],\n'
out = out[:-1] #remove last ,
out += "\n]"
out = out.replace("%20", " ")
print(out)
```
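The template above assembles JSON by string concatenation, which breaks as soon as a filename or description contains a quote. A sketch of an equivalent builder that lets `json.dumps` handle quoting and escaping (the example path and concept names are hypothetical):

``` python
import json

# Sketch of an alternative builder (not the notebook's original approach):
# construct Python objects and let json.dumps handle quoting and escaping.
def json_file_entry(file, cons, types, metadata):
    cols = dict(types)
    cols["metadata"] = list(metadata)
    return {"file": file, "columns": cols, "meldb_phenotypes": list(cons)}

entry = json_file_entry(
    "ANXIETY/ANXIETY_CPRD_GOLD.csv",  # hypothetical path
    ["ANXIETY"],                      # hypothetical concept list
    {"read2_code": "READ_CODE", "snomed_code": "SNOMED_CT_CODE"},
    ["DESCRIPTION"],
)
print(json.dumps({"files": [entry]}, indent=2))
```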
%% Cell type:code id:f155b635-b459-4aff-81b2-e065fc223858 tags:
``` python
```
%% Output
0 False
dtype: bool
%% Cell type:code id:d040eda5-4028-4047-834c-7315e307e415 tags:
``` python
df = pd.read_parquet("maps/processed/icd10_code.parquet")
df
```
%% Output
icd10_code icd10_alt_code \
0 A00 A00
1 A00.0 A000
2 A00.1 A001
3 A00.9 A009
4 A01 A01
... ... ...
17929 U84.3 U843
17930 U84.7 U847
17931 U84.8 U848
17932 U84.9 U849
17933 U85 U85X
description
0 Cholera
1 Cholera due to Vibrio cholerae 01, biovar chol...
2 Cholera due to Vibrio cholerae 01, biovar eltor
3 Cholera, unspecified
4 Typhoid and paratyphoid fevers
... ...
17929 Resistance to tuberculostatic drug(s)
17930 Resistance to multiple antimicrobial drugs
17931 Resistance to other specified antimicrobial drug
17932 Resistance to unspecified antimicrobial drugs
17933 Resistance to antineoplastic drugs
[17934 rows x 3 columns]
%% Cell type:code id:e0228ac9-8852-4818-b7f0-98429ca5229c tags:
``` python
code = ["A00.0", "*00.0"]
code = pd.Series(code)
print(code.isin(df["icd10_code"]))
print(code.isin(df["icd10_alt_code"]))
# print( )
~(
    ~code.isin(df["icd10_code"])
    &
    ~code.isin(df["icd10_alt_code"])
)
```
%% Output
0 True
1 False
dtype: bool
0 False
1 False
dtype: bool
0 True
1 False
dtype: bool
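The double negation in the cell above is De Morgan's law in disguise: `~(~a & ~b)` is simply `a | b`. An equivalent, more direct spelling (same `code` and `df` as above):

``` python
# ~(~a & ~b) == a | b: a code passes if it appears in either column.
in_either = code.isin(df["icd10_code"]) | code.isin(df["icd10_alt_code"])
print(in_either)
```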
%% Cell type:markdown id:18efcacd-45f0-4341-86cc-d8e2e584350c tags:
### Analyse the JSON file
%% Cell type:code id:85dc197b-451e-4fa9-a53b-e6770c132123 tags:
``` python
import json
import os
path_json = "../concepts/PHEN_assign_v3.json"
#Load JSON Concept Definitions
mapping = json.load(open(path_json,'rb'))
summary_config = mapping["concept_sets"]["concept_set"]
summary_df = pd.DataFrame(summary_config) #change to dataframe
summary_df = summary_df.join(pd.json_normalize(summary_df["metadata"])) #metadata to columns
summary_df = summary_df.drop(columns=["metadata"])
summary_df = summary_df.rename(columns={"concept_set_name":"CONCEPT_SET"})
summary_df = summary_df.drop_duplicates() #remove duplicates
summary_df
```
%% Cell type:code id:4c9b6b3f-08aa-4f61-b9b2-44a24b5d00a0 tags:
``` python
import json
import os
path_json = "PHEN_assign_v3.json"
path_excel = "PHEN_summary_working.xlsx"
path_codes = "codes/"
#Get all Files in JSON
def get_json_files(path_json):
    folders = json.load(open(path_json,'rb'))
    out = []
    for folder in folders:
        if "files" in folder:
            for file in folder["files"]:
                file_path = folder["folder"]+"/"+file["file"]
                if "meldb_phenotypes" in file:
                    for concept in file["meldb_phenotypes"]:
                        out.append({"json_concept":concept, "filepath":file_path, "json_code_types":list(file["columns"].keys())})
                elif "meldb_phenotypes_categories" in file:
                    for code, concept in file["meldb_phenotypes_categories"].items():
                        out.append({"json_concept":concept[0], "filepath":file_path, "json_code_types":list(file["columns"].keys())})
                else:
                    out.append({"json_concept":None, "filepath":file_path})
    out = pd.DataFrame(out)
    out["filepath"] = out["filepath"].astype(str)
    return out
out = get_json_files(path_json)
#Get all Files in Excel Summary
def get_excel_files(path_excel):
    out2 = pd.read_excel(path_excel)
    out2 = out2[["CONCEPT NAME ", "CODING LIST", "AGREED", "FUNCTION"]].loc[1:] #select relevant columns
    #Filter Concepts in use
    out2 = out2[out2["AGREED"] == "USE"] #remove deprecated concepts
    out2 = out2[out2["FUNCTION"] == "QUERY BY CODING LIST"] #keep only coding-list queries
    out2 = out2.drop(['AGREED', 'FUNCTION'], axis=1)
    #Get filepaths
    out2["CODING LIST"] = out2["CODING LIST"].str.split(",") #split by ,
    out2 = out2.explode("CODING LIST") #one row per file
    out2["CODING LIST"] = out2["CODING LIST"].str.strip()
    out2["CODING LIST"] = out2["CODING LIST"].str.replace("https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/", "")
    out2["CODING LIST"] = out2["CODING LIST"].str.replace("%20", " ")
    out2 = out2.rename(columns={"CONCEPT NAME ":"excel_concept", "CODING LIST":"filepath"})
    return out2
out2 = get_excel_files(path_excel)
#Get all Files in /codes
def get_code_files(path_codes):
    all_files = []
    for root, dirs, files in os.walk(path_codes, topdown=False):
        for name in files:
            if ".ipynb_checkpoint" not in root: #exclude notebook checkpoints
                if name.endswith(".csv") or name.endswith(".xlsx") or name.endswith(".dta"): #exclude non-data files
                    all_files.append(os.path.join(root, name))
    all_files = pd.DataFrame(all_files)
    all_files = all_files.rename(columns={0:"filepath"})
    all_files["filepath"] = all_files["filepath"].astype(str)
    return all_files
all_files = get_code_files(path_codes)
print("ALL FILES", len(all_files), len(all_files["filepath"].unique()))
print("JSON CONCEPTS", len(out), len(out["filepath"].unique()))
print("EXCEL CONCEPTS", len(out2), len(out2["filepath"].unique()))
outs = pd.merge(all_files, out, how="outer", on="filepath")
outs = pd.merge(outs, out2, how="outer", on="filepath")
print(len(outs), len(outs["filepath"].unique()))
outs.to_csv("output/MELD_file_to_concept.csv", index=False)
# display(outs[outs["concept"].isna()])
# display(out)
```
%% Output
ALL FILES 878 878
JSON CONCEPTS 436 397
EXCEL CONCEPTS 440 397
1755 878
/opt/conda/lib/python3.9/site-packages/openpyxl/worksheet/_reader.py:329: UserWarning: Data Validation extension is not supported and will be removed
warn(msg)
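One way to make this reconciliation more explicit (a sketch, not in the original cell) is `indicator=True`, which labels each filepath by the source(s) it appears in:

``` python
# Reconciliation sketch: indicator=True labels each filepath as matched by
# both sources or by only one of them (all_files and out from the cell above).
check = pd.merge(all_files, out, how="outer", on="filepath", indicator=True)
print(check["_merge"].value_counts())  # both / left_only / right_only
```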
%% Cell type:code id:f8e70c33-c869-46f8-953e-f6b52992cfbb tags:
``` python
display("JSON MISSING", outs[outs["json_concept"].isna() & outs["excel_concept"].notna()])
display("EXCEL MISSING", outs[outs["json_concept"].notna() & outs["excel_concept"].isna()])
```
%% Output
%% Cell type:code id:9d84465f-f064-4df2-b0e4-2dfb217aea21 tags:
``` python
with open('concepts-output/MELD-report.md', 'a') as f:
    f.write(
"""
# Report
- One thing
- Two thing
- Three thing
""")
```
%% Cell type:code id:7f7fc771-e406-42c7-8a09-16a20b5298f5 tags:
``` python
total_length = 0
for file in all_files["filepath"]:
    if file.endswith(".csv"):
        df_file = pd.read_csv(file)
        total_length += len(df_file)
    elif file.endswith(".xlsx"):
        df_file = pd.read_excel(file)
        total_length += len(df_file)
    elif file.endswith(".dta"):
        df_file = pd.read_stata(file)
        total_length += len(df_file)
total_length
```
%% Output
65307
%% Cell type:code id:08a9c565-28d6-46ee-9fa8-6fa0ee28a4d5 tags:
``` python
#turn filepaths into gitlab links
outs2 = outs.copy()
outs2["filepath"] = "https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/"+outs2["filepath"].str.replace(" ", "%20")
#Groupby concepts and concat filepaths
outs2 = outs2.groupby("concept")["filepath"].apply(', '.join).reset_index()
outs2 = outs2.sort_values(by=["concept"])
outs2
outs2.to_csv("output/MELD_GitLab_link_to_concept.csv", index=False)
```
%% Cell type:markdown id:357bb84c-90c2-4b5f-95c0-443191783a7f tags:
### Analyse Output Files
%% Cell type:code id:7d3f9cb7-be86-4f1f-92f6-991094eb7bb7 tags:
``` python
version = "V2_2_2"
output_files = [f"output/{version}_MELD_concepts_readv2.csv",
                f"output/{version}_MELD_snomed_no_translate.csv",
                f"output/{version}_MELD_icd10_no_translate.csv",
                # f"output/{version}_MELD_med_no_translate.csv",
                f"output/{version}_MELD_atc_no_translate.csv"
               ]
error_file = f"output/{version}_MELD_errors.csv"
for output_file in output_files:
    print("---"*3, output_file, "---"*3)
    df = pd.read_csv(output_file)
    # df["MELDB_concept"].loc[df["CONCEPT TYPE"].isna()]
    print("MELDB missing concepts ", len(df[df["CONCEPT TYPE"].isna()]))
    if df["code"].dtype == "object":
        print("Chars present:", np.sort(df["code"].apply(lambda x: set(x)).explode().unique()))
# len(df["MELDB_concept"].unique())
print("---"*3, error_file, "---"*3)
df = pd.read_csv(error_file)
df = df.drop_duplicates()
df["CODE_TYPE"].value_counts()
# for i, row in df.drop_duplicates().iterrows():
#     print(row["CODE"], row["CODE_TYPE"])
```
%% Output
--------- output/V2_2_2_MELD_concepts_readv2.csv ---------
MELDB missing concepts 0
Chars present: ['.' '0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F' 'G'
'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X' 'Y'
'Z' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q'
'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']
--------- output/V2_2_2_MELD_snomed_no_translate.csv ---------
MELDB missing concepts 0
--------- output/V2_2_2_MELD_icd10_no_translate.csv ---------
MELDB missing concepts 0
Chars present: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'A' 'B' 'C' 'D' 'E' 'F' 'G' 'H'
'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'T' 'W' 'X' 'Y' 'Z']
--------- output/V2_2_2_MELD_atc_no_translate.csv ---------
MELDB missing concepts 0
Chars present: ['0' '1' '2' '3' '6' 'A' 'F' 'N' 'X']
--------- output/V2_2_2_MELD_errors.csv ---------
CODE_TYPE
snomed_code 1261
read2_code 464
read3_code 80
icd10_code 1
Name: count, dtype: int64
%% Cell type:code id:08e0ecc1-9271-48c3-9c5b-094800072906 tags:
``` python
def get_output_files(version):
    output_files = [f"output/{version}_MELD_concepts_readv2.csv",
                    f"output/{version}_MELD_snomed_no_translate.csv",
                    f"output/{version}_MELD_icd10_no_translate.csv",
                    # f"output/{version}_MELD_med_no_translate.csv",
                    f"output/{version}_MELD_atc_no_translate.csv"
                   ]
    error_file = f"output/{version}_MELD_errors.csv"
    return output_files, error_file
# version_1 = "V1_0_0"
version_1 = "V2_1_4"
version_2 = "V2_2_3"
output1, err1 = get_output_files(version_1)
output2, err2 = get_output_files(version_2)
print("## Compare Concepts", version_1, "to", version_2)
for out1, out2 in zip(output1, output2):
    print(out1, out2)
    df1 = pd.read_csv(out1)
    df1 = df1[["code","MELDB_concept"]].groupby("MELDB_concept").count()
    df2 = pd.read_csv(out2)
    df2 = df2[["code","MELDB_concept"]].groupby("MELDB_concept").count()
    #Added/Removed Concepts
    print("- Removed Concepts", list(set(df1.index) - set(df2.index)))
    print("- Added Concepts", list(set(df2.index) - set(df1.index)))
    #Changed Concepts
    diff = df2 - df1 #diff in counts
    diff = diff[(~(diff["code"] == 0.0)) & diff["code"].notna()] #get non-zero counts
    s = "\n"
    for concept, row in diff.iterrows():
        s += "\t - {} {}\n".format(concept, row["code"])
    print("- Changed Concepts", s)
# for output_file in output_files:
#     print("---"*3, output_file, "---"*3)
#     df = pd.read_csv(output_file)
#     # df["MELDB_concept"].loc[df["CONCEPT TYPE"].isna()]
#     print("MELDB missing concepts ", len(df[df["CONCEPT TYPE"].isna()]))
#     if df["code"].dtype == "object":
#         print("Chars present:", np.sort(df["code"].apply(lambda x: set(x)).explode().unique()))
```
%% Output
## Compare Concepts V2_1_4 to V2_2_3
output/V2_1_4_MELD_concepts_readv2.csv output/V2_2_3_MELD_concepts_readv2.csv
- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS', 'RENAL_TRANSPLANT_DIALYSIS']
- Added Concepts []
- Changed Concepts
     - ANXIETY -7.0
     - ARRHYTHMIA -1.0
     - ASTHMA -1.0
     - AUTISM_AND_ADHD -4.0
     - BIPOLAR_DISORDER -1.0
     - BLINDNESS_AND_LOW_VISION -3.0
     - COELIAC_DISEASE -1.0
     - CORONARY_HEART_DISEASE -8.0
     - DEAFNESS -33.0
     - DEMENTIA_ALZHEIMER -2.0
     - DEPRESSION -5.0
     - DIABETES_T1 -1.0
     - DIABETES_T2 -1.0
     - DIALYSIS -14.0
     - DIVERTICULAR_DISEASE -11.0
     - DRUG_ALCOHOL_MISUSE -3.0
     - EATING_DISORDERS -2.0
     - EPILEPSY -1.0
     - FATIGUE -27.0
     - HEADACHE -48.0
     - HF -3.0
     - INCONTINENCE -21.0
     - LEARNING_DISABILITY -3.0
     - MSK_PAIN -36.0
     - MULTIPLE_SCLEROSIS -1.0
     - PALLIATIVE_CARE -8.0
     - PLASMACELL -1.0
     - PTSD -1.0
     - SCHIZOPHRENIA -1.0
     - SELF_HARM -37.0
     - SLEEP_PROBLEMS -74.0
     - STRESS -31.0
     - SYSTEMIC_LUPUS_ERYTHEMATOSUS -2.0
output/V2_1_4_MELD_snomed_no_translate.csv output/V2_2_3_MELD_snomed_no_translate.csv
- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS', 'RENAL_TRANSPLANT_DIALYSIS']
- Added Concepts []
- Changed Concepts
     - ANAEMIA -2.0
     - ANEURYSM -3.0
     - ANXIETY -7.0
     - ARRHYTHMIA -25.0
     - ASTHMA -34.0
     - ATOPIC_ECZEMA -6.0
     - AUTISM_AND_ADHD -2.0
     - BIPOLAR_DISORDER -3.0
     - BLINDNESS_AND_LOW_VISION -4.0
     - BREAST_CANCER -2.0
     - BRONCHIECSTASIS -1.0
     - CHRONIC_BACK_PAIN -1.0
     - CHRONIC_FATIGUE_SYNDROME -3.0
     - CHRONIC_LIVER_DISEASE -14.0
     - CHRONIC_PAIN -2.0
     - CKD_STAGE3_5 -3.0
     - COELIAC_DISEASE -6.0
     - COLON_CANCER -6.0
     - CONGENITAL_DIS_CHROMOSOMAL_ABNORMALITIES -1.0
     - COPD -31.0
     - CORONARY_HEART_DISEASE -21.0
     - CYSTIC_FIBROSIS -24.0
     - DEAFNESS -15.0
     - DEMENTIA_ALZHEIMER -111.0
     - DEPRESSION -34.0
     - DIABETES_T2 -2.0
     - DIABETIC_RETINOPATHY -13.0
     - DIALYSIS -1.0
     - DIVERTICULAR_DISEASE -4.0
     - DRUG_ALCOHOL_MISUSE -310.0
     - EATING_DISORDERS -4.0
     - ENDOMETRIOSIS -1.0
     - EPILEPSY -11.0
     - GLAUCOMA -3.0
     - GOUT -4.0
     - HEART_VALVE_DISORDERS -6.0
     - HF -4.0
     - HIVAIDS -18.0
     - HYPERTENSION -11.0
     - HYPERTHYROIDISM -1.0
     - HYPOTHYROIDISM -8.0
     - IBD -2.0
     - ILD -2.0
     - LEARNING_DISABILITY -40.0
     - LEUKAEMIA -1.0
     - LYMPHOMA -2.0
     - MENIERES_DISEASE -1.0
     - METASTATIC_CANCER -3.0
     - MOBILITY_PROBLEMS -45.0
     - MULTIPLE_SCLEROSIS -13.0
     - OBESITY -63.0
     - OSTEOARTHRITIS -3.0
     - OSTEOPOROSIS -4.0
     - PARALYSIS -3.0
     - PARKINSONS -2.0
     - PLASMACELL -1.0
     - PROSTATE_CANCER -2.0
     - PROSTATE_DISORDERS -2.0
     - PSORIASIS -3.0
     - PTSD -38.0
     - RENAL_TRANSPLANT -1.0
     - RHEUMATOID_ARTHRITIS -8.0
     - SCHIZOPHRENIA -85.0
     - SKIN_CANCER -4.0
     - STROKE -4.0
     - SYSTEMIC_LUPUS_ERYTHEMATOSUS -1.0
     - TIA -1.0
     - VIRAL_HEPATITIS -9.0
     - VTD -5.0
output/V2_1_4_MELD_icd10_no_translate.csv output/V2_2_3_MELD_icd10_no_translate.csv
- Removed Concepts ['THYROID_DISEASE', 'SCHIZOPHRENIA_BIPOLAR_DISORDER', 'PSIORIASIS_ECZEMA', 'HAEMATOLOGICAL_CANCERS', 'INFLAMM_ARTHROPATHIES', 'ALL_CANCER', 'STROKE_TIA', 'DIABETES', 'PMR_AND_GCD', 'LONG_TERM_MS_PROBLEMS', 'ALL_CKD', 'INFLAMM_ARTHROPATHIES_CONNECTIVE_TISSUE_DIS']
- Added Concepts []
- Changed Concepts
     - CVD_EVENTS -1.0
output/V2_1_4_MELD_atc_no_translate.csv output/V2_2_3_MELD_atc_no_translate.csv
- Removed Concepts []
- Added Concepts []
- Changed Concepts
%% Cell type:code id:cc60c137-5a85-4155-af6b-6796f8c05980 tags:
``` python
import glob
import os
import pandas as pd
df = pd.read_csv("/home/jjd1c23/ssd/meldb/jjd1c23/concepts/PHEN_summary_working.csv")
df = df.set_index("#")
for vocab in ["atc", "icd10", "readv2", "snomed"]:
    df[vocab.upper()] = ""
    for file in glob.glob(f"/home/jjd1c23/ssd/meldb/jjd1c23/concepts/{vocab}/*.csv"):
        concept_set = os.path.basename(file)[:-4]
        row_index = df[df["CONCEPT NAME "] == concept_set].index[0]
        df.loc[row_index, vocab.upper()] = "YES"
df = df.drop(columns=["READv2_CODE", "ICD10_CODE"])
df.to_csv("/home/jjd1c23/ssd/meldb/jjd1c23/concepts/PHEN_summary_working_labelled.csv")
```
%% Cell type:markdown id:e5c4291f-847b-4c82-976e-bd5b3a7b6bcc tags:
### Mappings
%% Cell type:code id:08e34750-413c-469e-bcb8-e71bb188ff42 tags:
``` python
#NHS Read Browser
import simpledbf
import pandas as pd
#r2 only
df = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V2/ANCESTOR.DBF').to_dataframe()
df = pd.concat([df['READCODE'], df['DESCENDANT']])
df = pd.DataFrame(df.drop_duplicates())
df = df.rename(columns={0:"read2_code"})
df.to_parquet("maps/processed/read2_code.parquet", index=False)
#r2 -> atc
df = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V2/ATC.DBF').to_dataframe()
df = df[["READCODE", "ATC"]]
df = df.rename(columns={"READCODE":"read2_code", "ATC":"atc_code"})
df.to_parquet("maps/processed/read2_code_to_atc_code.parquet", index=False)
#r2 -> icd10
df = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V2/ICD10.DBF').to_dataframe()
df = df[["READ_CODE", "TARG_CODE"]]
df = df.rename(columns={"READ_CODE":"read2_code", "TARG_CODE":"icd10_code"})
df = df[~df["icd10_code"].str.match("^.*-.*$")] #remove codes with '-'
df = df[~df["read2_code"].str.match("^.*-.*$")] #remove codes with '-'
df.to_parquet("maps/processed/read2_code_to_icd10_code.parquet", index=False)
#r2 -> opcs4
df = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V2/OPCS4V3.DBF').to_dataframe()
df = df[["READ_CODE", "TARG_CODE"]]
df = df.rename(columns={"READ_CODE":"read2_code", "TARG_CODE":"opcs4_code"})
df = df[~df["opcs4_code"].str.match("^.*-.*$")] #remove codes with '-'
df = df[~df["read2_code"].str.match("^.*-.*$")] #remove codes with '-'
df.to_parquet("maps/processed/read2_code_to_opcs4_code.parquet", index=False)
#r3 only
df = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V3/ANCESTOR.DBF').to_dataframe()
df = pd.concat([df['READCODE'], df['DESCENDANT']])
df = pd.DataFrame(df.drop_duplicates())
df = df.rename(columns={0:"read3_code"})
df.to_parquet("maps/processed/read3_code.parquet", index=False)
#r3 -> icd10
df = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V3/ICD10.DBF').to_dataframe()
df = df[["READ_CODE", "TARG_CODE"]]
df = df.rename(columns={"READ_CODE":"read3_code", "TARG_CODE":"icd10_code"})
df = df[~df["icd10_code"].str.match("^.*-.*$")] #remove codes with '-'
df = df[~df["read3_code"].str.match("^.*-.*$")] #remove codes with '-'
df.to_parquet("maps/processed/read3_code_to_icd10_code.parquet", index=False)
#r3 -> icd9
# dbf = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V3/ICD9V3.DBF')
#r3 -> opcs4
df = simpledbf.Dbf5('maps/nhs_readbrowser_25.0.0_20180401000001/Standard/V3/OPCS4V3.DBF').to_dataframe()
df = df[["READ_CODE", "TARG_CODE"]]
df = df.rename(columns={"READ_CODE":"read3_code", "TARG_CODE":"opcs4_code"})
df = df[~df["opcs4_code"].str.match("^.*-.*$")] #remove codes with '-'
df = df[~df["read3_code"].str.match("^.*-.*$")] #remove codes with '-'
df.to_parquet("maps/processed/read3_code_to_opcs4_code.parquet", index=False)
```
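Each `str.match("^.*-.*$")` above only asks whether a code contains a hyphen anywhere. An equivalent, more direct filter (a sketch; same behaviour for non-null codes):

``` python
# Equivalent filter sketch: str.contains with regex disabled reads as
# "keep rows whose code has no literal hyphen".
df = df[~df["opcs4_code"].str.contains("-", regex=False)]
```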
%% Cell type:code id:5fe95638-1f25-45f3-803c-2fff74a2a4fd tags:
``` python
#NHS Data Migrations
#r2 only
# df = pd.read_csv('maps/nhs_datamigration_29.0.0_20200401000001/Mapping Tables/Updated/Clinically Assured/rctcremap_uk_20200401000001.txt', sep='\t')
#r3 only
# df = pd.read_csv('maps/nhs_datamigration_29.0.0_20200401000001/Mapping Tables/Updated/Clinically Assured/ctv3cremap_uk_20200401000001.txt', sep='\t')
#snomed only
df = pd.read_csv('maps/nhs_datamigration_29.0.0_20200401000001/Mapping Tables/Updated/Clinically Assured/sctcremap_uk_20200401000001.txt', sep='\t')
df = df[["SCT_CONCEPTID"]]
df = df.rename(columns={"SCT_CONCEPTID":"snomed_code"})
df = df.drop_duplicates()
df = df.astype(str)
df.to_parquet("maps/processed/snomed_code.parquet", index=False)
#r2 -> r3
df = pd.read_csv('maps/nhs_datamigration_29.0.0_20200401000001/Mapping Tables/Updated/Clinically Assured/rctctv3map_uk_20200401000001.txt', sep='\t')
df = df[["V2_CONCEPTID", "CTV3_CONCEPTID"]]
df = df.rename(columns={"V2_CONCEPTID":"read2_code",
                        "CTV3_CONCEPTID":"read3_code"})
df.to_parquet("maps/processed/read2_code_to_read3_code.parquet", index=False)
#r3 -> r2
df = pd.read_csv('maps/nhs_datamigration_29.0.0_20200401000001/Mapping Tables/Updated/Clinically Assured/ctv3rctmap_uk_20200401000002.txt', sep='\t')
df = df[["CTV3_CONCEPTID", "V2_CONCEPTID"]]
df = df.rename(columns={"CTV3_CONCEPTID":"read3_code",
                        "V2_CONCEPTID":"read2_code"})
df = df.drop_duplicates()
df = df[~df["read2_code"].str.match("^.*_.*$")] #remove r2 codes with '_'
df.to_parquet("maps/processed/read3_code_to_read2_code.parquet", index=False)
#r2 -> snomed
df = pd.read_csv('maps/nhs_datamigration_29.0.0_20200401000001/Mapping Tables/Updated/Clinically Assured/rcsctmap2_uk_20200401000001.txt', sep='\t', dtype=str)
df = df[["ReadCode", "ConceptId"]]
df = df.rename(columns={"ReadCode":"read2_code",
                        "ConceptId":"snomed_code"})
df.to_parquet("maps/processed/read2_code_to_snomed_code.parquet", index=False)
#r3 -> snomed
df = pd.read_csv('maps/nhs_datamigration_29.0.0_20200401000001/Mapping Tables/Updated/Clinically Assured/ctv3sctmap2_uk_20200401000001.txt', sep='\t')
df = df[["CTV3_TERMID", "SCT_CONCEPTID"]]
df = df.rename(columns={"CTV3_TERMID":"read3_code",
                        "SCT_CONCEPTID":"snomed_code"})
df["snomed_code"] = df["snomed_code"].astype(str)
df = df[~df["snomed_code"].str.match("^.*_.*$")] #remove snomed codes with '_'
df.to_parquet("maps/processed/read3_code_to_snomed_code.parquet", index=False)
```
%% Cell type:code id:267fa1cc-5159-48c4-9eee-19af5039d627 tags:
``` python
#OPCS410 Data Files
df = pd.read_csv("maps/OPCS410 Data files txt/OPCS410 CodesAndTitles Nov 2022 V1.0.txt", sep='\t', dtype=str, header=None)
df = df.rename(columns={0:"opcs4_code", 1:"description"})
df.to_parquet("maps/processed/opcs4_code.parquet", index=False)
```
%% Cell type:code id:01d046fd-69af-44f3-acad-5d0edef3f745 tags:
``` python
#ICD10_edition5
df = pd.read_xml("maps/ICD10_Edition5_XML_20160401/Content/ICD10_Edition5_CodesAndTitlesAndMetadata_GB_20160401.xml")
df = df[["CODE", "ALT_CODE", "DESCRIPTION"]]
df = df.rename(columns={"CODE":"icd10_code",
                        "ALT_CODE":"icd10_alt_code",
                        "DESCRIPTION":"description"
                       })
df.to_parquet("maps/processed/icd10_code.parquet", index=False)
```
%% Cell type:code id:36630e24-f56c-48e1-8ecf-4ccd2b41eaea tags:
``` python
code1 = "read2_code"
code2 = "icd10_code"
df_map = pd.read_parquet(f"maps/processed/{code1}_to_{code2}.parquet")
codes = df_map["read2_code"].iloc[:5]
pd.merge(codes, df_map, how='left')[code2]
```
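The same merge-based lookup can be wrapped in a small helper; a sketch generalising the cell above (the function name and defaults are illustrative, not an API from this repository):

``` python
# Illustrative helper: translate a Series of source codes to a target
# vocabulary via the processed parquet maps built in the cells above.
def translate_codes(codes, source="read2_code", target="icd10_code"):
    df_map = pd.read_parquet(f"maps/processed/{source}_to_{target}.parquet")
    return pd.merge(codes.rename(source), df_map, how="left")[target]

# Example: translate_codes(df_map["read2_code"].iloc[:5])
```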
%% Cell type:code id:9787adeb-8507-488b-9a91-b8df3fbbe21e tags:
``` python
#CPRD Code Browser
df = pd.read_csv('maps/CPRD_CodeBrowser_202211_Aurum/CPRDAurumMedical.txt', sep='\t')
df = df[["MedCodeId", "CleansedReadCode", "SnomedCTConceptId"]]
df = df.rename(columns={"MedCodeId":"med_code",
                        "CleansedReadCode":"read2_code",
                        "SnomedCTConceptId":"snomed_code"})
# df = pd.read_csv('maps/CPRD_CodeBrowser_202211_Aurum/CPRDAurumProduct.txt', sep='\t', dtype=str)
# df = pd.read_csv('maps/CPRD_CodeBrowser_202211_GOLD/medical.txt', sep='\t')
# df = df.reset_index().iloc[:,[1,6]]
# df = df.rename(columns={"level_1":"read2_code", "20220523":"description"})
# df = pd.read_csv('maps/CPRD_CodeBrowser_202211_GOLD/product.txt', sep='\t', dtype=str) #CANNOT OPEN
df
```
%% Cell type:code id:a968ffb1-4337-456b-8d20-419888b4044f tags:
``` python
#BNF
df = pd.read_excel("maps/BNF Snomed Mapping data 20231215.xlsx")
df = df.astype(str)
df = df.rename(columns={"BNF Code":"bnf_code",
                        "SNOMED Code":"snomed_code"})
df[["bnf_code", "snomed_code"]].to_parquet("maps/processed/bnf_code_to_snomed_code.parquet", index=False)
```
%% Cell type:code id:c70b1ce2-0f41-4d02-ad17-6fc44bc3c6bf tags:
``` python
#BNF to Readv2 Merge
df1 = pd.read_parquet("maps/processed/bnf_code_to_snomed_code.parquet").astype(str)
df2 = pd.read_parquet("maps/processed/read2_code_to_snomed_code.parquet").astype(str)
# df1.merge(df2, how="inner", on="snomed_code")
```
%% Cell type:code id:d5d34237-02d4-4dea-8c20-5adaf337f6b5 tags:
``` python
df1.merge(df2, how='inner', on='snomed_code')
```
%% Output
Empty DataFrame
Columns: [bnf_code, snomed_code, read2_code]
Index: []
%% Cell type:code id:b3166cf0-e4a5-43e0-aeac-78827427422e tags:
``` python
df.astype(str).dtypes #inspect column dtypes (fragment completed with df)
```
%% Output
BNF Code       object
SNOMED Code    object
dtype: object
%% Cell type:code id:c0a766f9-7959-4a10-b58f-cd946a878b60 tags:
``` python
df = pd.read_csv("../concepts/PHEN_summary_working.csv")
cols = list(df.columns)
cols.remove('CONCEPT NAME ')
cols.remove('AGREED')
df = df.applymap(lambda x: str(x) if isinstance(x, (int, float)) else x) #cast numeric values to str
df_copy = df.rename(columns={
    "CONCEPT NAME ":"concept_set_name",
    "AGREED":"concept_set_status"
})
df_copy["concept_set_status"] = df_copy["concept_set_status"].replace("USE", "AGREED")
df_copy = df_copy[["concept_set_name", "concept_set_status"]]
outs = df_copy.to_dict(orient='records')
for i, out in enumerate(outs):
    out["metadata"] = dict(df[cols].iloc[i])
json.dumps(outs)
```
%% Cell type:code id:8a204a95-dc4c-4183-9ea7-f5c5e95e9087 tags:
``` python
```
%% Cell type:code id:5ce1ab58-50b4-4c22-b72b-c698de6830f7 tags:
``` python
import json
```
%% Cell type:code id:f1ea81c6-d1db-408f-9d3a-b96f44efe21f tags:
``` python
```
%% Cell type:markdown id:5eb544a3-9dd1-41e8-88c2-a808646c6112 tags:
### OMOP Database
%% Cell type:code id:c9e58e62-9e44-4d0c-9d8d-35c175c07e6c tags:
``` python
import sqlite3
import csv
import pandas as pd
import os
```
%% Cell type:code id:4f67c9a1-373f-4799-8a85-72767662d912 tags:
``` python
```
%% Cell type:code id:d0ecdf69-ee90-42c1-ad25-d8357b603d1b tags:
``` python
#IMPORT OMOP VOCABS
conn = sqlite3.connect("codes/omop_54.sqlite") # change to 'sqlite:///your_filename.db'
folder_path = "codes/vocabulary_download_v5_{9424944c-2b76-4127-8f05-f535e0f15e2a}_1731661390540"
# Check if the folder exists
if not os.path.isdir(folder_path):
    raise Exception(f"Error: The folder '{folder_path}' does not exist.")
# Iterate through files in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".csv"): # Check if the file is a CSV
        file_path = os.path.join(folder_path, filename)
        try:
            print(f"Reading file: {file_path}")
            # Read the CSV file with the specified delimiter
            df = pd.read_csv(file_path, delimiter="\t", low_memory=False)
            table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file
            #Export Table to sqlite db
            df.to_sql(table_name, conn, if_exists='replace', index=False)
        except Exception as e:
            raise Exception(f"Error reading file {file_path}: {e}")
conn.commit()
conn.close()
```
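A quick sanity check after the import (a sketch; it assumes the standard OMOP v5 CSV names, so a `CONCEPT` table should now exist in the database):

``` python
import sqlite3
import pandas as pd

# Sanity-check sketch: the OMOP CSVs are named after their tables, so count
# the rows that landed in CONCEPT.
conn = sqlite3.connect("codes/omop_54.sqlite")
print(pd.read_sql_query("SELECT COUNT(*) AS n FROM CONCEPT;", conn))
conn.close()
```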
%% Cell type:code id:b9cafd0c-a3bd-408b-bca8-b0de2acde1cd tags:
``` python
# Create a SQL connection to our SQLite database
conn = sqlite3.connect("codes/omop_54.sqlite")
cur = conn.cursor()
#Print ALL Columns in Table
# table="CONCEPT_SET"
# cur.execute(f"PRAGMA table_info({table});")
# print(pd.DataFrame(cur.fetchall()))
#Print ALL TABLE NAMES
# cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=? ;", ("VOCABULARY",))
# print(cur.fetchone())
cur.execute("SELECT vocabulary_id FROM VOCABULARY WHERE vocabulary_id=? ;", ("MELDB",))
print(cur.fetchone())
#Print WHOLE TABLE
# cur.execute('SELECT * FROM CONCEPT;')
# cur.execute('SELECT * FROM CONCEPT WHERE standard_concept = "C";')
# cur.execute('SELECT * FROM CONCEPT WHERE concept_code = "119768002" LIMIT 1;')
# cur.execute('SELECT * FROM CONCEPT WHERE concept_code IN ("119768002", "5905001");')
# cur.execute('SELECT DISTINCT VOCABULARY_ID FROM CONCEPT;')
# df = pd.DataFrame(cur.fetchall())
# print(list(df[0]))
# display(df)
# for row in :
# print(row)
BNF Code object #Get Header of Table
SNOMED Code object # table="CONCEPT_CLASS"
dtype: object # cur.execute(f"SELECT * FROM {table} LIMIT 3;")
# print(cur.fetchall())
#create meldb VOCABULARY
# meldb_version='v3.2.10'
# meldb_description = 'Multidisciplinary Ecosystem to study Lifecourse Determinants and Prevention of Early-onset Burdensome Multimorbidity'
# meldb_reference = 'https://www.it-innovation.soton.ac.uk/projects/meldb'
# df_test = pd.DataFrame([{
# "vocabulary_id": 'MELDB',
# "vocabulary_name": meldb_description,
# "vocabulary_reference": meldb_reference,
# "vocabulary_version": meldb_version,
# # "vocabulary_concept_id": 0,
# }])
# df_test.to_sql("VOCABULARY", conn, if_exists='append', index=False)
# cur.execute("""
# CREATE TABLE CONCEPT_SET (
# concept_set_id INTEGER PRIMARY KEY AUTOINCREMENT, -- Unique identifier for each concept set
# atlas_id INTEGER, -- Unique identifier generated by ATLAS
# concept_set_name TEXT, -- Optional name for the concept set
# concept_set_description TEXT, -- Optional description for the concept set
# vocabulary_id TEXT NOT NULL, -- Foreign key to VOCABULARY table
# FOREIGN KEY (vocabulary_id) REFERENCES VOCABULARY(vocabulary_id)
# );""")
# cur.execute("DROP TABLE CONCEPT_SET;")
# cur.execute("""
# CREATE TABLE CONCEPT_SET_ITEM (
# concept_set_item_id INTEGER PRIMARY KEY AUTOINCREMENT, -- Unique identifier for each mapping
# concept_set_id INTEGER NOT NULL, -- Foreign key to CONCEPT_SET table
# concept_id INTEGER NOT NULL, -- Foreign key to CONCEPT table
# FOREIGN KEY (concept_set_id) REFERENCES CONCEPT_SET(concept_set_id),
# FOREIGN KEY (concept_id) REFERENCES CONCEPT(concept_id)
# );""")
# cur.execute("DROP TABLE CONCEPT_SET_ITEM;")
# Be sure to close the connection
conn.close()
```
%% Cell type:code id:d03b75f3-902f-42d7-b52f-dac7e79ecb11 tags:
``` python
conn = sqlite3.connect("codes/omop_54.sqlite") # change to 'sqlite:///your_filename.db'
cur = conn.cursor()
file_path = "/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/HEART_VALVE_DISORDERS.csv"
df = pd.read_csv(file_path, low_memory=False)
df = df.set_index("code")
df.to_sql(name='test', con=conn, if_exists='replace')
conn.commit()
conn.close()
```
%% Cell type:code id:d96c3511-3831-400e-ba40-0a36abcc60d3 tags:
``` python
#DISPLAY SQL TABLE
table="CONCEPT_SET_ITEM"
# Create a SQL connection to our SQLite database
conn = sqlite3.connect("codes/omop_54.sqlite")
cur = conn.cursor()
#Print ALL Columns in Table
cur.execute(f"PRAGMA table_info({table});")
df_cols = pd.DataFrame(cur.fetchall())
print(df_cols)
df_cols = df_cols[1]
#Print TABLE
cur.execute(f"SELECT * FROM {table};")
df = pd.DataFrame(cur.fetchall())
df = df.rename(columns={i:s for i, s in enumerate(df_cols)})
display(df)
conn.close()
# a+s = 13364
# a+s+i = 13591
```
%% Cell type:code id:42d49a00-9646-4ba4-afb6-12297289b7a7 tags:
``` python
def sql_row_exist(conn, table, column, value):
    # Execute and check if a result exists
    cur = conn.cursor()
    query = f"SELECT 1 FROM {table} WHERE {column} = ? LIMIT 1;"
    cur.execute(query, (value,))
    exists = cur.fetchone() is not None
    return exists
```
%% Cell type:code id:f7b51bcd-6ee1-4023-8d36-7f419ce4120d tags:
``` python
#EXPORT MELDB CSV OUTPUT
conn = sqlite3.connect("codes/omop_54.sqlite") # change to 'sqlite:///your_filename.db'
cur = conn.cursor()
vocab_output = "MELDB"
vocab_type = "SNOMED"
file_path = "/home/jjd1c23/ssd/meldb/jjd1c23/phenotype/output/V3_2_10_MELD_snomed_no_translate.csv"
# file_path = "/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/HEART_VALVE_DISORDERS.csv"
# Read the CSV file with the specified delimiter
out = pd.read_csv(file_path, low_memory=False)
print(df.columns)
for concept_set_name, grp in out.groupby("MELDB_concept"):
    # display(concept_set_name, grp[["code", "MELDB_concept"]])
    #Create Concept_Set
    if not sql_row_exist(conn, "CONCEPT_SET", "concept_set_name", concept_set_name):
        cur.execute(f"INSERT INTO CONCEPT_SET (concept_set_name, vocabulary_id) VALUES ('{concept_set_name}', 'MELDB');")
    else:
        print("concept_set", concept_set_name, "already exists")
        #TODO: ask to remove old concept_set?
    #Get Concept_set_id
    query = "SELECT concept_set_id FROM CONCEPT_SET WHERE concept_set_name = ? AND vocabulary_id = ?;"
    cur.execute(query, (concept_set_name, vocab_output, ))
    concept_set_id = cur.fetchone()[0]
    #Get corresponding Concept_id (OMOP) for each Concept_code (e.g. SNOMED)
    concept_codes = "'"+"', '".join(list(grp["code"].astype(str)))+"'"
    query = f"SELECT concept_id FROM CONCEPT WHERE vocabulary_id = ? AND concept_code IN ({concept_codes});"
    print(query)
    cur.execute(query, (vocab_type, ))
    df_out = pd.DataFrame(cur.fetchall(), columns=["concept_id"])
    if not len(grp) == len(df_out):
        print("ERROR: Some", vocab_type, "Codes do not exist in OMOP Database")
    #Create Concept_set_item
    df_out["concept_set_id"] = concept_set_id
    df_out.to_sql("CONCEPT_SET_ITEM", conn, if_exists='append', index=False)
    display(df_out)
    # break
# #Create New CONCEPT_SET
# table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file
# cur.execute(f"INSERT INTO CONCEPT_SET (concept_class_name) VALUES ('{table_name}');")
conn.commit()
conn.close()
```
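The `INSERT` and `IN (...)` list above interpolate values straight into the SQL string, which breaks on names or codes containing quotes. A parameterised sketch of the same two queries (same tables and columns; function names are illustrative, and it assumes an open cursor):

``` python
# Parameterised sketch of the interpolated queries above (illustrative only).
def insert_concept_set(cur, name, vocab="MELDB"):
    cur.execute(
        "INSERT INTO CONCEPT_SET (concept_set_name, vocabulary_id) VALUES (?, ?);",
        (name, vocab),
    )

def lookup_concept_ids(cur, codes, vocab_type):
    placeholders = ", ".join("?" * len(codes))  # one ? per code
    cur.execute(
        f"SELECT concept_id FROM CONCEPT WHERE vocabulary_id = ? "
        f"AND concept_code IN ({placeholders});",
        [vocab_type, *codes],
    )
    return [r[0] for r in cur.fetchall()]
```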
%% Cell type:code id:85007741-e34c-4112-a63c-9fb302b76958 tags:
``` python
"'"+"', '".join(list(grp["code"].astype(str)))+"'"
```
%% Cell type:markdown id:423e7c21-f3bd-439d-9dcb-c17cc2cc6854 tags:
### ATLAS
%% Cell type:code id:c6b45e4d-c7d2-42e7-9b4a-0e9c1c86d34b tags:
``` python
import requests

#Create ATLAS Concept Set
#NOTE: assumes `url` (WebAPI conceptset endpoint) and `headers` are defined elsewhere
def atlas_create_concept(name, description="", items=[]):
    data = {
        "id": 0,
        "name": name,
        "description": description,
        "expression": {
            "items": items
        }
    }
    try:
        # Sending the POST request
        response = requests.post(url, json=data, headers=headers)
        # Check the response status
        if response.status_code == 200 or response.status_code == 201:
            print("POST request successful:")
            print(response.json()) # Assuming the response is JSON
            return response.json()["id"] #the Response object itself is not subscriptable
        else:
            print(f"POST request failed. HTTP Status Code: {response.status_code}")
            print("Response content:")
            print(response.text)
            return None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
# Heart Test 1 - 1885487
# Heart Test 2 - 1885488
# Heart Valve Disorders - 1885449
```
%% Cell type:code id:45497623-1da0-4f74-b21e-da8811c89b04 tags:
``` python
def get_omop_concepts(cur, codes, vocab_id):
    #Create List for SQL
    mask = ""
    for c in codes:
        mask += f'"{c}", '
    mask = mask[:-2] #remove last comma
    #Execute SQL
    cur.execute(f'SELECT * FROM CONCEPT WHERE concept_code IN ({mask}) AND VOCABULARY_ID = "{vocab_id}";')
    df = pd.DataFrame(cur.fetchall()) #convert to pandas df
    print("Identified", len(df[0]), "OMOP Concepts:", list(df[0]))
    return df

def omop_concepts_to_atlas_json(df):
    json = []
    for i, row in df.iterrows():
        #template for atlas api
        out = {
            "concept": {
                'CONCEPT_ID': row[0],
                'CONCEPT_NAME': row[1],
                'STANDARD_CONCEPT': 'S',
                'STANDARD_CONCEPT_CAPTION': 'Standard',
                'INVALID_REASON': 'V',
                'INVALID_REASON_CAPTION': 'Valid',
                'CONCEPT_CODE': row[6],
                'DOMAIN_ID': row[2],
                'VOCABULARY_ID': row[3],
                'CONCEPT_CLASS_ID': row[4],
                'VALID_START_DATE': int(row[7]),
                'VALID_END_DATE': int(row[8])
            },
            'isExcluded': False,
            'includeDescendants': False,
            'includeMapped': False
        }
        json.append(out)
    return json

conn = sqlite3.connect("codes/omop_54.sqlite")
cur = conn.cursor()
vocab_id = "SNOMED" #SNOMED, ATC, ICD10CM, ICD9CM, Read
csv_output = "/home/jjd1c23/ssd/meldb/jjd1c23/concepts/snomed/ANGER.csv"
#Load CSV Output File
df_in = pd.read_csv(csv_output)
print(len(df_in))
# df = get_omop_concepts(cur, ["119768002", "5905001"], "SNOMED")
df = get_omop_concepts(cur, list(df_in["code"]), vocab_id)
json = omop_concepts_to_atlas_json(df)
# display(json)
conn.close()
```
%% Cell type:code id:ea759907-c085-472a-82e2-07b6b19e2c8f tags:
``` python
#ATLAS GET CONCEPT SET
import requests
def request_get(url):
    try:
        # Sending the GET request
        response = requests.get(url)
        # Check if the response status code is 200 (OK)
        if response.status_code == 200:
            print("Response data:")
            # print(response.json()) # Assuming the response is in JSON format
            return response.json()
        else:
            print(f"Failed to fetch data. HTTP Status Code: {response.status_code}")
            print("Response content:")
            print(response.text)
            return None
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
#GET SET INFO
set_id = "1885449"
url = f"https://atlas-demo.ohdsi.org/WebAPI/conceptset/{set_id}"
request_get(url)
```
%% Cell type:code id:5a70e636-6051-4930-bf1b-30d093fd0552 tags:
``` python
#GET SET ITEMS (Concepts)
set_id = "1885449"
url = f"https://atlas-demo.ohdsi.org/WebAPI/conceptset/{set_id}/expression/ATLASPROD"
response = request_get(url)
display(response)
```
%% Cell type:code id:96bfcd9c-27e8-4be4-a680-7553d908790e tags:
``` python
#ATLAS CREATE CONCEPT SET
```
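A sketch of how the pieces above might be combined to create the concept set (the endpoint path, headers, and set name are assumptions, not a verified ATLAS workflow; `df` is the result of the OMOP lookup cell):

``` python
# Sketch only: combine omop_concepts_to_atlas_json with atlas_create_concept.
# Endpoint path and headers are assumptions; the set name is hypothetical.
url = "https://atlas-demo.ohdsi.org/WebAPI/conceptset/"
headers = {"Content-Type": "application/json"}
items = omop_concepts_to_atlas_json(df)  # df from the OMOP lookup cell above
new_id = atlas_create_concept("MELDB ANGER", "MELDB concept set", items)
print("created concept set id:", new_id)
```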