Skip to content
Snippets Groups Projects
Commit ae19db0e authored by Jakub Dylag's avatar Jakub Dylag
Browse files

Conversion script: PHEN_assign_v3.json -> config.yml

parent 406117cf
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
import yaml
import json
from pathlib import Path
```
%% Cell type:code id: tags:
``` python
# Input: the JSON phenotype assignment file; output: the YAML config to write.
json_file = "PHEN_assign_v3.json"
yaml_path = "config.yml"

# Accumulator for concept-set entries. It must be a list: add_conc() adds
# entries with .append(), which a dict does not provide.
outs = []

# Read the JSON file
with open(json_file, 'r', encoding='utf-8') as file:
    data = json.load(file)
def add_conc(outs, name, path, columns, category=None, metadata=None,
             skip_names=("PLASMACELL",)):
    """Append one concept-set entry to ``outs`` and return ``outs``.

    Args:
        outs: List of concept-set entries; it is mutated in place.
        name: Concept-set name, stored as ``str(name)``.
        path: Path of the code file, stored as ``str(path)``.
        columns: Mapping describing the file's columns. A shallow copy is
            stored so that entries built from the same mapping do not share
            one object.
        category: Optional category, stored as ``str(category)`` under
            ``file`` when given.
        metadata: Optional metadata, stored as-is under ``metadata`` when
            given.
        skip_names: Collection of names for which no entry is added
            (pass a tuple/list/set, not a bare string). Defaults to the
            single name ``"PLASMACELL"``.

    Returns:
        The same ``outs`` list, with the new entry appended unless ``name``
        is in ``skip_names``.
    """
    if name in skip_names:
        return outs

    file_spec = {
        "path": str(path),
        # Copy so that reusing one columns dict for several concept sets
        # does not leave every entry pointing at the same object.
        "columns": dict(columns),
    }
    if category is not None:
        file_spec["category"] = str(category)

    out = {
        "name": str(name),
        "file": file_spec,
    }
    if metadata is not None:
        out["metadata"] = metadata

    outs.append(out)
    return outs
# Concept-set entries collected for the output config.
outs = []

# Column names that are carried over into the config. A source column key is
# matched after dropping its last five characters.
supported = ["read2"]

for folder in data["codes"]:
    folder_path = folder["folder"]
    for files in folder["files"]:
        # TODO: actions divide_col
        # TODO: save metadata - has to be dict not list?

        # Columns: keep string-valued entries whose key, minus its last five
        # characters, is supported (presumably the keys carry a five-character
        # suffix such as "_code" -- confirm against the JSON).
        col_out = {
            k[:-5]: v
            for k, v in files["columns"].items()
            if isinstance(v, str) and k[:-5] in supported
        }

        # Metadata
        # TODO: when files["columns"] has a "metadata" key, pass it on as a
        # dict instead of the empty metadata used below.

        # File path: the folder value without its first six characters
        # (presumably a fixed prefix -- confirm), joined to the file name.
        path = folder_path[6:] + "/" + files["file"]

        # Files with "actions" (split_col / divide_col) are not converted yet.
        if "actions" in files:
            continue
        # TODO: "concept_set_categories" -- emit one entry per
        # (category, name) pair with `category` set; an earlier draft
        # hard-coded columns={"read2": "Read Code"} for this case.

        # Files with "excel_sheet" are not converted yet.
        if "excel_sheet" in files:
            continue

        if "concept_set" not in files:
            continue

        for name in files["concept_set"]:  # If belongs to multiple
            outs = add_conc(
                outs,
                name=str(name),
                path=path,
                columns=col_out,
                metadata={},
            )
class _NoAliasDumper(yaml.Dumper):
    """YAML dumper that never emits anchors/aliases for repeated objects."""

    def ignore_aliases(self, data):
        # Always write the full value instead of a pointer to an earlier one.
        return True


# Top-level structure of the generated config; `outs` holds the concept sets
# collected above.
final = {
    "phenotype": {
        "version": "4.0.0",
        "omop": {
            "vocabulary_id": "MELDB_SAIL",
            "vocabulary_name": "Multidisciplinary Ecosystem to study Lifecourse Determinants and Prevention of Early-onset Burdensome Multimorbidity",
            "vocabulary_reference": "https://www.it-innovation.soton.ac.uk/projects/meldb",
        },
        "map": ["read2", "read3", "icd10", "snomed", "opcs4", "atc"],
        "concept_sets": outs,
    },
}

# Convert and write to YAML. The dumper subclass removes unwanted pointers
# for this dump only, rather than patching yaml.Dumper for the whole process.
with open(yaml_path, 'w', encoding='utf-8') as file:
    yaml.dump(
        final,
        file,
        Dumper=_NoAliasDumper,
        default_flow_style=False,
        allow_unicode=True,
    )
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment