Skip to content
Snippets Groups Projects
Commit 85582f14 authored by mjbonifa's avatar mjbonifa
Browse files

Merge branch '5-write-version-file-including-acmc-trud-and-omop-in-phenotype-repo' into 'dev'

refactor:trud and omop versions are now written in yaml format. When map is...

Closes #5

See merge request meldb/concepts-processing!4
parents 44f11f70 08006e0c
No related branches found
No related tags found
No related merge requests found
......@@ -2,10 +2,11 @@ import os
import argparse
import sqlite3
import pandas as pd
import json
import logging
import zipfile
import shutil
import json
import yaml
from pathlib import Path
......@@ -17,7 +18,7 @@ logger = logging_config.setup_logger()
# constants
# Root directory holding the OMOP vocabulary files (relative to the working directory).
VOCAB_PATH = Path('./vocab/omop')
# SQLite database file kept under the OMOP vocabulary directory.
DB_PATH = VOCAB_PATH / 'omop_54.sqlite'
# The version record is stored as YAML; the stale '.json' name that used to be
# assigned first was a dead store overwritten before any use, so it is dropped.
VERSION_FILE = 'omop_version.yaml'
VERSION_PATH = VOCAB_PATH / VERSION_FILE
# File name for the OMOP export database (its use is not visible in this section).
EXPORT_FILE = 'omop_export.db'
......@@ -115,8 +116,8 @@ def install (omop_zip_file, version):
def write_version_file(version):
    """Write the OMOP vocabularies and their version to the version file.

    Stores ``version`` under the ``'version'`` key of the module-level
    ``vocabularies`` object and dumps that object to ``VERSION_PATH`` as
    block-style YAML, keeping the existing key order.

    Args:
        version: value recorded under the ``'version'`` key.
    """
    vocabularies['version'] = version
    # Only the YAML form is written: a JSON dump to the same path would be
    # truncated immediately by this "w"-mode open, so it served no purpose.
    with open(VERSION_PATH, "w") as file:
        yaml.dump(vocabularies, file, default_flow_style=False, sort_keys=False)
def clear(db_path):
"""Clears the OMOP sql database"""
......
......@@ -10,9 +10,11 @@ import git
import re
import logging
import requests
import yaml
from pathlib import Path
from urllib.parse import urlparse, urlunparse
import acmc
from acmc import trud, omop, parse
# setup logging
......@@ -30,6 +32,7 @@ CONCEPT_SET_DIR = 'concept-set'
OMOP_DIR = 'omop'
DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR, OMOP_DIR]
CONFIG_FILE = 'config.json'
VOCAB_VERSION_FILE = 'vocab_version.yaml'
DEFAULT_GIT_BRANCH = 'main'
......@@ -438,6 +441,33 @@ def write_code_errors(code_errors, code_errors_path):
err_df = err_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
err_df.to_csv(code_errors_path, index=False, mode="w")
def write_vocab_version(phen_path):
    """Write a combined acmc/TRUD/OMOP version record into ``phen_path``.

    Loads the TRUD and OMOP version files with ``yaml.safe_load``, nests them
    together with ``acmc.__version__`` under a top-level ``"versions"`` key and
    dumps the result as block-style YAML to ``phen_path / VOCAB_VERSION_FILE``.

    Args:
        phen_path: directory of the phenotype; must support the ``/`` operator
            (presumably a ``pathlib.Path`` - confirm against callers).

    Raises:
        FileNotFoundError: if the TRUD or the OMOP version file does not exist.
    """
    # both vocabularies must have been installed before anything is read
    trud_source = trud.VERSION_PATH
    if not trud_source.exists():
        raise FileNotFoundError(f"TRUD version path {trud.VERSION_PATH} does not exist, please check TRUD is installed")

    omop_source = omop.VERSION_PATH
    if not omop_source.exists():
        raise FileNotFoundError(f"OMOP version path {omop.VERSION_PATH} does not exist, please check OMOP is installed")

    with trud_source.open("r") as trud_stream:
        trud_record = yaml.safe_load(trud_stream)

    with omop_source.open("r") as omop_stream:
        omop_record = yaml.safe_load(omop_stream)

    # insertion order (acmc, trud, omop) is the order written, since sort_keys=False
    versions = {}
    versions["acmc"] = acmc.__version__
    versions["trud"] = trud_record
    versions["omop"] = omop_record
    combined = {"versions": versions}

    target_path = phen_path / VOCAB_VERSION_FILE
    with open(target_path, "w") as out_stream:
        yaml.dump(combined, out_stream, default_flow_style=False, sort_keys=False)
def map(phen_dir, target_code_type):
logger.info(f"Processing phenotype: {phen_dir}")
logger.debug(f"Target coding format: {target_code_type}")
......@@ -569,9 +599,7 @@ def map(phen_dir, target_code_type):
concept_path = concept_set_path / filename
concept.to_csv(concept_path, index=False )
# copy version files used for mapping to repo
shutil.copy(trud.VERSION_PATH, phen_path / trud.VERSION_FILE)
shutil.copy(omop.VERSION_PATH, phen_path / omop.VERSION_FILE)
write_vocab_version(phen_path)
logger.info(f"Phenotype processed successfully")
......
import os
import sys
import requests
import json
import argparse
import shutil
import hashlib
import zipfile
import pandas as pd
import simpledbf
import yaml
from pathlib import Path
# setup logging
......@@ -17,7 +17,7 @@ logger = lc.setup_logger()
# Constants
# Host name of the NHS Digital service (how it is used is not visible in this section).
FQDN = "isd.digital.nhs.uk"
# Root directory holding the TRUD vocabulary files (relative to the working directory).
VOCAB_PATH = Path('./vocab/trud')
# The version record is stored as YAML; the stale '.json' name that used to be
# assigned first was a dead store overwritten before any use, so it is dropped.
VERSION_FILE = 'trud_version.yaml'
VERSION_PATH = VOCAB_PATH / VERSION_FILE
# Sub-directories of the TRUD vocabulary directory.
DOWNLOADS_PATH = VOCAB_PATH / 'downloads'
PROCESSED_PATH = VOCAB_PATH / 'processed'
......@@ -310,11 +310,11 @@ def install():
# TODO: Download BNF from separate site? https://www.nhsbsa.nhs.uk/sites/default/files/2024-10/BNF%20Snomed%20Mapping%20data%2020241016.zip
]
# remove function from items to save versions
data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
# save TRUD versions to file to main record of what was downloaded
with open(VERSION_PATH, "w", encoding="utf-8") as f:
# remove function from items
data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
json.dump(data, f, indent=4)
with open(VERSION_PATH, "w") as file:
yaml.dump(data, file, default_flow_style=False, sort_keys=False)
# Validate and process each item ID
for item in items:
......
......@@ -283,6 +283,7 @@ acmc --help
You can do this from the issue page in GitLab by selecting "Create Branch", then check out the branch using:
```sh
git pull origin
git checkout -b feature-branch origin/feature-branch
```
......
......@@ -36,7 +36,8 @@ dependencies = [
"requests",
"simpledbf",
"smmap",
"sqlalchemy"
"sqlalchemy",
"pyyaml"
]
[project.scripts]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment