diff --git a/acmc/omop.py b/acmc/omop.py
index 7075994944e0ee886f2a5a09d0eb765a07792f7e..0d03f4e56c28732ed26d63cf0d70ae2aff9dd3ee 100644
--- a/acmc/omop.py
+++ b/acmc/omop.py
@@ -2,10 +2,11 @@ import os
 import argparse
 import sqlite3
 import pandas as pd
-import json
 import logging
 import zipfile
 import shutil
+import json
+import yaml
 from pathlib import Path
 
 
@@ -17,7 +18,7 @@ logger = logging_config.setup_logger()
 # constants
 VOCAB_PATH = Path('./vocab/omop')
 DB_PATH = VOCAB_PATH / 'omop_54.sqlite'
-VERSION_FILE = 'omop_version.json'
+VERSION_FILE = 'omop_version.yaml'
 VERSION_PATH = VOCAB_PATH / VERSION_FILE
 EXPORT_FILE = 'omop_export.db'
 
@@ -115,8 +116,8 @@ def install (omop_zip_file, version):
 def write_version_file(version):
     """Writes the OMOP vocaburaries and version to a file"""
     vocabularies['version'] = version
-    with open(VERSION_PATH, "w", encoding="utf-8") as f:
-        json.dump(vocabularies, f, indent=4)
+    with open(VERSION_PATH, "w", encoding="utf-8") as file:
+        yaml.dump(vocabularies, file, default_flow_style=False, sort_keys=False)
 
 def clear(db_path):
     """Clears the OMOP sql database"""
diff --git a/acmc/phen.py b/acmc/phen.py
index b503af68e01a256ebdfbafe0f1586340191fc5f7..1f00c0ed99ed1978f3c95841cabdaffc62a538c1 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -10,9 +10,11 @@ import git
 import re
 import logging
 import requests
+import yaml
 from pathlib import Path
 from urllib.parse import urlparse, urlunparse
 
+import acmc
 from acmc import trud, omop, parse
 
 # setup logging
@@ -30,6 +32,7 @@ CONCEPT_SET_DIR = 'concept-set'
 OMOP_DIR = 'omop'
 DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR, OMOP_DIR]
 CONFIG_FILE = 'config.json'
+VOCAB_VERSION_FILE = 'vocab_version.yaml'
 
 DEFAULT_GIT_BRANCH = 'main'
 
@@ -438,6 +441,33 @@ def write_code_errors(code_errors, code_errors_path):
     err_df = err_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
     err_df.to_csv(code_errors_path, index=False, mode="w")
 
+def write_vocab_version(phen_path):
+    """Writes the acmc, TRUD and OMOP versions used for mapping to a single YAML file in phen_path"""
+
+    if not trud.VERSION_PATH.exists():
+        raise FileNotFoundError(f"TRUD version path {trud.VERSION_PATH} does not exist, please check TRUD is installed")
+
+    if not omop.VERSION_PATH.exists():
+        raise FileNotFoundError(f"OMOP version path {omop.VERSION_PATH} does not exist, please check OMOP is installed")
+
+    with trud.VERSION_PATH.open("r", encoding="utf-8") as file:
+        trud_version = yaml.safe_load(file)
+
+    with omop.VERSION_PATH.open("r", encoding="utf-8") as file:
+        omop_version = yaml.safe_load(file)
+
+    # Create the combined YAML structure
+    version_data = {
+        "versions": {
+            "acmc": acmc.__version__,
+            "trud": trud_version,
+            "omop": omop_version,
+        }
+    }
+
+    with open(phen_path / VOCAB_VERSION_FILE, "w", encoding="utf-8") as file:
+        yaml.dump(version_data, file, default_flow_style=False, sort_keys=False)
+
 def map(phen_dir, target_code_type):
     logger.info(f"Processing phenotype: {phen_dir}")
     logger.debug(f"Target coding format: {target_code_type}")
@@ -569,9 +599,7 @@ def map(phen_dir, target_code_type):
         concept_path = concept_set_path / filename
         concept.to_csv(concept_path, index=False )
 
-    # copy version files used for mapping to repo
-    shutil.copy(trud.VERSION_PATH, phen_path / trud.VERSION_FILE)
-    shutil.copy(omop.VERSION_PATH, phen_path / omop.VERSION_FILE)
+    write_vocab_version(phen_path)
 
     logger.info(f"Phenotype processed successfully")
 
diff --git a/acmc/trud.py b/acmc/trud.py
index 5338a58ae071e58616c80df7eb8706744b38d9c2..d014f7e05b551f84dde0936e09cccfb65240d21c 100644
--- a/acmc/trud.py
+++ b/acmc/trud.py
@@ -1,13 +1,13 @@
 import os
 import sys
 import requests
-import json
 import argparse
 import shutil
 import hashlib
 import zipfile
 import pandas as pd
 import simpledbf
+import yaml
 from pathlib import Path
 
 # setup logging
@@ -17,7 +17,7 @@ logger = lc.setup_logger()
 # Constants
 FQDN = "isd.digital.nhs.uk"
 VOCAB_PATH = Path('./vocab/trud')
-VERSION_FILE = 'trud_version.json'
+VERSION_FILE = 'trud_version.yaml'
 VERSION_PATH = VOCAB_PATH / VERSION_FILE
 DOWNLOADS_PATH = VOCAB_PATH / 'downloads'
 PROCESSED_PATH = VOCAB_PATH / 'processed'
@@ -310,11 +310,11 @@ def install():
         # TODO: Download BNF from separate site? https://www.nhsbsa.nhs.uk/sites/default/files/2024-10/BNF%20Snomed%20Mapping%20data%2020241016.zip
     ]
 
+    # remove function from items to save versions
+    data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
     # save TRUD versions to file to main record of what was downloaded
-    with open(VERSION_PATH, "w", encoding="utf-8") as f:
-        # remove function from items
-        data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
-        json.dump(data, f, indent=4)
+    with open(VERSION_PATH, "w", encoding="utf-8") as file:
+        yaml.dump(data, file, default_flow_style=False, sort_keys=False)
 
     # Validate and process each item ID
     for item in items:
diff --git a/docs/index.md b/docs/index.md
index 6a65dc5edaffbc76c95097d3bf99e6585d5620bc..35182069a02f2e56ffe7cba91995463fec0a2ece 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -283,6 +283,7 @@ acmc --help
    You can do this from the issue page in GitLab by selecting "Create Branch", then checkout using:
 
    ```sh
+   git pull origin
    git checkout -b feature-branch origin/feature-branch
    ```
 
diff --git a/pyproject.toml b/pyproject.toml
index 55f7b0dae4c31425cffeb8f7642c282043262cf1..fc4377e263b37f2ba3f05d8e71bbf93e8fd2ff51 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,8 @@ dependencies = [
     "requests",
     "simpledbf",
     "smmap",
-    "sqlalchemy"
+    "sqlalchemy",
+    "pyyaml"
 ]
 
 [project.scripts]