Skip to content
Snippets Groups Projects
Commit 08006e0c authored by mjbonifa's avatar mjbonifa
Browse files

refactor:trud and omop versions are now written in yaml format. When map is...

refactor: TRUD and OMOP versions are now written in YAML format. When a map is created, a version file (also YAML) named vocab_version.yaml is written to the phen directory, containing the acmc, TRUD and OMOP version data. Closes #5.
parent 44f11f70
No related branches found
No related tags found
No related merge requests found
......@@ -2,10 +2,11 @@ import os
import argparse
import sqlite3
import pandas as pd
import json
import logging
import zipfile
import shutil
import json
import yaml
from pathlib import Path
......@@ -17,7 +18,7 @@ logger = logging_config.setup_logger()
# constants
VOCAB_PATH = Path('./vocab/omop')
DB_PATH = VOCAB_PATH / 'omop_54.sqlite'
VERSION_FILE = 'omop_version.json'
VERSION_FILE = 'omop_version.yaml'
VERSION_PATH = VOCAB_PATH / VERSION_FILE
EXPORT_FILE = 'omop_export.db'
......@@ -115,8 +116,8 @@ def install (omop_zip_file, version):
def write_version_file(version):
    """Write the OMOP vocabularies and version to the version file.

    Stores ``version`` under the ``'version'`` key of the module-level
    ``vocabularies`` object (mutating it in place) and then dumps that
    object as YAML to ``VERSION_PATH``.

    Args:
        version: OMOP version value to record alongside the vocabularies.
    """
    vocabularies['version'] = version
    # Only the YAML dump is performed: a JSON dump to the same path would be
    # truncated and overwritten by this write, so it served no purpose.
    # Explicit UTF-8 keeps the file independent of the platform locale.
    with open(VERSION_PATH, "w", encoding="utf-8") as file:
        yaml.dump(vocabularies, file, default_flow_style=False, sort_keys=False)
def clear(db_path):
"""Clears the OMOP sql database"""
......
......@@ -10,9 +10,11 @@ import git
import re
import logging
import requests
import yaml
from pathlib import Path
from urllib.parse import urlparse, urlunparse
import acmc
from acmc import trud, omop, parse
# setup logging
......@@ -30,6 +32,7 @@ CONCEPT_SET_DIR = 'concept-set'
OMOP_DIR = 'omop'
DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR, OMOP_DIR]
CONFIG_FILE = 'config.json'
VOCAB_VERSION_FILE = 'vocab_version.yaml'
DEFAULT_GIT_BRANCH = 'main'
......@@ -438,6 +441,33 @@ def write_code_errors(code_errors, code_errors_path):
err_df = err_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
err_df.to_csv(code_errors_path, index=False, mode="w")
def write_vocab_version(phen_path):
    """Write the combined vocabulary version file into a phenotype directory.

    Loads the TRUD and OMOP version files and writes their contents,
    together with the acmc package version, as YAML to
    ``phen_path / VOCAB_VERSION_FILE`` under a top-level ``versions`` key.

    Args:
        phen_path: Path object (supporting ``/``) of the directory that
            receives the version file.

    Raises:
        FileNotFoundError: If the TRUD or OMOP version file does not exist.
    """
    # Check both inputs up front so the user gets an actionable message
    # before anything is read or written.
    if not trud.VERSION_PATH.exists():
        raise FileNotFoundError(f"TRUD version path {trud.VERSION_PATH} does not exist, please check TRUD is installed")

    if not omop.VERSION_PATH.exists():
        raise FileNotFoundError(f"OMOP version path {omop.VERSION_PATH} does not exist, please check OMOP is installed")

    # Explicit UTF-8 keeps reads and writes independent of the platform locale.
    with trud.VERSION_PATH.open("r", encoding="utf-8") as file:
        trud_version = yaml.safe_load(file)

    with omop.VERSION_PATH.open("r", encoding="utf-8") as file:
        omop_version = yaml.safe_load(file)

    # Create the combined YAML structure
    version_data = {
        "versions": {
            "acmc": acmc.__version__,
            "trud": trud_version,
            "omop": omop_version,
        }
    }

    # sort_keys=False preserves the acmc / trud / omop ordering above.
    with open(phen_path / VOCAB_VERSION_FILE, "w", encoding="utf-8") as file:
        yaml.dump(version_data, file, default_flow_style=False, sort_keys=False)
def map(phen_dir, target_code_type):
logger.info(f"Processing phenotype: {phen_dir}")
logger.debug(f"Target coding format: {target_code_type}")
......@@ -569,9 +599,7 @@ def map(phen_dir, target_code_type):
concept_path = concept_set_path / filename
concept.to_csv(concept_path, index=False )
# copy version files used for mapping to repo
shutil.copy(trud.VERSION_PATH, phen_path / trud.VERSION_FILE)
shutil.copy(omop.VERSION_PATH, phen_path / omop.VERSION_FILE)
write_vocab_version(phen_path)
logger.info(f"Phenotype processed successfully")
......
import os
import sys
import requests
import json
import argparse
import shutil
import hashlib
import zipfile
import pandas as pd
import simpledbf
import yaml
from pathlib import Path
# setup logging
......@@ -17,7 +17,7 @@ logger = lc.setup_logger()
# Constants
FQDN = "isd.digital.nhs.uk"
VOCAB_PATH = Path('./vocab/trud')
VERSION_FILE = 'trud_version.json'
VERSION_FILE = 'trud_version.yaml'
VERSION_PATH = VOCAB_PATH / VERSION_FILE
DOWNLOADS_PATH = VOCAB_PATH / 'downloads'
PROCESSED_PATH = VOCAB_PATH / 'processed'
......@@ -310,11 +310,11 @@ def install():
# TODO: Download BNF from separate site? https://www.nhsbsa.nhs.uk/sites/default/files/2024-10/BNF%20Snomed%20Mapping%20data%2020241016.zip
]
# save TRUD versions to file to maintain a record of what was downloaded
with open(VERSION_PATH, "w", encoding="utf-8") as f:
# remove function from items
# remove function from items to save versions
data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
json.dump(data, f, indent=4)
# save TRUD versions to file to maintain a record of what was downloaded
with open(VERSION_PATH, "w") as file:
yaml.dump(data, file, default_flow_style=False, sort_keys=False)
# Validate and process each item ID
for item in items:
......
......@@ -283,6 +283,7 @@ acmc --help
You can do this from the issue page in GitLab by selecting "Create Branch", then checkout using:
```sh
git pull origin
git checkout -b feature-branch origin/feature-branch
```
......
......@@ -36,7 +36,8 @@ dependencies = [
"requests",
"simpledbf",
"smmap",
"sqlalchemy"
"sqlalchemy",
"pyyaml"
]
[project.scripts]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment