Skip to content
Snippets Groups Projects
Commit 43692665 authored by mjbonifa's avatar mjbonifa
Browse files

moved trud api key to an environment variable; stored trud versions in file...

moved trud api key to an environment variable; stored trud versions in file trud_version.json to keep track of the source mappings when we commit to a repo
parent 80685c23
No related branches found
No related tags found
No related merge requests found
...@@ -52,6 +52,32 @@ The tool supports verification and mapping across diagnostic coding formats belo ...@@ -52,6 +52,32 @@ The tool supports verification and mapping across diagnostic coding formats belo
- [**ICD-10:**](https://icd.who.int/browse10/2019/en) International Classification of Diseases (ICD) is a medical classification list from the World Health Organization (WHO) and widely used in hospital settings, e.g. Hospital Episode Statistics (HES). - [**ICD-10:**](https://icd.who.int/browse10/2019/en) International Classification of Diseases (ICD) is a medical classification list from the World Health Organization (WHO) and widely used in hospital settings, e.g. Hospital Episode Statistics (HES).
- [**ATC Codes:**](https://www.who.int/tools/atc-ddd-toolkit/atc-classification) Anatomical Therapeutic Chemical (ATC) Classification is a drug classification list from the World Health Organization (WHO) - [**ATC Codes:**](https://www.who.int/tools/atc-ddd-toolkit/atc-classification) Anatomical Therapeutic Chemical (ATC) Classification is a drug classification list from the World Health Organization (WHO)
## Notes
Linux/macOS:
```
export ACMC_TRUD_API_KEY="your_api_key"
export ACMC_GITLAB_PAT="your_personal_access_token"
export ACMC_GITHUB_PAT="your_personal_access_token"
```
Windows (Command prompt):
```
set ACMC_TRUD_API_KEY=your_api_key
set ACMC_GITLAB_PAT=your_personal_access_token
set ACMC_GITHUB_PAT=your_personal_access_token
```
Windows (PowerShell):
```
$env:ACMC_TRUD_API_KEY="your_api_key"
$env:ACMC_GITLAB_PAT="your_personal_access_token"
$env:ACMC_GITHUB_PAT="your_personal_access_token"
```
## Installation ## Installation
**1. Setup Conda Environment** **1. Setup Conda Environment**
......
...@@ -10,7 +10,7 @@ BUILD_PATH = Path('build') ...@@ -10,7 +10,7 @@ BUILD_PATH = Path('build')
def trud_install(args): def trud_install(args):
"""Handle the `trud install` command.""" """Handle the `trud install` command."""
trud.install(args.api_key) trud.install()
def omop_install(args): def omop_install(args):
"""Handle the `omop install` command.""" """Handle the `omop install` command."""
...@@ -72,7 +72,6 @@ def main(): ...@@ -72,7 +72,6 @@ def main():
# trud install # trud install
trud_install_parser = trud_subparsers.add_parser("install", help="Install TRUD components") trud_install_parser = trud_subparsers.add_parser("install", help="Install TRUD components")
trud_install_parser.add_argument("-k", "--api-key", required=True, help="TRUD API Key")
trud_install_parser.set_defaults(func=trud_install) trud_install_parser.set_defaults(func=trud_install)
### OMOP Command ### ### OMOP Command ###
......
...@@ -15,19 +15,15 @@ import simpledbf ...@@ -15,19 +15,15 @@ import simpledbf
# Constants # Constants
FQDN = "isd.digital.nhs.uk" FQDN = "isd.digital.nhs.uk"
TRUD_DIR = Path('./build/trud') TRUD_PATH = Path('./build/trud')
TRUD_DOWNLOADS_DIR = TRUD_DIR / 'downloads' TRUD_VERSION_PATH = TRUD_PATH / 'trud_version.json'
TRUD_PROCESSED_DIR = TRUD_DIR / 'processed' TRUD_DOWNLOADS_DIR = TRUD_PATH / 'downloads'
TRUD_PROCESSED_DIR = TRUD_PATH / 'processed'
def error_exit(message): def error_exit(message):
print(message, "error") print(message, "error")
sys.exit(1) sys.exit(1)
def validate_api_key(api_key):
"""Validate that the API key is 40-character hexadecimal."""
if not api_key or len(api_key) != 40 or not all(c in "0123456789abcdef" for c in api_key.lower()):
error_exit("Invalid API key format. Expected a 40-character hexadecimal string.")
def get_releases(item_id, API_KEY, latest=False): def get_releases(item_id, API_KEY, latest=False):
"""Retrieve release information for an item from the TRUD API.""" """Retrieve release information for an item from the TRUD API."""
url = f"https://{FQDN}/trud/api/v1/keys/{API_KEY}/items/{item_id}/releases" url = f"https://{FQDN}/trud/api/v1/keys/{API_KEY}/items/{item_id}/releases"
...@@ -255,11 +251,11 @@ def create_map_directories(): ...@@ -255,11 +251,11 @@ def create_map_directories():
# Check if build directory exists # Check if build directory exists
create_map_dirs = False create_map_dirs = False
if TRUD_DIR.exists(): if TRUD_PATH.exists():
user_input = input(f"The map directory {TRUD_DIR} already exists. Do you want to download and process trud data again? (y/n): ").strip().lower() user_input = input(f"The map directory {TRUD_PATH} already exists. Do you want to download and process trud data again? (y/n): ").strip().lower()
if user_input == "y": if user_input == "y":
# delete all build files # delete all build files
shutil.rmtree(TRUD_DIR) shutil.rmtree(TRUD_PATH)
create_map_dirs = True create_map_dirs = True
elif user_input == "n": elif user_input == "n":
print("Exiting TRUD installation") print("Exiting TRUD installation")
...@@ -269,76 +265,89 @@ def create_map_directories(): ...@@ -269,76 +265,89 @@ def create_map_directories():
if create_map_dirs: if create_map_dirs:
# create maps directories # create maps directories
TRUD_DIR.mkdir(parents=True, exist_ok=True) TRUD_PATH.mkdir(parents=True, exist_ok=True)
TRUD_DOWNLOADS_DIR.mkdir(parents=True, exist_ok=True) TRUD_DOWNLOADS_DIR.mkdir(parents=True, exist_ok=True)
TRUD_PROCESSED_DIR.mkdir(parents=True,exist_ok=True) TRUD_PROCESSED_DIR.mkdir(parents=True,exist_ok=True)
def install(api_key): def install():
print(f"Installing TRUD") print(f"Installing TRUD")
create_map_directories() # get TRUD api key from environment variable
api_key = os.getenv("ACMC_TRUD_API_KEY")
items_latest = True if not api_key:
items = [ raise ValueError("TRUD API KEY not found. Set the ACMC_TRUD_API_KEY environment variable.")
{
"id": 259, create_map_directories()
"name": "NHS ICD-10 5th Edition XML data files",
"hash": "A4F7BBA6E86349AADD0F4696C5E91152EB99CC06121427FC359160439B9F883F", items_latest = True
"extract": extract_icd10, items = [
}, {
{ "id": 259,
"id": 119, "name": "NHS ICD-10 5th Edition XML data files",
"name": "OPCS-4 data files", "hash": "A4F7BBA6E86349AADD0F4696C5E91152EB99CC06121427FC359160439B9F883F",
"hash": "0615A2BF43FFEF94517F1D1E0C05493B627839F323F22C52CBCD8B40BF767CD3", "extract": extract_icd10,
"extract": extract_opsc4, },
}, {
{ "id": 119,
"id": 9, "name": "OPCS-4 data files",
"name": "NHS Data Migration", "hash": "0615A2BF43FFEF94517F1D1E0C05493B627839F323F22C52CBCD8B40BF767CD3",
"hash": "D4317B3ADBA6E1247CF17F0B7CD2B8850FD36C0EA2923BF684EA6159F3A54765", "extract": extract_opsc4,
"extract": extract_nhs_data_migrations, },
}, {
{ "id": 9,
"id": 8, "name": "NHS Data Migration",
"name": "NHS Read Browser", "hash": "D4317B3ADBA6E1247CF17F0B7CD2B8850FD36C0EA2923BF684EA6159F3A54765",
"hash": "1FFF2CBF11D0E6D7FC6CC6F13DD52D2F459095C3D83A3F754E6C359F16913C5E", "extract": extract_nhs_data_migrations,
"extract": extract_nhs_read_browser, },
}, {
# TODO: Download BNF from separate site? https://www.nhsbsa.nhs.uk/sites/default/files/2024-10/BNF%20Snomed%20Mapping%20data%2020241016.zip "id": 8,
] "name": "NHS Read Browser",
"hash": "1FFF2CBF11D0E6D7FC6CC6F13DD52D2F459095C3D83A3F754E6C359F16913C5E",
# Validate and process each item ID "extract": extract_nhs_read_browser,
for item in items: }
item_id = item["id"] # TODO: Download BNF from separate site? https://www.nhsbsa.nhs.uk/sites/default/files/2024-10/BNF%20Snomed%20Mapping%20data%2020241016.zip
print(bcolors.HEADER, "---"+item["name"]+"---", bcolors.ENDC) ]
releases = get_releases(item_id, API_KEY=api_key, latest=items_latest) # save TRUD versions to file to main record of what was downloaded
if not releases: with open(TRUD_VERSION_PATH, "w", encoding="utf-8") as f:
error_exit(f"No releases found for item {item_id}.") # remove function from items
data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
# Process each release in reverse order json.dump(data, f, indent=4)
for release_ordinal, release in enumerate(releases[::-1], 1):
# Download archive file # Validate and process each item ID
file_destination = download_release_file(item_id, release_ordinal, release, "archive") for item in items:
item_id = item["id"]
# Optional files print(bcolors.HEADER, "---"+item["name"]+"---", bcolors.ENDC)
# if items.checksum:
# download_release_file(item["id"], release_ordinal, release, "checksum") releases = get_releases(item_id, API_KEY=api_key, latest=items_latest)
# if items.signature: if not releases:
# download_release_file(item["id"], release_ordinal, release, "signature") error_exit(f"No releases found for item {item_id}.")
# if items.public_key:
# download_release_file(item["id"], release_ordinal, release, "publicKey", "public key") # Process each release in reverse order
for release_ordinal, release in enumerate(releases[::-1], 1):
#Verify Hash if available # Download archive file
if "hash" in item: file_destination = download_release_file(item_id, release_ordinal, release, "archive")
validate_download_hash(file_destination, item["hash"])
# Optional files
#Unzip downloaded .zip # if items.checksum:
unzip_download(file_destination) # download_release_file(item["id"], release_ordinal, release, "checksum")
# if items.signature:
#Extract Tables to parquet # download_release_file(item["id"], release_ordinal, release, "signature")
if "extract" in item: # if items.public_key:
item["extract"]() # download_release_file(item["id"], release_ordinal, release, "publicKey", "public key")
print(f"Downloaded {release_ordinal} release(s) for item {item_id}.") #Verify Hash if available
if "hash" in item:
print(f"TRUD installation completed") validate_download_hash(file_destination, item["hash"])
\ No newline at end of file
#Unzip downloaded .zip
unzip_download(file_destination)
#Extract Tables to parquet
if "extract" in item:
item["extract"]()
print(f"Downloaded {release_ordinal} release(s) for item {item_id}.")
print(f"TRUD installation completed")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment