From 43692665e52622b26cc6a109cad8682fe695cc87 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Tue, 18 Feb 2025 10:15:47 +0000
Subject: [PATCH] moved trud api key to an environment variable; stored trud
 versions in file trud_version.json to keep track of the source mappings when
 we commit to a repo

---
 README.md |  26 +++++++++
 acmc.py   |   3 +-
 trud.py   | 171 ++++++++++++++++++++++++++++--------------------------
 3 files changed, 117 insertions(+), 83 deletions(-)

diff --git a/README.md b/README.md
index c2dd4da..0029a7a 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,32 @@ The tool supports verification and mapping across diagnostic coding formats belo
 - [**ICD-10:**](https://icd.who.int/browse10/2019/en) International Classification of Diseases (ICD) is a medical classification list from the World Health Organization (WHO) and widely used in hospital settings, e.g. Hospital Episode Statistics (HES).
 - [**ATC Codes:**](https://www.who.int/tools/atc-ddd-toolkit/atc-classification) Anatomical Therapeutic Chemical (ATC) Classification is a drug classification list from the World Health Organization (WHO)
 
+## Notes
+
+Credentials are read from environment variables (`trud install` requires `ACMC_TRUD_API_KEY`). On Linux/macOS:
+
+```
+export ACMC_TRUD_API_KEY="your_api_key"
+export ACMC_GITLAB_PAT="your_personal_access_token"
+export ACMC_GITHUB_PAT="your_personal_access_token"
+```
+
+Windows (Command Prompt):
+
+```
+set ACMC_TRUD_API_KEY=your_api_key
+set ACMC_GITLAB_PAT=your_personal_access_token
+set ACMC_GITHUB_PAT=your_personal_access_token
+```
+
+Windows (PowerShell):
+
+```
+$env:ACMC_TRUD_API_KEY="your_api_key"
+$env:ACMC_GITLAB_PAT="your_personal_access_token"
+$env:ACMC_GITHUB_PAT="your_personal_access_token"
+```
+
 ## Installation
 
 **1. Setup Conda Enviroment**
diff --git a/acmc.py b/acmc.py
index 6ecf035..e79b7f9 100644
--- a/acmc.py
+++ b/acmc.py
@@ -10,7 +10,7 @@ BUILD_PATH = Path('build')
 
 def trud_install(args):
     """Handle the `trud install` command."""
-    trud.install(args.api_key)
+    trud.install()
 
 def omop_install(args):
     """Handle the `omop install` command."""
@@ -72,7 +72,6 @@ def main():
 	
 	# trud install
 	trud_install_parser = trud_subparsers.add_parser("install", help="Install TRUD components")
-	trud_install_parser.add_argument("-k", "--api-key", required=True, help="TRUD API Key")
 	trud_install_parser.set_defaults(func=trud_install)
 	
 	### OMOP Command ###
diff --git a/trud.py b/trud.py
index 0b28b51..a491684 100644
--- a/trud.py
+++ b/trud.py
@@ -15,19 +15,15 @@ import simpledbf
 
 # Constants
 FQDN = "isd.digital.nhs.uk"
-TRUD_DIR = Path('./build/trud')
-TRUD_DOWNLOADS_DIR = TRUD_DIR / 'downloads'
-TRUD_PROCESSED_DIR = TRUD_DIR / 'processed'
+TRUD_PATH = Path('./build/trud')
+TRUD_VERSION_PATH = TRUD_PATH / 'trud_version.json'
+TRUD_DOWNLOADS_DIR = TRUD_PATH / 'downloads'
+TRUD_PROCESSED_DIR = TRUD_PATH / 'processed'
 
 def error_exit(message):
     print(message, "error")
     sys.exit(1)
 
-def validate_api_key(api_key):
-    """Validate that the API key is 40-character hexadecimal."""
-    if not api_key or len(api_key) != 40 or not all(c in "0123456789abcdef" for c in api_key.lower()):
-        error_exit("Invalid API key format. Expected a 40-character hexadecimal string.")
-
 def get_releases(item_id, API_KEY, latest=False):
     """Retrieve release information for an item from the TRUD API."""
     url = f"https://{FQDN}/trud/api/v1/keys/{API_KEY}/items/{item_id}/releases"
@@ -255,11 +251,11 @@ def create_map_directories():
 
     # Check if build directory exists
     create_map_dirs = False   
-    if TRUD_DIR.exists(): 
-        user_input = input(f"The map directory {TRUD_DIR} already exists. Do you want to download and process trud data again? (y/n): ").strip().lower()
+    if TRUD_PATH.exists(): 
+        user_input = input(f"The map directory {TRUD_PATH} already exists. Do you want to download and process trud data again? (y/n): ").strip().lower()
         if user_input == "y":
             # delete all build files
-            shutil.rmtree(TRUD_DIR)
+            shutil.rmtree(TRUD_PATH)
             create_map_dirs = True
         elif user_input == "n":
             print("Exiting TRUD installation")
@@ -269,76 +265,89 @@ def create_map_directories():
 
     if create_map_dirs:
         # create maps directories
-        TRUD_DIR.mkdir(parents=True, exist_ok=True)
+        TRUD_PATH.mkdir(parents=True, exist_ok=True)
         TRUD_DOWNLOADS_DIR.mkdir(parents=True, exist_ok=True)            
         TRUD_PROCESSED_DIR.mkdir(parents=True,exist_ok=True)                                 
 
-def install(api_key):
-    print(f"Installing TRUD")
-    create_map_directories()
-
-    items_latest = True
-    items = [
-        {
-            "id": 259,
-            "name": "NHS ICD-10 5th Edition XML data files",
-            "hash": "A4F7BBA6E86349AADD0F4696C5E91152EB99CC06121427FC359160439B9F883F",
-            "extract": extract_icd10,
-        },
-        {
-            "id": 119,
-            "name": "OPCS-4 data files",
-            "hash": "0615A2BF43FFEF94517F1D1E0C05493B627839F323F22C52CBCD8B40BF767CD3",
-            "extract": extract_opsc4,
-        },
-        {
-            "id": 9,
-            "name": "NHS Data Migration",
-            "hash": "D4317B3ADBA6E1247CF17F0B7CD2B8850FD36C0EA2923BF684EA6159F3A54765",
-            "extract": extract_nhs_data_migrations,
-        },
-        {
-            "id": 8,
-            "name": "NHS Read Browser",
-            "hash": "1FFF2CBF11D0E6D7FC6CC6F13DD52D2F459095C3D83A3F754E6C359F16913C5E",
-            "extract": extract_nhs_read_browser,
-        },
-        # TODO: Download BNF from separate site? https://www.nhsbsa.nhs.uk/sites/default/files/2024-10/BNF%20Snomed%20Mapping%20data%2020241016.zip
-    ]
-    
-    # Validate and process each item ID
-    for item in items:
-        item_id = item["id"]
-        print(bcolors.HEADER, "---"+item["name"]+"---", bcolors.ENDC)
-
-        releases = get_releases(item_id, API_KEY=api_key, latest=items_latest)
-        if not releases:
-            error_exit(f"No releases found for item {item_id}.")
-
-        # Process each release in reverse order
-        for release_ordinal, release in enumerate(releases[::-1], 1):
-            # Download archive file
-            file_destination = download_release_file(item_id, release_ordinal, release, "archive")
-            
-            # Optional files
-            # if items.checksum:
-            #     download_release_file(item["id"], release_ordinal, release, "checksum")
-            # if items.signature:
-            #     download_release_file(item["id"], release_ordinal, release, "signature")
-            # if items.public_key:
-            #     download_release_file(item["id"], release_ordinal, release, "publicKey", "public key")
-
-            #Verify Hash if available
-            if "hash" in item:
-                validate_download_hash(file_destination, item["hash"])
-
-            #Unzip downloaded .zip
-            unzip_download(file_destination)
-
-            #Extract Tables to parquet
-            if "extract" in item:
-                item["extract"]()
-            
-        print(f"Downloaded {release_ordinal} release(s) for item {item_id}.")
-    
-    print(f"TRUD installation completed")	
\ No newline at end of file
+def install():
+    """Download, verify, unzip and extract every configured TRUD item.
+
+    The TRUD API key is read from the ACMC_TRUD_API_KEY environment variable.
+    Once every item has been processed, the item metadata (id, name, hash) is
+    written to TRUD_VERSION_PATH as a record of what was installed.
+
+    Raises:
+        ValueError: if ACMC_TRUD_API_KEY is unset or empty.
+    """
+    print("Installing TRUD")
+    # Fail before touching the build directory if the API key is missing.
+    api_key = os.getenv("ACMC_TRUD_API_KEY")
+    if not api_key:
+        raise ValueError("TRUD API KEY not found. Set the ACMC_TRUD_API_KEY environment variable.")
+
+    create_map_directories()
+
+    items_latest = True
+    items = [
+        {
+            "id": 259,
+            "name": "NHS ICD-10 5th Edition XML data files",
+            "hash": "A4F7BBA6E86349AADD0F4696C5E91152EB99CC06121427FC359160439B9F883F",
+            "extract": extract_icd10,
+        },
+        {
+            "id": 119,
+            "name": "OPCS-4 data files",
+            "hash": "0615A2BF43FFEF94517F1D1E0C05493B627839F323F22C52CBCD8B40BF767CD3",
+            "extract": extract_opsc4,
+        },
+        {
+            "id": 9,
+            "name": "NHS Data Migration",
+            "hash": "D4317B3ADBA6E1247CF17F0B7CD2B8850FD36C0EA2923BF684EA6159F3A54765",
+            "extract": extract_nhs_data_migrations,
+        },
+        {
+            "id": 8,
+            "name": "NHS Read Browser",
+            "hash": "1FFF2CBF11D0E6D7FC6CC6F13DD52D2F459095C3D83A3F754E6C359F16913C5E",
+            "extract": extract_nhs_read_browser,
+        },
+        # TODO: Download BNF from separate site? https://www.nhsbsa.nhs.uk/sites/default/files/2024-10/BNF%20Snomed%20Mapping%20data%2020241016.zip
+    ]
+
+    # Validate and process each item ID
+    for item in items:
+        item_id = item["id"]
+        print(bcolors.HEADER, "---"+item["name"]+"---", bcolors.ENDC)
+
+        releases = get_releases(item_id, API_KEY=api_key, latest=items_latest)
+        if not releases:
+            error_exit(f"No releases found for item {item_id}.")
+
+        # Process each release in reverse order
+        for release_ordinal, release in enumerate(releases[::-1], 1):
+            # Download archive file (optional checksum/signature/publicKey files are skipped)
+            file_destination = download_release_file(item_id, release_ordinal, release, "archive")
+
+            # Verify hash if available
+            if "hash" in item:
+                validate_download_hash(file_destination, item["hash"])
+
+            # Unzip downloaded .zip
+            unzip_download(file_destination)
+
+            # Extract tables to parquet
+            if "extract" in item:
+                item["extract"]()
+
+        print(f"Downloaded {len(releases)} release(s) for item {item_id}.")
+
+    # Record the installed sources only after every item has been processed, so
+    # the file never lists items whose installation did not complete. The
+    # "extract" callables are dropped because they cannot be serialised to JSON.
+    versions = [{k: v for k, v in entry.items() if k != "extract"} for entry in items]
+    with open(TRUD_VERSION_PATH, "w", encoding="utf-8") as f:
+        json.dump(versions, f, indent=4)
+
+    print("TRUD installation completed")
\ No newline at end of file
-- 
GitLab