Skip to content
Snippets Groups Projects
Commit 28aecd75 authored by mjbonifa's avatar mjbonifa
Browse files

added OMOP versioning

parent c87d1411
No related branches found
No related tags found
No related merge requests found
......@@ -54,6 +54,12 @@ The tool supports verification and mapping across diagnostic coding formats belo
## Notes
OMOP
Contents of the OMOP vocabulary package:
Vocabularies release version: v20240830
Linux/macOS:
```
......
......@@ -14,7 +14,7 @@ def trud_install(args):
def omop_install(args):
    """Handle the `omop install` command.

    Installs the OMOP vocabulary CSV files found in ``args.omop_dir`` into
    the SQLite database, recording ``args.version`` as the vocabulary
    release version (both supplied by the argparse flags -d/--omop-dir and
    -v/--version).
    """
    # NOTE: the stale call omop.install(omop.OMOP_DB_PATH, args.omop_folder)
    # was removed — args.omop_folder no longer exists after the CLI flag was
    # renamed to --omop-dir, so it would raise AttributeError before the
    # real install ever ran.
    omop.install(args.omop_dir, args.version)
def omop_clear(args):
"""Handle the `omop clear` command."""
......@@ -80,7 +80,8 @@ def main():
# omop install
omop_install_parser = omop_subparsers.add_parser("install", help="Install OMOP codes within database")
omop_install_parser.add_argument("-f", "--omop-folder", required=True, help="Path to extracted OMOP downloads folder")
omop_install_parser.add_argument("-d", "--omop-dir", required=True, help="Directory path to extracted OMOP downloads")
omop_install_parser.add_argument("-v", "--version", required=True, help="OMOP vocabularies release version")
omop_install_parser.set_defaults(func=omop_install)
# omop clear
......@@ -112,7 +113,7 @@ def main():
phen_map_parser.add_argument("-t", "--target-coding", required=True, choices=['read2', 'read3', 'icd10', 'snomed', 'opcs4'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)")
# phen map flags
phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Translate code types")
phen_map_parser.add_argument("-v", "--verify", action="store_true", default=False, help="Verify codes")
phen_map_parser.add_argument("-ve", "--verify", action="store_true", default=False, help="Verify codes")
phen_map_parser.set_defaults(func=phen_map)
# phen publish
......@@ -124,7 +125,7 @@ def main():
phen_copy_parser = phen_subparsers.add_parser("copy", help="Publish phenotype configuration")
phen_copy_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_copy_parser.add_argument("-td", "--target-dir", type=str, default=str(BUILD_PATH.resolve()), help="Target directory for the copy")
phen_copy_parser.add_argument("-ve", "--version", type=str, default=None, help="Version to copy")
phen_copy_parser.add_argument("-v", "--version", type=str, default=None, help="Phenotype version to copy")
phen_copy_parser.set_defaults(func=phen_copy)
# phen diff
......
File moved
......@@ -2,14 +2,37 @@ import os
import argparse
import sqlite3
import pandas as pd
import json
from pathlib import Path
# File-system layout for the generated OMOP SQLite database and its
# accompanying version-metadata file.
OMOP_DB_DIR = Path('./build/omop')
OMOP_DB_PATH = OMOP_DB_DIR / 'omop_54.sqlite'
VERSION_FILE = 'omop_version.json'
VERSION_PATH = OMOP_DB_DIR / VERSION_FILE

# Metadata describing the OMOP vocabulary release obtained from OHDSI Athena.
# "version" is left blank here and filled in at install time (see
# write_version_file); "model" is appended with each table file loaded into
# the database. The populated structure is serialised to VERSION_PATH as
# JSON so an installed database is traceable to its source release.
# NOTE(review): the numeric "id" values presumably match Athena vocabulary
# identifiers — confirm against https://athena.ohdsi.org/vocabulary/list.
vocabularies = {
    "source": "OHDSI Athena",
    "url": "https://athena.ohdsi.org/vocabulary/list",
    "version": "",
    "vocabularies": [
        { "id": 1, "name": "SNOMED"},
        { "id": 2, "name": "ICD9CM"},
        { "id": 17, "name": "Readv2"},
        { "id": 21, "name": "ATC"},
        { "id": 55, "name": "OPCS4"},
        { "id": 57, "name": "HES Specialty"},
        { "id": 70, "name": "ICD10CM"},
        { "id": 75, "name": "dm+d"},
        { "id": 144, "name": "UK Biobank"},
        { "id": 154, "name": "NHS Ethnic Category"},
        { "id": 155, "name": "NHS Place of Service"}
    ],
    "model": []
}
#Populate SQLite3 Database with default OMOP CONCEPTS
def install (db_path, omop_install_folder):
def install (omop_install_folder, version, db_path=OMOP_DB_PATH):
"""Installs the OMOP release csv files in a file-based sql database"""
print(f"Installing OMOP database from {omop_install_folder}")
# check folder for omop install files is a directory
......@@ -20,7 +43,7 @@ def install (db_path, omop_install_folder):
# check codes directory exists and if not create it
if not OMOP_DB_DIR.exists():
OMOP_DB_DIR.mkdir(parents=True)
print(f"Codes directory '{OMOP_DB_DIR}' created.")
print(f"OMOP directory '{OMOP_DB_DIR}' created.")
# connect to database, if it does not exist it will be created
conn = sqlite3.connect(OMOP_DB_PATH)
......@@ -30,28 +53,38 @@ def install (db_path, omop_install_folder):
file_path = os.path.join(omop_install_folder, filename)
try:
print(f"Reading file: {file_path}")
# Read the CSV file with the specified delimiter
# read the CSV file with the specified delimiter
df = pd.read_csv(file_path, delimiter="\t", low_memory=False)
table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file
#Export Table to sqlite db
# export Table to sqlite db
df.to_sql(table_name, conn, if_exists='replace', index=False)
# add to the metadata
vocabularies["model"].append(filename)
except Exception as e:
raise Exception(f"Error reading file {file_path}: {e}")
conn.close()
# write version file
write_version_file(version)
print(f"OMOP installation completed")
def write_version_file(version):
    """Write the OMOP vocabularies metadata and release version to a JSON file.

    Stores ``version`` in the module-level ``vocabularies`` metadata (which
    at this point also carries the list of table files loaded by install())
    and serialises the whole structure to VERSION_PATH, so the installed
    database is traceable to the Athena release it was built from.

    Args:
        version (str): OMOP vocabularies release version, e.g. "v20240830".
    """
    # fixes docstring typo "vocaburaries" from the original
    vocabularies['version'] = version
    with open(VERSION_PATH, "w", encoding="utf-8") as f:
        json.dump(vocabularies, f, indent=4)
def clear(db_path):
"""Clears the OMOP sql database"""
print(f"Clearing OMOP data from database")
omop_db_path = Path(db_path)
if not omop_db_path.is_file():
raise FileNotFoundError(f"Error: OMOP DB file '{omop_db_path}' does not exist.")
conn = sqlite3.connect(db_path)
cur = conn.cursor()
cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
# Fetch and print table names
......@@ -65,6 +98,7 @@ def clear(db_path):
print(f"OMOP database cleared")
def delete(db_path):
"""Deletes the OMOP sql database"""
print(f"Deleting OMOP database")
omop_db_path = Path(db_path)
if not omop_db_path.is_file():
......
......@@ -14,9 +14,10 @@ from urllib.parse import urlparse, urlunparse
# acmc dependencies
import trud
import omop
from base import log_invalid_code, bcolors, raise_
from parse import Read2, Read3, Icd10, Snomed, Opcs4, Atc, code_types, vocab_types
from omop import OMOP_DB_PATH, publish_concept_sets, setup
from omop import publish_concept_sets, setup
pd.set_option("mode.chained_assignment", None)
......@@ -530,14 +531,6 @@ def map(phen_dir, target_code_type, translate=True, verify=True):
out.to_csv(map_path, index=False)
print(f"Saved mapped concepts to {str(map_path.resolve())}")
# save error File
error_path = phen_path / ERROR_FILE
if error_path.exists():
error_df = pd.read_csv(error_path)
error_df = error_df.drop_duplicates() # Remove Duplicates from Error file
error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
error_df.to_csv(error_path, index=False)
# save concept sets as separate files
concept_set_path = phen_path / CONCEPT_SET_DIR / target_code_type
......@@ -559,6 +552,18 @@ def map(phen_dir, target_code_type, translate=True, verify=True):
concept_path = concept_set_path / filename
concept.to_csv(concept_path, index=False )
# copy version files used for mapping to repo
shutil.copy(trud.VERSION_PATH, phen_path / trud.VERSION_FILE)
shutil.copy(omop.VERSION_PATH, phen_path / omop.VERSION_FILE)
# write errors to a file
error_path = phen_path / ERROR_FILE
if error_path.exists():
error_df = pd.read_csv(error_path)
error_df = error_df.drop_duplicates() # Remove Duplicates from Error file
error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
error_df.to_csv(error_path, index=False)
print(f"Saved concept_sets to {str(concept_set_path.resolve())}")
print(f"Phenotype processing completed")
......@@ -662,6 +667,7 @@ def copy(phen_dir, target_dir, version=None):
print(f"Copied {phen_path} {repo.head.commit.hexsha[:7]} in {copy_path}")
def diff(phen_dir, phen_old_dir):
"""Compare the differences between two versions of a phenotype"""
# validate phenotype directories
validate(phen_old_dir)
......
......@@ -67,3 +67,9 @@ def test_phen_diff():
args = argparse.Namespace(phen_dir="/path/to/phen", phen_dir_old="/path/to/old_phen")
phen_diff(args)
mock_diff.assert_called_once_with("/path/to/phen", "/path/to/old_phen")
def test_phen_local():
    # NOTE(review): this is a verbatim copy of test_phen_diff above — it
    # patches phen.diff and invokes phen_diff, exercising nothing "local".
    # Confirm what this test was meant to cover (e.g. a `phen local`/copy
    # command) and rewrite it, or delete the duplicate.
    with patch("phen.diff") as mock_diff:
        args = argparse.Namespace(phen_dir="/path/to/phen", phen_dir_old="/path/to/old_phen")
        phen_diff(args)
        mock_diff.assert_called_once_with("/path/to/phen", "/path/to/old_phen")
......@@ -16,7 +16,8 @@ import simpledbf
# Constants
FQDN = "isd.digital.nhs.uk"
TRUD_PATH = Path('./build/trud')
TRUD_VERSION_PATH = TRUD_PATH / 'trud_version.json'
VERSION_FILE = 'trud_version.json'
VERSION_PATH = TRUD_PATH / VERSION_FILE
TRUD_DOWNLOADS_DIR = TRUD_PATH / 'downloads'
TRUD_PROCESSED_DIR = TRUD_PATH / 'processed'
......@@ -309,7 +310,7 @@ def install():
]
# save TRUD versions to file to main record of what was downloaded
with open(TRUD_VERSION_PATH, "w", encoding="utf-8") as f:
with open(VERSION_PATH, "w", encoding="utf-8") as f:
# remove function from items
data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
json.dump(data, f, indent=4)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment