Renamed trud_api.py to trud.py; fixed the r3->snomed error caused by the mapping file never being read (missing pd.read_csv call)

47aeae38 · mjbonifa · 83694229 · 47aeae38 · 47aeae38
Commit 47aeae38 authored 6 months ago by mjbonifa
--- a/README.md
+++ b/README.md
@@ -76,7 +76,7 @@ Each data file has a "Subscribe" link that will take you to the licence. You wil

 5. **Install TRUD:** Download and install NHS TRUD medical code resources.

-Executing the script using the command: `python trud_api.py --key <API_KEY>`.
+Execute the script using the command: `python trud.py --key <API_KEY>`.

 Processed tables will be saved as `.parquet` files in the `maps/processed/` directory.
 	- *Note: NHS TRUD defines one-way mappings and does <b>NOT ADVISE</b> reversing the mappings. If you still wish to reverse these into two-way mappings, duplicate the given `.parquet` table and reverse the filename (e.g. `read2_code_to_snomed_code.parquet` to `snomed_code_to_read2_code.parquet`)*

--- a/trud_api.py
+++ b/trud_api.py
@@ -97,9 +97,9 @@ def extract_opsc4():

 def extract_nhs_data_migrations():
    #NHS Data Migrations
-    file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'sctcremap_uk_20200401000001.txt'
    
    #snomed only
+    file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'sctcremap_uk_20200401000001.txt'    
    df = pd.read_csv(file_path, sep='\t')    
    df = df[["SCT_CONCEPTID"]]
    df = df.rename(columns={"SCT_CONCEPTID":"snomed_code"})
@@ -110,7 +110,6 @@ def extract_nhs_data_migrations():

    #r2 -> r3
    file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'rctctv3map_uk_20200401000001.txt'
-
    df = pd.read_csv(file_path, sep='\t')
    df = df[["V2_CONCEPTID", "CTV3_CONCEPTID"]]
    df = df.rename(columns={"V2_CONCEPTID":"read2_code",
@@ -120,7 +119,6 @@ def extract_nhs_data_migrations():

    #r3->r2
    file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'ctv3rctmap_uk_20200401000002.txt'
-
    df = pd.read_csv(file_path, sep='\t')
    df = df[["CTV3_CONCEPTID", "V2_CONCEPTID"]]
    df = df.rename(columns={"CTV3_CONCEPTID":"read3_code", 
@@ -132,7 +130,6 @@ def extract_nhs_data_migrations():

    #r2 -> snomed
    file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'rcsctmap2_uk_20200401000001.txt'
-
    df = pd.read_csv(file_path, sep='\t', dtype=str)
    df = df[["ReadCode", "ConceptId"]]
    df = df.rename(columns={"ReadCode":"read2_code",
@@ -142,7 +139,7 @@ def extract_nhs_data_migrations():

    #r3->snomed
    file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'ctv3sctmap2_uk_20200401000001.txt'
-
+    df = pd.read_csv(file_path, sep='\t', dtype=str)
    df = df[["CTV3_TERMID", "SCT_CONCEPTID"]]
    df = df.rename(columns={"CTV3_TERMID":"read3_code",
                            "SCT_CONCEPTID":"snomed_code"})
@@ -242,6 +239,8 @@ def create_build_directories(build_dir='build'):
        maps_processed_path.mkdir(parents=True,exist_ok=True)                                 

 def main():
+    print("Processing TRUD files")
+    
    parser = argparse.ArgumentParser(
        description="Download releases of items using the TRUD API.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
@@ -322,5 +321,6 @@ def main():
            
        print(f"Downloaded {release_ordinal} release(s) for item {item_id}.")

+    print(f"Successfully completed TRUD processing")
 if __name__ == "__main__":
    main()