diff --git a/README.md b/README.md index 955bb3b7e6cf367e0ce23f11e10fa83e526ece97..de767fc2364ef07e20c240f0d9609016f32fb90b 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ Each data file has a "Subscribe" link that will take you to the licence. You wil 5. **Install TRUD:** Download and install NHS TRUD medical code resources. -Executing the script using the command: `python trud_api.py --key <API_KEY>`. +Executing the script using the command: `python trud.py --key <API_KEY>`. Processed tables will be saved as `.parquet` files in the `maps/processed/` directory. - *Note: NHS TRUD defines one-way mappings and does <b>NOT ADVISE</b> reversing the mappings. If you still wish to reverse these into two-way mappings, duplicate the given `.parquet` table and reverse the filename (e.g. `read2_code_to_snomed_code.parquet` to `snomed_code_to_read2_code.parquet`)* diff --git a/trud_api.py b/trud.py similarity index 98% rename from trud_api.py rename to trud.py index 815403136e754cb96781752d3082354be932d37f..95a30e6e5c9dbfebfa9a04f557ba4efb17c625ff 100644 --- a/trud_api.py +++ b/trud.py @@ -97,10 +97,10 @@ def extract_opsc4(): def extract_nhs_data_migrations(): #NHS Data Migrations - file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'sctcremap_uk_20200401000001.txt' - + #snomed only - df = pd.read_csv(file_path, sep='\t') + file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'sctcremap_uk_20200401000001.txt' + df = pd.read_csv(file_path, sep='\t') df = df[["SCT_CONCEPTID"]] df = df.rename(columns={"SCT_CONCEPTID":"snomed_code"}) df = df.drop_duplicates() @@ -110,7 +110,6 @@ def extract_nhs_data_migrations(): #r2 -> r3 file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'rctctv3map_uk_20200401000001.txt' - df = pd.read_csv(file_path, sep='\t') df = df[["V2_CONCEPTID", "CTV3_CONCEPTID"]] df = df.rename(columns={"V2_CONCEPTID":"read2_code", @@ -120,7 +119,6 @@ def extract_nhs_data_migrations(): #r3->r2 file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'ctv3rctmap_uk_20200401000002.txt' - df = pd.read_csv(file_path, sep='\t') df = df[["CTV3_CONCEPTID", "V2_CONCEPTID"]] df = df.rename(columns={"CTV3_CONCEPTID":"read3_code", @@ -132,7 +130,6 @@ def extract_nhs_data_migrations(): #r2 -> snomed file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'rcsctmap2_uk_20200401000001.txt' - df = pd.read_csv(file_path, sep='\t', dtype=str) df = df[["ReadCode", "ConceptId"]] df = df.rename(columns={"ReadCode":"read2_code", @@ -142,7 +139,7 @@ def extract_nhs_data_migrations(): #r3->snomed file_path = Path('build') / 'maps' / 'downloads' / 'Mapping Tables' / 'Updated' / 'Clinically Assured' / 'ctv3sctmap2_uk_20200401000001.txt' - + df = pd.read_csv(file_path, sep='\t', dtype=str) df = df[["CTV3_TERMID", "SCT_CONCEPTID"]] df = df.rename(columns={"CTV3_TERMID":"read3_code", "SCT_CONCEPTID":"snomed_code"}) @@ -242,6 +239,8 @@ def create_build_directories(build_dir='build'): maps_processed_path.mkdir(parents=True,exist_ok=True) def main(): + print("Processing TRUD files") + parser = argparse.ArgumentParser( description="Download releases of items using the TRUD API.", formatter_class=argparse.ArgumentDefaultsHelpFormatter @@ -322,5 +321,6 @@ def main(): print(f"Downloaded {release_ordinal} release(s) for item {item_id}.") + print(f"Successfully completed TRUD processing") if __name__ == "__main__": main()