From 4541b584f318aaf5b9bedf236516681c2f8dbb13 Mon Sep 17 00:00:00 2001 From: Michael Boniface <m.j.boniface@soton.ac.uk> Date: Thu, 20 Feb 2025 17:34:10 +0000 Subject: [PATCH] updated readme --- README.md | 292 +++++++++++++++++++++++++++++++------------------ acmc/main.py | 24 +++- pyproject.toml | 4 +- 3 files changed, 207 insertions(+), 113 deletions(-) diff --git a/README.md b/README.md index 6e7375b..3b5689f 100644 --- a/README.md +++ b/README.md @@ -84,68 +84,98 @@ $env:ACMC_GITLAB_PAT="your_personal_access_token" $env:ACMC_GITHUB_PAT="your_personal_access_token" ``` +## Requirements + +- Python 3.9 or higher + ## Installation -**1. Setup Conda Enviroment** +To install the `acmc` package, simply run: -ACMC requires Python and the enviroment is maintained using conda. +```bash +pip install acmc +``` -* Ensure you have conda installed, e.g. following instructions for miniconda from [https://docs.conda.io/en/latest/miniconda.html](https://docs.conda.io/en/latest/miniconda.html). -* Create environment: `conda env create -f conda.yaml` -* Activate environment: `conda activate acmc` +Once installed, you'll be ready to use the `acmc` tool along with the associated vocabularies. -**2. Register at TRUD** to access clinically assured terminology mappings [NHS TRUD](https://isd.digital.nhs.uk/trud/user/guest/group/0/account/form) +## Getting Started -**3. Subscribe and accept the following licenses** +### Install Clinically Assured NHS TRUD Code Mappings -ACMC uses clinically assured medical terminologies provided by the NHS. The datafiles are downloaded automatically but you need to register, request subscription and obtain an API key. +1. **Register at TRUD**: Access clinically assured terminology mappings at [NHS TRUD](https://isd.digital.nhs.uk/trud/user/guest/group/0/account/form). 
-* [NHS Read Browser](https://isd.digital.nhs.uk/trud/users/guest/filters/2/categories/9/items/8/releases) -* [NHS Data Migration](https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/8/items/9/releases) -* [ICD10 Edition 5 XML](https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/28/items/259/releases) -* [OPCS-4.10 Data Files](https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/10/items/119/releases) - <!-- - [BNF/Snomed Mapping data.xlsx](https://www.nhsbsa.nhs.uk/prescription-data/understanding-our-data/bnf-snomed-mapping) --> +2. **Subscribe and Accept Licenses**: Subscribe to the following data files: -Each data file has a "Subscribe" link that will take you to the licence. You will need to "Tell us about your subscription request" that summarises why you need access to the data, e.g. for a specific research project. Your subscription will not be approved immediately and will remain in the "pending" state until it is. This is usually approved within 24 hours. - -**4. Get TRUD API Key** + - [NHS Read Browser](https://isd.digital.nhs.uk/trud/users/guest/filters/2/categories/9/items/8/releases) + - [NHS Data Migration](https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/8/items/9/releases) + - [ICD10 Edition 5 XML](https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/28/items/259/releases) + - [OPCS-4.10 Data Files](https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/10/items/119/releases) + + After subscribing, you'll receive an API key once your request is approved (usually within 24 hours). + +3. **Get TRUD API KEY**: Copy your API key from [NHS TRUD Account Management](https://isd.digital.nhs.uk/trud/users/authenticated/filters/0/account/manage) and store it securely. + +4. 
**Add TRUD API KEY as an environment variable** + + To set the environment variable temporarily (for the current session), run: -Go to your [NHS TRUD Account Management](https://isd.digital.nhs.uk/trud/users/authenticated/filters/0/account/manage) and copy you api key to a safe place, e.g. a personnal key store. The api key is required by ACMC tools to download TRUD resources. + On macOS/Linux: -**5. Download and install TRUD resources** + ```bash + export ACMC_TRUD_API_KEY="your_api_key_here" + ``` -Execute the following script to download, install and process TRUD resources + On Windows (Command Prompt or PowerShell): -`python acmc.py trud install --key <API_KEY>`. + ```bash + setx ACMC_TRUD_API_KEY "your_api_key_here" + ``` -Processed TRUD resources are saved as `.parquet` files in the `build/maps/processed/` directory. +5. **Download and Install TRUD Resources**: -*Note: NHS TRUD defines one-way mappings and does <b>NOT ADVISE</b> reversing the mappings. If you still wish to reverse these into two-way mappings, duplicate the given `.parquet` table and reverse the filename (e.g. `read2_code_to_snomed_code.parquet` to `snomed_code_to_read2_code.parquet`)* + Run the following acmc command to download and process the TRUD resources: -**6. Optional: Install OMOP Database:** + ```bash + acmc trud install + ``` + Processed resources will be saved in the `build/maps/processed/` directory. -ACMC optionally supports outputting coding lists in structured OMOP database. To do this you will need to register with [Athena](https://athena.ohdsi.org/auth/login?forceSSO=true) and then download the following vocabularies manually from [Athena OHDSI](https://athena.ohdsi.org/vocabulary/list). +*Note: NHS TRUD provides one-way mappings. 
To reverse mappings, duplicate the `.parquet` file and reverse the filename (e.g., `read2_code_to_snomed_code.parquet` to `snomed_code_to_read2_code.parquet`).* -* Required vocabularies include: - * 1) SNOMED - * 2) ICD9CM - * 17) Readv2 - * 21) ATC - * 55) OPCS4 - * 57) HES Specialty - * 70) ICD10CM - * 75) dm+d - * 144) UK Biobank - * 154) NHS Ethnic Category - * 155) NHS Place of Service +### Install OMOP Vocabularies -The vocabularies will not be available immediately, you will be notified by email when they are ready. This process cannot be automated due to the way that Athena delivers vocabularies for download. +1. Register with [Athena](https://athena.ohdsi.org/auth/login) -* Un-zip the downloaded folder and copy it's path. +2. Download vocabularies [Athena OHDSI](https://athena.ohdsi.org/vocabulary/list). -* Install vocabularies using the following command: + * Required vocabularies include: + * 1) SNOMED + * 2) ICD9CM + * 17) Readv2 + * 21) ATC + * 55) OPCS4 + * 57) HES Specialty + * 70) ICD10CM + * 75) dm+d + * 144) UK Biobank + * 154) NHS Ethnic Category + * 155) NHS Place of Service -`python acmc.py omop install -f <Path to extracted OMOP downloads folder>` + You will be notified by email with a vocabularies version number and link to download a zip file of OMOP database tables in CSV format. + +3. Un-zip the OMOP file + + Create a directory where you want the OMOP CSV tables to be stored; the default from the current working directory is ./build/omop + + Unzip the OMOP files into that directory + +4. Install OMOP vocabularies + + Run the following acmc command to create a local OMOP database from the download: + + ```bash + acmc omop install -d <Directory path to extracted OMOP downloads> -v <release version from email> + ``` ## Defining phenotypes @@ -283,100 +313,146 @@ Need to split column into multiple columns, so only one code type per column. **<b>Large Code lists</b> with numerous phenotypes (e.g. Ho et al), require lots of JSON to be generated. 
See the "Ho generate JSON" section in process_codes_WP.ipynb for example code to generate* -## Usage - ACMC Command-Line Tool - ## Usage -The tool follows a structured command system: + +The `acmc` command-line tool provides various commands to interact with TRUD, OMOP, and Phenotype data. Below are the usage details for each command. + +### General Syntax ```bash -python acmc.py <command> <subcommand> [options] +acmc [OPTIONS] COMMAND [SUBCOMMAND] [ARGUMENTS] ``` -### Available Commands -- **`trud`** – Manage TRUD components -- **`omop`** – Manage OMOP codes and database -- **`map`** – Process mapping configurations +Where: +- `[OPTIONS]` are global options that apply to all commands (e.g., `--debug`). +- `[COMMAND]` is the top-level command (e.g., `trud`, `omop`, `phen`). +- `[SUBCOMMAND]` refers to the specific operation within the command (e.g., `install`, `validate`). ---- +### Global Options -## TRUD Command -### Install TRUD Components -```bash -acmc trud install -k <TRUD_API_KEY> -``` -**Options:** -- `-k, --api-key` _(required)_ – TRUD API key +- `--debug`: Enable debug mode for more verbose logging. ---- +### Commands -## OMOP Commands -### Install OMOP Codes -```bash -acmc omop install -f <OMOP_FOLDER_PATH> -``` -**Options:** -- `-f, --omop-folder` _(required)_ – Path to extracted OMOP downloads folder +#### TRUD Command -### Clear OMOP Data -```bash -acmc omop clear -``` -_Removes OMOP data from the database._ +The `trud` command is used for installing NHS TRUD vocabularies. 
-### Delete OMOP Database -```bash -acmc omop delete -``` -_Deletes the entire OMOP database._ +- **Install TRUD** ---- + Install clinically assured TRUD medical code mappings: -## MAP Commands -### Process Phenotype Configuration -```bash -acmc map process -c <CONFIG_FILE> -s <SOURCE_CODES_DIR> -o <OUTPUT_DIR> -t <TARGET_CODING> [options] -``` + ```bash + acmc trud install + ``` -**Required Options:** -- `-c, --config-file` – Path to the phenotype configuration file -- `-s, --source-codes-dir` – Root directory of source codes -- `-o, --output-dir` – Directory for CSV or OMOP database output -- `-t, --target-coding` – Target coding system _(choices: read2, read3, icd10, snomed, opcs4)_ +#### OMOP Command -**Optional Flags:** -- `-tr, --translate` – Enable code translation (default: disabled) -- `-v, --verify` – Enable code verification (default: disabled) +The `omop` command is used for installing OMOP vocabularies. -**Optional Arguments:** -- `-l, --error-log` – Filepath to save error log (default: `error.csv`) +- **Install OMOP** ---- + Install vocabularies in a local OMOP database: -## Examples -### Install TRUD Components -```bash -acmc trud install -k my-trud-api-key -``` + ```bash + acmc omop install -d <OMOP_DIRECTORY_PATH> -v <OMOP_VERSION> + ``` -### Install OMOP Codes -```bash -acmc omop install -f /path/to/omop -``` + - `-d`, `--omop-dir`: (Optional) Directory path to extracted OMOP downloads, default is `./build/omop` + - `-v`, `--version`: OMOP vocabularies release version. -### Process Mapping Configuration with Read2 Target Coding -```bash -acmc map process -c config.json -s /data/source -o /data/output -t read2 --translate --verify -``` +- **Clear OMOP** -## License -MIT License + Clear data from the local OMOP database: -## Support -For issues, open a ticket in the repository or contact support@example.com. 
+ ```bash + acmc omop clear + ``` + +- **Delete OMOP** + + Delete the local OMOP database: + + ```bash + acmc omop delete + ``` + +#### PHEN Command + +The `phen` command is used for phenotype-related operations. + +- **Initialize Phenotype** + + Initialize a phenotype directory locally or from a remote git repository: + + ```bash + acmc phen init -d <PHENOTYPE_DIRECTORY> -r <REMOTE_URL> + ``` + + - `-d`, `--phen-dir`: (Optional) Directory to write phenotype configuration (the default is ./build/phen). + - `-r`, `--remote_url`: (Optional) URL to a remote git repository. + +- **Validate Phenotype** + + Validate the phenotype configuration: + + ```bash + acmc phen validate -d <PHENOTYPE_DIRECTORY> + ``` + + - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + +- **Map Phenotype** + + Process phenotype mapping and specify the target coding and output format: + ```bash + acmc phen map -d <PHENOTYPE_DIRECTORY> -t <TARGET_CODING> -o <OUTPUT_FORMAT> + ``` + - `-t`, `--target-coding`: Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`). + - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + - `-o`, `--output`: Output format(s) (`csv`, `omop`, or both), default is 'csv'. +- **Publish Phenotype Configuration** + Publish a phenotype configuration, committing all changes and tagging with a new version number. If the phenotype has been initialised from a remote git URL, then the commit and new version tag will be pushed to the remote repo: + + ```bash + acmc phen publish -d <PHENOTYPE_DIRECTORY> + ``` + + - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + +- **Copy Phenotype Configuration** + + Copy a phenotype configuration from a source directory to a target directory at a specific version. 
This is used when wanting to compare versions of phenotypes using the `acmc phen diff` command: + + ```bash + acmc phen copy -d <PHENOTYPE_DIRECTORY> -td <TARGET_DIRECTORY> -v <PHENOTYPE_VERSION> + ``` + + - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + - `-td`, `--target-dir`: (Optional) Directory to copy the phenotype configuration to (the default is ./build). + - `-v`, `--version`: The phenotype version to copy. + +- **Compare Phenotype Configurations** + + Compare a new phenotype version with a previous version of a phenotype: + + ```bash + acmc phen diff -d <NEW_PHENOTYPE_DIRECTORY> -old <OLD_PHENOTYPE_DIRECTORY> + ``` + + - `-d`, `--phen-dir`: (Optional) Directory of current phenotype configuration (the default is ./build/phen). + - `-old`, `--phen-dir-old`: (Required) Directory of old phenotype version. + + +## License +MIT License + +## Support +For issues, open a ticket in the repository or contact support@example.com. ## Contributing diff --git a/acmc/main.py b/acmc/main.py index 3ddb2a0..3f544ea 100644 --- a/acmc/main.py +++ b/acmc/main.py @@ -110,13 +110,31 @@ def main(): # phen map phen_map_parser = phen_subparsers.add_parser("map", help="Process phen mapping") - phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory") - phen_map_parser.add_argument("-t", "--target-coding", required=True, choices=['read2', 'read3', 'icd10', 'snomed', 'opcs4'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)") + phen_map_parser.add_argument("-d", + "--phen-dir", + type=str, + default=str(phen.DEFAULT_PHEN_PATH.resolve()), + help="Phenotype directory") + phen_map_parser.add_argument("-t", + "--target-coding", + required=True, + choices=['read2', 'read3', 'icd10', 'snomed', 'opcs4'], + help="Specify the target coding (read2, read3, icd10, snomed, opcs4)") + phen_map_parser.add_argument("-o", + "--output", + 
choices=["csv", "omop"], + nargs="+", # allows one or more values + default=["csv"], # default to CSV if not specified + help="Specify output format(s): 'csv', 'omop', or both (default: csv)") phen_map_parser.set_defaults(func=phen_map) # phen publish phen_publish_parser = phen_subparsers.add_parser("publish", help="Publish phenotype configuration") - phen_publish_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory") + phen_publish_parser.add_argument("-d", + "--phen-dir", + type=str, + default=str(phen.DEFAULT_PHEN_PATH.resolve()), + help="Phenotype directory") phen_publish_parser.set_defaults(func=phen_publish) # phen copy diff --git a/pyproject.toml b/pyproject.toml index a917394..84e7cf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "acmc" -version = "0.0.1" +version = "0.0.2" authors = [ { name = "Jakub Dylag", email = "j.j.dylag@soton.ac.uk" }, { name = "Michael Boniface", email = "m.j.boniface@soton.ac.uk" } @@ -62,4 +62,4 @@ packages = ["acmc"] dependencies = [ "hatch", "pytest" - ] \ No newline at end of file + ] -- GitLab