diff --git a/.gitignore b/.gitignore index 753f3431d1698c58226483087102f4a343adf283..e385af1d1f474f6dec80537cd2136e03b6bf3c68 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ workspace/* v[0-9]*.[0-9]*.[0-9]*/ *output* *.log +.acmc/* diff --git a/README.md b/README.md index 2175c98051a9edc3ab93c8947c6cab233a955fe4..bae697d3b11404af3e2eb3a3ca7ab06e8e9de82f 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ Expected Output: From the command prompt, copy medical code lists `/examples/codes`to the phenotype code directory: ```bash -cp -r ./examples/codes/* ./workspace/phen/codes +cp -r ./examples/concepts/* ./workspace/phen/concepts ``` - You can view the source code list here [`res176-abdominal-pain.csv`](.//examples/codes/clinical-codes-org/Symptom%20code%20lists/Abdominal%20pain/res176-abdominal-pain.csv) @@ -215,7 +215,7 @@ Expected Output: Use the following `acmc` command to generate the phenotype in `read2` format: ```bash -acmc phen map +acmc phen map -t read2 ``` Expected Output: @@ -224,9 +224,10 @@ Expected Output: [INFO] - Processing phenotype: <path>/concepts-processing/workspace/phen [INFO] - Validating phenotype: <path>/concepts-processing/workspace/phen [INFO] - Phenotype validated successfully -[INFO] - Processing read2 codes... 
+[INFO] - Processing read2 codes for <path>/concepts-processing/workspace/phen/concepts/clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv [INFO] - Converting to target code type read2 [INFO] - Saved mapped concepts to <path>/concepts-processing/workspace/phen/map/read2.csv +[INFO] - Phenotype processed target code type read2 [INFO] - Phenotype processed successfully ``` @@ -243,18 +244,18 @@ acmc phen publish Expected Output: ```bash -[INFO] - Validating phenotype: /home/mjbonifa/datahdd/brcbat/derived_datasets/mjbonifa/concepts-processing/workspace/phen +[INFO] - Validating phenotype: <path>/concepts-processing/workspace/phen [INFO] - Phenotype validated successfully -[INFO] - New version: v1.0.3 +[INFO] - New version: 0.0.1 [INFO] - Phenotype published successfully ``` -7. **Generate phenotype in SNOWMED code format** +7. **Generate phenotype in Read3 code format** -Generate the phenotype in `snomed` format: +Generate the phenotype in `read3` format: ```bash -acmc phen map -t snomed +acmc phen map -t read3 ``` Expected Output: @@ -263,40 +264,21 @@ Expected Output: [INFO] - Processing phenotype: <path>/concepts-processing/workspace/phen [INFO] - Validating phenotype: <path>/concepts-processing/workspace/phen [INFO] - Phenotype validated successfully -[INFO] - Processing read2 codes... 
-[INFO] - Converting to target code type snomed -[INFO] - Saved mapped concepts to <path>/concepts-processing/workspace/phen/map/snomed.csv +[INFO] - Processing read2 codes for <path>/concepts-processing/workspace/phen/concepts/clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv +[INFO] - Converting to target code type read3 +[INFO] - Saved mapped concepts to <path>/concepts-processing/workspace/phen/map/read3.csv +[INFO] - Phenotype processed target code type read3 [INFO] - Phenotype processed successfully ``` The concept sets translating read2 to snomed will be stored in acmc CSV format in `./workspace/phen/concept-set/snomed/`, e.g. `./workspace/phen/concept-set/snomed/ABDO_PAIN.csv` -8. **Get a copy of the previous version from the repo** - - Use the following `acmc` command to retrieve a copy of the previous version (`v1.0.3`) from the repository: - -```bash -acmc phen copy -v v1.0.3 -``` - -Expected Output: - -```bash -[INFO] - Validating phenotype: <path>/concepts-processing/workspace/phen -[INFO] - Phenotype validated successfully -[INFO] - Copying repo <path>/concepts-processing/workspace/phen to <path>/concepts-processing/workspace/v1.0.3 -[INFO] - Checking out version v1.0.3... -[INFO] - Phenotype copied successfully -``` - -A copy of the phenotype will be created in the directory `./workspace/v1.0.3` +8. **Compare the previous version `0.0.1` with the latest version** -9. 
**Compare the previous version `v1.0.3` with the latest version** - - Use the following `acmc` command to compare the previous version `v1.0.3` with the latest version in the repository: + Use the following `acmc` command to compare the previous version `0.0.1` with the latest version in the workspace phen directory: ```bash -acmc phen diff -old ./workspace/v1.0.3/ +acmc phen diff -ov 0.0.1 ``` Expected Output: @@ -309,14 +291,14 @@ Expected Output: [INFO] - Phenotypes diff'd successfully ``` -A report comparing the phenotype versions will be created in the workspace called './workspace/phen/v1.0.3_diff.md' +A report comparing the phenotype versions will be created in the workspace called './workspace/phen/latest_0.0.1_diff.md' -10. **Publish the phenotype at the next version** +9. **Publish the phenotype at a major version** - Use the following `acmc` command to publish the phenotype at the next version: + Use the following `acmc` command to publish the phenotype at a major version: ```bash -acmc phen publish +acmc phen publish -i major ``` Expected Output: @@ -324,7 +306,7 @@ Expected Output: ```bash [INFO] - Validating phenotype: /home/mjbonifa/datahdd/brcbat/derived_datasets/mjbonifa/concepts-processing/workspace/phen [INFO] - Phenotype validated successfully -[INFO] - New version: v1.0.4 +[INFO] - New version: 1.0.0 [INFO] - Phenotype published successfully ``` diff --git a/acmc/main.py b/acmc/main.py index 5c0b30a95b92715e625ff712420d147adc801542..64d8ed5cbaf14694e190539d713b848b2733bdfc 100644 --- a/acmc/main.py +++ b/acmc/main.py @@ -63,7 +63,7 @@ def phen_copy(args): def phen_diff(args): """Handle the `phen diff` command.""" - phen.diff(args.phen_dir, args.phen_dir_old) + phen.diff(args.phen_dir, args.version, args.old_phen_dir, args.old_version) def main(): @@ -255,13 +255,26 @@ def main(): "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), - help="Directory for the new phenotype version", + help="Directory for the changed phenotype 
version, defaults to workspace directory", ) phen_diff_parser.add_argument( - "-old", - "--phen-dir-old", + "-v", + "--version", + default="latest", + help="Phenotype version to compare with an old version, defaults to the HEAD of the workspace directory", + ) + phen_diff_parser.add_argument( + "-od", + "--old-phen-dir", + type=str, + default=str(phen.DEFAULT_PHEN_PATH.resolve()), + help="Directory for the old phenotype version, defaults to workspace directory", + ) + phen_diff_parser.add_argument( + "-ov", + "--old-version", required=True, - help="Directory of the old phenotype version that is compared to the new one", + help="Old phenotype version to compare with the changed version", ) phen_diff_parser.set_defaults(func=phen_diff) diff --git a/acmc/omop.py b/acmc/omop.py index b9761913667b3da763e0d311160e32f3ace12e14..a66607463beb8f16e7a1c8b55facfba6440bd887 100644 --- a/acmc/omop.py +++ b/acmc/omop.py @@ -312,7 +312,7 @@ def export(map_path, export_path, version, omop_metadata): # Get the list of all tables cur.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = cur.fetchall() # List of tables - + # Export each table to a separate CSV file for table in tables: table_name = table[0] diff --git a/acmc/phen.py b/acmc/phen.py index f2111ac7c0fb311fb64b64a973cefc75aea84b13..03df2d4647e7689aa600b2cc31dbcd816ccaea0f 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -6,6 +6,7 @@ import os import sqlite3 import sys import shutil +import time import git import re import logging @@ -1187,19 +1188,19 @@ def diff_map_files(old_map_path, new_map_path): return report -def diff(phen_dir, phen_old_dir): +def diff_phen(new_phen_path, new_version, old_phen_path, old_version, report_path): """Compare the differences between two versions of a phenotype""" # validate phenotypes - validate(phen_old_dir) - validate(phen_dir) + logger.debug(f"Validating for diff old path: {str(old_phen_path.resolve())}") + validate(str(old_phen_path.resolve())) + 
logger.debug(f"Validating for diff new path: {str(new_phen_path.resolve())}") + validate(str(new_phen_path.resolve())) # get old and new config - old_phen_path = Path(phen_old_dir) old_config = old_phen_path / CONFIG_FILE with old_config.open("r") as file: old_config = yaml.safe_load(file) - new_phen_path = Path(phen_dir) new_config = new_phen_path / CONFIG_FILE with new_config.open("r") as file: new_config = yaml.safe_load(file) @@ -1208,11 +1209,11 @@ def diff(phen_dir, phen_old_dir): report = f"# Phenotype Comparison Report\n" report += f"## Original phenotype\n" report += f" - {old_config['phenotype']['omop']['vocabulary_id']}\n" - report += f" - {old_config['phenotype']['version']}\n" + report += f" - {old_version}\n" report += f" - {str(old_phen_path.resolve())}\n" report += f"## Changed phenotype:\n" report += f" - {new_config['phenotype']['omop']['vocabulary_id']}\n" - report += f" - {new_config['phenotype']['version']}\n" + report += f" - {new_version}\n" report += f" - {str(new_phen_path.resolve())}\n" # Step 1: check differences configuration files @@ -1226,11 +1227,75 @@ def diff(phen_dir, phen_old_dir): report += diff_map_files(old_map_path, new_map_path) # initialise report file - report_file_name = old_phen_path.name + "_diff.md" - report_path = new_phen_path / report_file_name logger.debug(f"Writing to report file {str(report_path.resolve())}") report_file = open(report_path, "w") report_file.write(report) report_file.close() logger.info(f"Phenotypes diff'd successfully") + + +def diff(phen_dir, version, old_phen_dir, old_version): + # make tmp directory .acmc + timestamp = time.strftime("%Y%m%d_%H%M%S") + temp_dir = Path(f".acmc/diff_{timestamp}") + + changed_phen_path = Path(phen_dir) + if not changed_phen_path.exists(): + raise ValueError( + f"Changed phenotype directory does not exist: {str(changed_phen_path.resolve())}" + ) + + old_phen_path = Path(old_phen_dir) + if not old_phen_path.exists(): + raise ValueError( + f"Old phenotype directory 
does not exist: {str(old_phen_path.resolve())}" + ) + + try: + # Create the directory + temp_dir.mkdir(parents=True, exist_ok=True) + logger.debug(f"Temporary directory created: {temp_dir}") + + # Create temporary directories + changed_path = temp_dir / "changed" + changed_path.mkdir(parents=True, exist_ok=True) + old_path = temp_dir / "old" + old_path.mkdir(parents=True, exist_ok=True) + + # checkout changed + if version == "latest": + logger.debug( + f"Copying changed repo from {phen_dir} into {changed_path} at version {version}..." + ) + shutil.copytree(changed_phen_path, changed_path, dirs_exist_ok=True) + else: + logger.debug( + f"Cloning changed repo from {phen_dir} into {changed_path} at version {version}..." + ) + changed_repo = git.Repo.clone_from(changed_phen_path, changed_path) + changed_repo.git.checkout(version) + + # checkout old + if old_version == "latest": + logger.debug( + f"Copying old repo from {old_phen_dir} into {old_path} at version {old_version}..." + ) + shutil.copytree(old_phen_path, old_path, dirs_exist_ok=True) + else: + logger.debug( + f"Cloning old repo from {old_phen_dir} into {old_path} at version {old_version}..." + ) + old_repo = git.Repo.clone_from(old_phen_dir, old_path) + old_repo.git.checkout(old_version) + + report_filename = f"{version}_{old_version}_diff.md" + report_path = changed_phen_path / report_filename + # diff old with new + diff_phen(changed_path, version, old_path, old_version, report_path) + + finally: + # clean up tmp directory + if temp_dir.exists(): + shutil.rmtree(temp_dir) + print(f"Temporary directory removed: {temp_dir}") diff --git a/docs/usage.md b/docs/usage.md index e0428261f5f8e49a796dd42500a8ae955f904151..bbc7ab369b1fdd96d88ff880cc886dc060e154e9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -132,5 +132,8 @@ The `phen` command is used phenotype-related operations. 
acmc phen diff -d <NEW_PHENOTYPE_DIRECTORY> -old <OLD_PHENOTYPE_DIRECTORY> ``` - - `-d`, `--phen-dir`: (Optional) Directory of current phenotype configuration (the default is ./build/phen). - - `-old`, `--phen-dir-old`: (Required) Directory of old phenotype version) \ No newline at end of file + - `-d`, `--phen-dir`: (Optional) Directory of changed phenotype, default is `./workspace/phen`. + - `-v`, `--version`: (Optional) Version of the changed phenotype, default is `latest` which is the current files in the changed phen directory. + - `-od`, `--old-phen-dir`: (Optional) Directory of old phenotype, default is `./workspace/phen`. + - `-ov`, `--old-version`: (Required) Old phenotype version to compare with the changed version; use `latest` for the current files in the old phen directory. + diff --git a/tests/test_acmc.py b/tests/test_acmc.py index ee5dcf1dbe239e206569991b79540d144e8b681e..c8ece570f855715e2432d371fba9b976b1201b8f 100644 --- a/tests/test_acmc.py +++ b/tests/test_acmc.py @@ -159,8 +159,8 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file): "diff", "-d", str(phen_path.resolve()), - "-old", - str(old_path.resolve()), + "-ov", + "0.0.1", ], ) main.main() @@ -272,15 +272,15 @@ def test_diff(tmp_dir, monkeypatch, caplog): "diff", "-d", str(phen_path.resolve()), - "-old", - str(old_path.resolve()), + "-ov", + "0.0.1", ], ) main.main() assert "Phenotypes diff'd successfully" in caplog.text # check changes - with open(phen_path / "0.0.1_diff.md", "r") as file: + with open(phen_path / "latest_0.0.1_diff.md", "r") as file: content = file.read() assert "Removed concepts ['ABDO_PAIN']" in content assert "Added concepts ['DID_NOT_ATTEND']" in content @@ -318,14 +318,14 @@ def test_diff(tmp_dir, monkeypatch, caplog): "diff", "-d", str(phen_path.resolve()), - "-old", - str(old_path.resolve()), + "-ov", + "0.0.1", ], ) main.main() assert "Phenotypes diff'd successfully" in caplog.text - with open(phen_path / "0.0.1_diff.md", "r") as 
file: + with open(phen_path / "latest_0.0.1_diff.md", "r") as file: content = file.read() assert "Removed concepts ['ABDO_PAIN']" in content assert "Added concepts ['DEPRESSION', 'DID_NOT_ATTEND', 'HYPERTENSION']" in content