From 9df8d5d286de18026eafdd4ea9520b83d3cd74cd Mon Sep 17 00:00:00 2001 From: Michael Boniface <m.j.boniface@soton.ac.uk> Date: Thu, 6 Mar 2025 09:43:09 +0000 Subject: [PATCH] (fix) Changed the versioning of phenotypes to semantic versioning using the last tag in the repo rather than the commit count. The user can specify if they want to increment major, minor or patch when they publish which keeps the versioning simple. Previously we used the commit count but that was problematic due to always incrementing the patch versions and if using an existing repo the commit history could be large. This would especially be the case when forking a repo where the commit history is retained. Closes #51 --- acmc/main.py | 10 +++++- acmc/phen.py | 74 ++++++++++++++++++++++++++----------------- docs/usage.md | 3 +- examples/config1.yaml | 2 +- examples/config2.yaml | 2 +- examples/config3.yaml | 2 +- pyproject.toml | 3 +- tests/test_acmc.py | 18 +++++------ 8 files changed, 70 insertions(+), 44 deletions(-) diff --git a/acmc/main.py b/acmc/main.py index ec29b5d..5c0b30a 100644 --- a/acmc/main.py +++ b/acmc/main.py @@ -53,7 +53,7 @@ def phen_export(args): def phen_publish(args): """Handle the `phen publish` command.""" - phen.publish(args.phen_dir, args.msg, args.remote_url) + phen.publish(args.phen_dir, args.msg, args.remote_url, args.increment) def phen_copy(args): @@ -203,6 +203,14 @@ def main(): default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype workspace directory", ) + phen_publish_parser.add_argument( + "-i", + "--increment", + type=str, + default=phen.DEFAULT_VERSION_INC, + choices=phen.SEMANTIC_VERSION_TYPES, + help=f"Version increment: {phen.SEMANTIC_VERSION_TYPES}, default is {phen.DEFAULT_VERSION_INC} increment", + ) phen_publish_parser.add_argument( "-m", "--msg", help="Message to include with the published version" ) diff --git a/acmc/phen.py b/acmc/phen.py index 86675bf..d4a95d0 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -11,6 +11,7 @@ import re import 
logging import requests import yaml +import semver from cerberus import Validator from deepdiff import DeepDiff from pathlib import Path @@ -37,6 +38,8 @@ OMOP_PATH = Path(CONCEPT_SET_DIR) / "omop" DEFAULT_PHEN_DIR_LIST = [CONCEPTS_DIR, MAP_DIR, CONCEPT_SET_DIR] CONFIG_FILE = "config.yaml" VOCAB_VERSION_FILE = "vocab_version.yaml" +SEMANTIC_VERSION_TYPES = ["major", "minor", "patch"] +DEFAULT_VERSION_INC = "patch" DEFAULT_GIT_BRANCH = "main" @@ -58,7 +61,7 @@ CONFIG_SCHEMA = { "version": { "type": "string", "required": True, - "regex": r"^v\d+\.\d+\.\d+$", # Enforces 'vN.N.N' format + "regex": r"^\d+\.\d+\.\d+$", # Enforces 'vN.N.N' format }, "omop": { "type": "dict", @@ -258,15 +261,10 @@ def init(phen_dir, remote_url): for d in DEFAULT_PHEN_DIR_LIST: create_empty_git_dir(phen_path / d) - # set initial version based on the number of commits in the repo, depending on how the repo was created - # e.g., with a README.md, then there will be some initial commits before the phen config is added - next_commit_count = commit_count + 1 - initial_version = f"v1.0.{next_commit_count}" - # create empty phen config file config = { "phenotype": { - "version": initial_version, + "version": "0.0.0", "omop": { "vocabulary_id": "", "vocabulary_name": "", @@ -365,7 +363,7 @@ def validate(phen_dir): code_types = parse.CodeTypeParser().code_types # check the version number is of the format vn.n.n - match = re.match(r"v(\d+\.\d+\.\d+)", phenotype["version"]) + match = re.match(r"(\d+\.\d+\.\d+)", phenotype["version"]) if not match: validation_errors.append( f"Invalid version format in configuration file: {phenotype['version']}" @@ -840,7 +838,35 @@ def map_target_code_type(phen_path, phenotype, target_code_type): logger.info(f"Phenotype processed target code type {target_code_type}") -def publish(phen_dir, msg, remote_url): +def generate_version_tag(repo, increment=DEFAULT_VERSION_INC, use_v_prefix=False): + # Get all valid semantic version tags + versions = [] + for tag in repo.tags: 
+ tag_name = ( + tag.name.lstrip("v") if use_v_prefix else tag.name + ) # Remove 'v' if needed + if semver.Version.is_valid(tag_name): + versions.append(semver.Version.parse(tag_name)) + + # Determine the next version + if not versions: + new_version = semver.Version(0, 0, 1) + else: + latest_version = max(versions) + if increment == "major": + new_version = latest_version.bump_major() + elif increment == "minor": + new_version = latest_version.bump_minor() + else: + new_version = latest_version.bump_patch() + + # Create the new tag + new_version_str = f"v{new_version}" if use_v_prefix else str(new_version) + + return new_version_str + + +def publish(phen_dir, msg, remote_url, increment=DEFAULT_VERSION_INC): """Publishes updates to the phenotype by commiting all changes to the repo directory""" # Validate config @@ -862,21 +888,16 @@ def publish(phen_dir, msg, remote_url): logger.info("Nothing to publish, no changes to the repo") return - # get major version from configuration file + # get next version + new_version_str = generate_version_tag(repo, increment) + logger.info(f"New version: {new_version_str}") + + # Write version in configuration file config_path = phen_path / CONFIG_FILE with config_path.open("r") as file: config = yaml.safe_load(file) - match = re.match(r"v(\d+\.\d+)", config["phenotype"]["version"]) - major_version = match.group(1) - - # get latest minor version from git commit count - commit_count = len(list(repo.iter_commits("HEAD"))) - # set version and write to config file so consistent with repo version - next_minor_version = commit_count + 1 - version = f"v{major_version}.{next_minor_version}" - logger.debug(f"New version: {version}") - config["phenotype"]["version"] = version + config["phenotype"]["version"] = new_version_str with open(config_path, "w") as file: yaml.dump( config, @@ -887,18 +908,13 @@ def publish(phen_dir, msg, remote_url): default_style='"', ) - # Add and commit changes to repo + # Add and commit changes to repo including 
version updates commit_message = f"Committing updates to phenotype {phen_path}" repo.git.add("--all") repo.index.commit(commit_message) - # Create and push the tag - if version in repo.tags: - raise Exception(f"Tag {version} already exists in repo {phen_path}") - if msg is None: - msg = f"Release {version}" - repo.create_tag(version, message=msg) - logger.info(f"New version: {version}") + # Add tag to the repo + repo.create_tag(new_version_str) # push to origin if a remote repo if remote_url is not None and "origin" not in repo.remotes: @@ -916,7 +932,7 @@ def publish(phen_dir, msg, remote_url): else: logger.debug("Remote 'origin' is not set") except Exception as e: - repo.delete_tag(version) + repo.delete_tag(new_version_str) repo.git.reset("--soft", "HEAD~1") raise e diff --git a/docs/usage.md b/docs/usage.md index b364849..e042826 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -108,7 +108,8 @@ The `phen` command is used phenotype-related operations. ``` - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). - - `-m`, `--msg`: (Optional) Message to include with the published version- + - `-i`, `--increment`: (Optional) Version increment: `major`, `minor`, or `patch`, default is `patch` increment + - `-m`, `--msg`: (Optional) Message to include with the published version - `-r`, `--remote_url`: (Optional) URL to a remote git repository, only supports an empty repo without existing commits. 
- **Copy Phenotype Configuration** diff --git a/examples/config1.yaml b/examples/config1.yaml index 09d0e80..2709d3f 100644 --- a/examples/config1.yaml +++ b/examples/config1.yaml @@ -1,5 +1,5 @@ phenotype: - version: "v1.0.1" + version: "0.0.0" omop: vocabulary_id: "ACMC_Example_1" vocabulary_name: "ACMC example 1 phenotype" diff --git a/examples/config2.yaml b/examples/config2.yaml index 4c6252e..4a9ad79 100644 --- a/examples/config2.yaml +++ b/examples/config2.yaml @@ -1,5 +1,5 @@ phenotype: - version: "v1.0.1" + version: "0.0.0" omop: vocabulary_id: "ACMC_Example_2" vocabulary_name: "ACMC example 2 phenotype" diff --git a/examples/config3.yaml b/examples/config3.yaml index 764d7d8..2e07427 100644 --- a/examples/config3.yaml +++ b/examples/config3.yaml @@ -1,5 +1,5 @@ phenotype: - version: "v1.0.1" + version: "0.0.0" omop: vocabulary_id: "ACMC_Example_3" vocabulary_name: "ACMC example 3 phenotype" diff --git a/pyproject.toml b/pyproject.toml index c340e83..39f046a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,8 @@ dependencies = [ "tables", "pytest", "pyyaml", - "requests", + "requests", + "semver", "simpledbf", "smmap", "sqlalchemy", diff --git a/tests/test_acmc.py b/tests/test_acmc.py index a9bda40..ee5dcf1 100644 --- a/tests/test_acmc.py +++ b/tests/test_acmc.py @@ -141,7 +141,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file): "-td", str(tmp_dir.resolve()), "-v", - "v1.0.3", + "0.0.1", ], ) main.main() @@ -149,7 +149,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file): # diff phenotype with caplog.at_level(logging.DEBUG): - old_path = tmp_dir / "v1.0.3" + old_path = tmp_dir / "0.0.1" monkeypatch.setattr( sys, "argv", @@ -234,7 +234,7 @@ def test_diff(tmp_dir, monkeypatch, caplog): "-td", str(tmp_dir.resolve()), "-v", - "v1.0.3", + "0.0.1", ], ) main.main() @@ -260,9 +260,9 @@ def test_diff(tmp_dir, monkeypatch, caplog): main.main() assert "Phenotype processed successfully" in caplog.text - # diff 
phenotype with v1.0.3 + # diff phenotype with 0.0.1 with caplog.at_level(logging.DEBUG): - old_path = tmp_dir / "v1.0.3" + old_path = tmp_dir / "0.0.1" monkeypatch.setattr( sys, "argv", @@ -280,7 +280,7 @@ def test_diff(tmp_dir, monkeypatch, caplog): assert "Phenotypes diff'd successfully" in caplog.text # check changes - with open(phen_path / "v1.0.3_diff.md", "r") as file: + with open(phen_path / "0.0.1_diff.md", "r") as file: content = file.read() assert "Removed concepts ['ABDO_PAIN']" in content assert "Added concepts ['DID_NOT_ATTEND']" in content @@ -306,9 +306,9 @@ def test_diff(tmp_dir, monkeypatch, caplog): main.main() assert "Phenotype processed successfully" in caplog.text - # diff phenotype with v1.0.3 + # diff phenotype with 0.0.1 with caplog.at_level(logging.DEBUG): - old_path = tmp_dir / "v1.0.3" + old_path = tmp_dir / "0.0.1" monkeypatch.setattr( sys, "argv", @@ -325,7 +325,7 @@ def test_diff(tmp_dir, monkeypatch, caplog): main.main() assert "Phenotypes diff'd successfully" in caplog.text - with open(phen_path / "v1.0.3_diff.md", "r") as file: + with open(phen_path / "0.0.1_diff.md", "r") as file: content = file.read() assert "Removed concepts ['ABDO_PAIN']" in content assert "Added concepts ['DEPRESSION', 'DID_NOT_ATTEND', 'HYPERTENSION']" in content -- GitLab