Skip to content
Snippets Groups Projects
Commit 9df8d5d2 authored by mjbonifa's avatar mjbonifa
Browse files

(fix) Changed the versioning of phenotypes to semantic versioning using the...

(fix) Changed the versioning of phenotypes to semantic versioning based on the last tag in the repo rather than the commit count. When publishing, the user can specify whether to increment the major, minor or patch version, which keeps the versioning simple. Previously we used the commit count, but that was problematic because it always incremented the patch version and, when using an existing repo, the commit history could be large. This would especially be the case when forking a repo, where the commit history is retained. Closes #51
parent 34f5abf4
No related branches found
No related tags found
No related merge requests found
...@@ -53,7 +53,7 @@ def phen_export(args): ...@@ -53,7 +53,7 @@ def phen_export(args):
def phen_publish(args): def phen_publish(args):
"""Handle the `phen publish` command.""" """Handle the `phen publish` command."""
phen.publish(args.phen_dir, args.msg, args.remote_url) phen.publish(args.phen_dir, args.msg, args.remote_url, args.increment)
def phen_copy(args): def phen_copy(args):
...@@ -203,6 +203,14 @@ def main(): ...@@ -203,6 +203,14 @@ def main():
default=str(phen.DEFAULT_PHEN_PATH.resolve()), default=str(phen.DEFAULT_PHEN_PATH.resolve()),
help="Phenotype workspace directory", help="Phenotype workspace directory",
) )
phen_publish_parser.add_argument(
"-i",
"--increment",
type=str,
default=phen.DEFAULT_VERSION_INC,
choices=phen.SEMANTIC_VERSION_TYPES,
help=f"Version increment: {phen.SEMANTIC_VERSION_TYPES}, default is {phen.DEFAULT_VERSION_INC} increment",
)
phen_publish_parser.add_argument( phen_publish_parser.add_argument(
"-m", "--msg", help="Message to include with the published version" "-m", "--msg", help="Message to include with the published version"
) )
......
...@@ -11,6 +11,7 @@ import re ...@@ -11,6 +11,7 @@ import re
import logging import logging
import requests import requests
import yaml import yaml
import semver
from cerberus import Validator from cerberus import Validator
from deepdiff import DeepDiff from deepdiff import DeepDiff
from pathlib import Path from pathlib import Path
...@@ -37,6 +38,8 @@ OMOP_PATH = Path(CONCEPT_SET_DIR) / "omop" ...@@ -37,6 +38,8 @@ OMOP_PATH = Path(CONCEPT_SET_DIR) / "omop"
DEFAULT_PHEN_DIR_LIST = [CONCEPTS_DIR, MAP_DIR, CONCEPT_SET_DIR] DEFAULT_PHEN_DIR_LIST = [CONCEPTS_DIR, MAP_DIR, CONCEPT_SET_DIR]
CONFIG_FILE = "config.yaml" CONFIG_FILE = "config.yaml"
VOCAB_VERSION_FILE = "vocab_version.yaml" VOCAB_VERSION_FILE = "vocab_version.yaml"
SEMANTIC_VERSION_TYPES = ["major", "minor", "patch"]
DEFAULT_VERSION_INC = "patch"
DEFAULT_GIT_BRANCH = "main" DEFAULT_GIT_BRANCH = "main"
...@@ -58,7 +61,7 @@ CONFIG_SCHEMA = { ...@@ -58,7 +61,7 @@ CONFIG_SCHEMA = {
"version": { "version": {
"type": "string", "type": "string",
"required": True, "required": True,
"regex": r"^v\d+\.\d+\.\d+$", # Enforces 'vN.N.N' format "regex": r"^\d+\.\d+\.\d+$", # Enforces 'vN.N.N' format
}, },
"omop": { "omop": {
"type": "dict", "type": "dict",
...@@ -258,15 +261,10 @@ def init(phen_dir, remote_url): ...@@ -258,15 +261,10 @@ def init(phen_dir, remote_url):
for d in DEFAULT_PHEN_DIR_LIST: for d in DEFAULT_PHEN_DIR_LIST:
create_empty_git_dir(phen_path / d) create_empty_git_dir(phen_path / d)
# set initial version based on the number of commits in the repo, depending on how the repo was created
# e.g., with a README.md, then there will be some initial commits before the phen config is added
next_commit_count = commit_count + 1
initial_version = f"v1.0.{next_commit_count}"
# create empty phen config file # create empty phen config file
config = { config = {
"phenotype": { "phenotype": {
"version": initial_version, "version": "0.0.0",
"omop": { "omop": {
"vocabulary_id": "", "vocabulary_id": "",
"vocabulary_name": "", "vocabulary_name": "",
...@@ -365,7 +363,7 @@ def validate(phen_dir): ...@@ -365,7 +363,7 @@ def validate(phen_dir):
code_types = parse.CodeTypeParser().code_types code_types = parse.CodeTypeParser().code_types
# check the version number is of the format vn.n.n # check the version number is of the format vn.n.n
match = re.match(r"v(\d+\.\d+\.\d+)", phenotype["version"]) match = re.match(r"(\d+\.\d+\.\d+)", phenotype["version"])
if not match: if not match:
validation_errors.append( validation_errors.append(
f"Invalid version format in configuration file: {phenotype['version']}" f"Invalid version format in configuration file: {phenotype['version']}"
...@@ -840,7 +838,35 @@ def map_target_code_type(phen_path, phenotype, target_code_type): ...@@ -840,7 +838,35 @@ def map_target_code_type(phen_path, phenotype, target_code_type):
logger.info(f"Phenotype processed target code type {target_code_type}") logger.info(f"Phenotype processed target code type {target_code_type}")
def publish(phen_dir, msg, remote_url): def generate_version_tag(repo, increment=DEFAULT_VERSION_INC, use_v_prefix=False):
# Get all valid semantic version tags
versions = []
for tag in repo.tags:
tag_name = (
tag.name.lstrip("v") if use_v_prefix else tag.name
) # Remove 'v' if needed
if semver.Version.is_valid(tag_name):
versions.append(semver.Version.parse(tag_name))
# Determine the next version
if not versions:
new_version = semver.Version(0, 0, 1)
else:
latest_version = max(versions)
if increment == "major":
new_version = latest_version.bump_major()
elif increment == "minor":
new_version = latest_version.bump_minor()
else:
new_version = latest_version.bump_patch()
# Create the new tag
new_version_str = f"v{new_version}" if use_v_prefix else str(new_version)
return new_version_str
def publish(phen_dir, msg, remote_url, increment=DEFAULT_VERSION_INC):
"""Publishes updates to the phenotype by commiting all changes to the repo directory""" """Publishes updates to the phenotype by commiting all changes to the repo directory"""
# Validate config # Validate config
...@@ -862,21 +888,16 @@ def publish(phen_dir, msg, remote_url): ...@@ -862,21 +888,16 @@ def publish(phen_dir, msg, remote_url):
logger.info("Nothing to publish, no changes to the repo") logger.info("Nothing to publish, no changes to the repo")
return return
# get major version from configuration file # get next version
new_version_str = generate_version_tag(repo, increment)
logger.info(f"New version: {new_version_str}")
# Write version in configuration file
config_path = phen_path / CONFIG_FILE config_path = phen_path / CONFIG_FILE
with config_path.open("r") as file: with config_path.open("r") as file:
config = yaml.safe_load(file) config = yaml.safe_load(file)
match = re.match(r"v(\d+\.\d+)", config["phenotype"]["version"])
major_version = match.group(1)
# get latest minor version from git commit count
commit_count = len(list(repo.iter_commits("HEAD")))
# set version and write to config file so consistent with repo version config["phenotype"]["version"] = new_version_str
next_minor_version = commit_count + 1
version = f"v{major_version}.{next_minor_version}"
logger.debug(f"New version: {version}")
config["phenotype"]["version"] = version
with open(config_path, "w") as file: with open(config_path, "w") as file:
yaml.dump( yaml.dump(
config, config,
...@@ -887,18 +908,13 @@ def publish(phen_dir, msg, remote_url): ...@@ -887,18 +908,13 @@ def publish(phen_dir, msg, remote_url):
default_style='"', default_style='"',
) )
# Add and commit changes to repo # Add and commit changes to repo including version updates
commit_message = f"Committing updates to phenotype {phen_path}" commit_message = f"Committing updates to phenotype {phen_path}"
repo.git.add("--all") repo.git.add("--all")
repo.index.commit(commit_message) repo.index.commit(commit_message)
# Create and push the tag # Add tag to the repo
if version in repo.tags: repo.create_tag(new_version_str)
raise Exception(f"Tag {version} already exists in repo {phen_path}")
if msg is None:
msg = f"Release {version}"
repo.create_tag(version, message=msg)
logger.info(f"New version: {version}")
# push to origin if a remote repo # push to origin if a remote repo
if remote_url is not None and "origin" not in repo.remotes: if remote_url is not None and "origin" not in repo.remotes:
...@@ -916,7 +932,7 @@ def publish(phen_dir, msg, remote_url): ...@@ -916,7 +932,7 @@ def publish(phen_dir, msg, remote_url):
else: else:
logger.debug("Remote 'origin' is not set") logger.debug("Remote 'origin' is not set")
except Exception as e: except Exception as e:
repo.delete_tag(version) repo.delete_tag(new_version_str)
repo.git.reset("--soft", "HEAD~1") repo.git.reset("--soft", "HEAD~1")
raise e raise e
......
...@@ -108,7 +108,8 @@ The `phen` command is used for phenotype-related operations. ...@@ -108,7 +108,8 @@ The `phen` command is used for phenotype-related operations.
``` ```
- `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen).
- `-m`, `--msg`: (Optional) Message to include with the published version- - `-i`, `--increment`: (Optional) Version increment: `major`, `minor`, or `patch`, default is `patch` increment
- `-m`, `--msg`: (Optional) Message to include with the published version
- `-r`, `--remote_url`: (Optional) URL to a remote git repository, only supports an empty repo without existing commits. - `-r`, `--remote_url`: (Optional) URL to a remote git repository, only supports an empty repo without existing commits.
- **Copy Phenotype Configuration** - **Copy Phenotype Configuration**
......
phenotype: phenotype:
version: "v1.0.1" version: "0.0.0"
omop: omop:
vocabulary_id: "ACMC_Example_1" vocabulary_id: "ACMC_Example_1"
vocabulary_name: "ACMC example 1 phenotype" vocabulary_name: "ACMC example 1 phenotype"
......
phenotype: phenotype:
version: "v1.0.1" version: "0.0.0"
omop: omop:
vocabulary_id: "ACMC_Example_2" vocabulary_id: "ACMC_Example_2"
vocabulary_name: "ACMC example 2 phenotype" vocabulary_name: "ACMC example 2 phenotype"
......
phenotype: phenotype:
version: "v1.0.1" version: "0.0.0"
omop: omop:
vocabulary_id: "ACMC_Example_3" vocabulary_id: "ACMC_Example_3"
vocabulary_name: "ACMC example 3 phenotype" vocabulary_name: "ACMC example 3 phenotype"
......
...@@ -37,7 +37,8 @@ dependencies = [ ...@@ -37,7 +37,8 @@ dependencies = [
"tables", "tables",
"pytest", "pytest",
"pyyaml", "pyyaml",
"requests", "requests",
"semver",
"simpledbf", "simpledbf",
"smmap", "smmap",
"sqlalchemy", "sqlalchemy",
......
...@@ -141,7 +141,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file): ...@@ -141,7 +141,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
"-td", "-td",
str(tmp_dir.resolve()), str(tmp_dir.resolve()),
"-v", "-v",
"v1.0.3", "0.0.1",
], ],
) )
main.main() main.main()
...@@ -149,7 +149,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file): ...@@ -149,7 +149,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
# diff phenotype # diff phenotype
with caplog.at_level(logging.DEBUG): with caplog.at_level(logging.DEBUG):
old_path = tmp_dir / "v1.0.3" old_path = tmp_dir / "0.0.1"
monkeypatch.setattr( monkeypatch.setattr(
sys, sys,
"argv", "argv",
...@@ -234,7 +234,7 @@ def test_diff(tmp_dir, monkeypatch, caplog): ...@@ -234,7 +234,7 @@ def test_diff(tmp_dir, monkeypatch, caplog):
"-td", "-td",
str(tmp_dir.resolve()), str(tmp_dir.resolve()),
"-v", "-v",
"v1.0.3", "0.0.1",
], ],
) )
main.main() main.main()
...@@ -260,9 +260,9 @@ def test_diff(tmp_dir, monkeypatch, caplog): ...@@ -260,9 +260,9 @@ def test_diff(tmp_dir, monkeypatch, caplog):
main.main() main.main()
assert "Phenotype processed successfully" in caplog.text assert "Phenotype processed successfully" in caplog.text
# diff phenotype with v1.0.3 # diff phenotype with 0.0.1
with caplog.at_level(logging.DEBUG): with caplog.at_level(logging.DEBUG):
old_path = tmp_dir / "v1.0.3" old_path = tmp_dir / "0.0.1"
monkeypatch.setattr( monkeypatch.setattr(
sys, sys,
"argv", "argv",
...@@ -280,7 +280,7 @@ def test_diff(tmp_dir, monkeypatch, caplog): ...@@ -280,7 +280,7 @@ def test_diff(tmp_dir, monkeypatch, caplog):
assert "Phenotypes diff'd successfully" in caplog.text assert "Phenotypes diff'd successfully" in caplog.text
# check changes # check changes
with open(phen_path / "v1.0.3_diff.md", "r") as file: with open(phen_path / "0.0.1_diff.md", "r") as file:
content = file.read() content = file.read()
assert "Removed concepts ['ABDO_PAIN']" in content assert "Removed concepts ['ABDO_PAIN']" in content
assert "Added concepts ['DID_NOT_ATTEND']" in content assert "Added concepts ['DID_NOT_ATTEND']" in content
...@@ -306,9 +306,9 @@ def test_diff(tmp_dir, monkeypatch, caplog): ...@@ -306,9 +306,9 @@ def test_diff(tmp_dir, monkeypatch, caplog):
main.main() main.main()
assert "Phenotype processed successfully" in caplog.text assert "Phenotype processed successfully" in caplog.text
# diff phenotype with v1.0.3 # diff phenotype with 0.0.1
with caplog.at_level(logging.DEBUG): with caplog.at_level(logging.DEBUG):
old_path = tmp_dir / "v1.0.3" old_path = tmp_dir / "0.0.1"
monkeypatch.setattr( monkeypatch.setattr(
sys, sys,
"argv", "argv",
...@@ -325,7 +325,7 @@ def test_diff(tmp_dir, monkeypatch, caplog): ...@@ -325,7 +325,7 @@ def test_diff(tmp_dir, monkeypatch, caplog):
main.main() main.main()
assert "Phenotypes diff'd successfully" in caplog.text assert "Phenotypes diff'd successfully" in caplog.text
with open(phen_path / "v1.0.3_diff.md", "r") as file: with open(phen_path / "0.0.1_diff.md", "r") as file:
content = file.read() content = file.read()
assert "Removed concepts ['ABDO_PAIN']" in content assert "Removed concepts ['ABDO_PAIN']" in content
assert "Added concepts ['DEPRESSION', 'DID_NOT_ATTEND', 'HYPERTENSION']" in content assert "Added concepts ['DEPRESSION', 'DID_NOT_ATTEND', 'HYPERTENSION']" in content
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment