def phen_fork(args):
    """Dispatch the `phen fork` sub-command to `phen.fork`.

    Reads `phen_dir`, `upstream_url` and `upstream_version` from the parsed
    command-line arguments and forwards them positionally; `remote_url` is
    forwarded as the `new_origin_url` keyword argument.
    """
    positional = (args.phen_dir, args.upstream_url, args.upstream_version)
    phen.fork(*positional, new_origin_url=args.remote_url)
def fork(phen_dir, upstream_url, upstream_version, new_origin_url=None):
    """Fork an upstream phenotype repository into a local phenotype directory.

    Clones `upstream_url` into `phen_dir`, checks that `upstream_version`
    names a reference in the clone, checks that `config.yaml` exists and that
    the phenotype passes `validate`, deletes the local tags, and rewires the
    remotes so that the source repository becomes `upstream`. When
    `new_origin_url` is given it becomes `origin` and the default branch is
    pushed to it.

    Args:
        phen_dir: Local directory to clone into. If it already exists the
            user is asked (via `check_delete_dir`) whether to delete it.
        upstream_url: URL of the phenotype repository to fork.
        upstream_version: Name of the reference (last path component of a
            branch or tag name) that must exist in the upstream repository.
        new_origin_url: Optional URL of the repository the fork is pushed to.

    Returns:
        None. Returns early, without cloning, if the user declines to
        replace an existing directory.

    Raises:
        ValueError: If any step of the fork fails. The phenotype directory is
            removed first and the original exception is chained as the cause.
    """
    logger.info(
        f"Forking upstream repo {upstream_url} {upstream_version} into directory: {phen_dir}"
    )

    phen_path = Path(phen_dir)

    # An existing directory is only replaced with the user's consent
    # (is_dir() is already False for a path that does not exist).
    if phen_path.is_dir():
        configure = check_delete_dir(
            phen_path,
            "The phen directory already exists. Do you want to reinitialise? (yes/no): ",
        )
    else:
        configure = True

    if not configure:
        logger.info("Exiting, phenotype not initiatised")
        return

    try:
        # Clone repo
        git_url = construct_git_url(upstream_url)
        repo = git.Repo.clone_from(git_url, phen_path)

        # Fetch all branches and tags
        repo.remotes.origin.fetch()

        # Check if the version exists; only the last path component of each
        # reference name is compared (e.g. "origin/main" -> "main").
        available_refs = {ref.name.split("/")[-1] for ref in repo.references}
        if upstream_version not in available_refs:
            raise ValueError(
                f"Version '{upstream_version}' not found in the repository: {upstream_url}."
            )

        # Checkout the specified version
        repo.git.checkout(upstream_version)
        # NOTE(review): the default branch is checked out straight after the
        # requested version, so the working tree ends up on the default
        # branch head rather than on `upstream_version` - confirm intent.
        main_branch = repo.heads[DEFAULT_GIT_BRANCH]
        main_branch.checkout()

        # Check if 'config.yaml' exists in the root directory
        config_path = phen_path / "config.yaml"
        if not config_path.is_file():
            raise ValueError(
                "The forked repository is not a valid ACMC repo because 'config.yaml' is missing in the root directory."
            )

        # Validate the phenotype is compatible with the acmc tool
        validate(phen_path)

        # Delete each tag locally
        for tag in repo.tags:
            repo.delete_tag(tag)
            logger.debug(f"Deleted tags from forked repo: {tag}")

        # Add upstream remote
        repo.create_remote("upstream", upstream_url)
        repo.delete_remote("origin")  # Remove existing origin

        # Optionally set a new origin remote
        if new_origin_url:
            git_url = construct_git_url(new_origin_url)
            repo.create_remote("origin", git_url)
            # Push the branch that was checked out above instead of a
            # hard-coded branch name.
            repo.git.push("--set-upstream", "origin", DEFAULT_GIT_BRANCH)

        logger.info(f"Repository forked successfully at {phen_path}")
        logger.info(f"Upstream set to {upstream_url}")
        if new_origin_url:
            logger.info(f"Origin set to {new_origin_url}")

    except Exception as e:
        # Do not leave a half-initialised phenotype directory behind.
        if phen_path.exists():
            shutil.rmtree(phen_path)
        # Chain the cause so the original traceback is preserved.
        raise ValueError(f"Error occurred during repository fork: {str(e)}") from e
{repo.remotes.origin.url}") + logger.debug(f"Remote 'origin' is set {repo.remotes.origin.url}") origin = repo.remotes.origin - logger.info(f"Pushing main branch to {repo.remotes.origin.url}") - origin.push("main") - logger.info(f"Pushing tags to {repo.remotes.origin.url}") + logger.info(f"Pushing main branch to remote repo") + repo.git.push("--set-upstream", "origin", "main") + logger.info(f"Pushing version tags to remote git repo") origin.push(tags=True) logger.debug("Changes pushed to 'origin'") else: diff --git a/docs/usage.md b/docs/usage.md index bbc7ab369b1fdd96d88ff880cc886dc060e154e9..168ab9c00bbdd2f7a802f5576f6cdfff92381edc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -67,16 +67,29 @@ The `omop` command is used for installing OMOP vocabularies. The `phen` command is used phenotype-related operations. -- **Initialize Phenotype** +- **Initialise Phenotype** - Initialize a phenotype directory locally or from a remote git repository: + Initialise a phenotype directory locally or with a remote git repository: ```bash acmc phen init -d <PHENOTYPE_DIRECTORY> -r <REMOTE_URL> ``` - - `-d`, `--phen-dir`: (Optional) Directory to write phenotype configuration (the default is ./build/phen). - - `-r`, `--remote_url`: (Optional) URL to a remote git repository. + - `-d`, `--phen-dir`: (Optional) Directory to write phenotype configuration (the default is ./workspace/phen). + - `-r`, `--remote_url`: (Optional) URL to a remote git repository where the phenotype will be published, only supports an empty repo without existing commits. + +- **Fork Existing Phenotype** + + Initialise a phenotype from an existing phenotype published in a git repository: + + ```bash + acmc phen fork -d <PHENOTYPE_DIRECTORY> -r <REMOTE_URL> -u <UPSTREAM_URL> -v <UPSTREAM_VERSION> + ``` + + - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). 
+ - `-r`, `--remote_url`: (Optional) URL to a remote git repository where the phenotype will be published, only supports an empty repo without existing commits. + - `-u`, `--upstream-url`: (Required) URL to the phenotype repository to fork. + - `-v`, `--upstream-version`: (Required) Phenotype version to fork. - **Validate Phenotype** @@ -86,7 +99,7 @@ The `phen` command is used phenotype-related operations. acmc phen validate -d <PHENOTYPE_DIRECTORY> ``` - - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). - **Map Phenotype** @@ -97,7 +110,7 @@ The `phen` command is used phenotype-related operations. ``` - `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`). - - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). - **Publish Phenotype Configuration** @@ -107,10 +120,10 @@ The `phen` command is used phenotype-related operations. acmc phen publish -d <PHENOTYPE_DIRECTORY> ``` - - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). - `-i`, `--increment`: (Optional) Version increment: `major`, `minor`, or `patch`, default is `patch` increment - `-m`, `--msg`: (Optional) Message to include with the published version - - `-r`, `--remote_url`: (Optional) URL to a remote git repository, only supports an empty repo without existing commits. + - `-r`, `--remote_url`: (Optional) URL to a remote git repository where the phenotype will be published, only supports an empty repo without existing commits. 
- **Copy Phenotype Configuration** @@ -120,7 +133,7 @@ The `phen` command is used phenotype-related operations. acmc phen copy -d <PHENOTYPE_DIRECTORY> -td <TARGET_DIRECTORY> -v <PHENOTYPE_VERSION> ``` - - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). + - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). - `-td`, `--target-dir`: (Optional) Directory to copy the phenotype configuration to, (the default is ./build). - `-v`, `--version`: The phenotype version to copy. @@ -132,7 +145,7 @@ The `phen` command is used phenotype-related operations. acmc phen diff -d <NEW_PHENOTYPE_DIRECTORY> -old <OLD_PHENOTYPE_DIRECTORY> ``` - - `-d`, `--phen-dir`: (Optional) Directory of changed phenotype, default is `./workspace/phen`. + - `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen). - `-v`, `--version`: (Optional) Directory of changed phenotype version, default is `latest` which is the current files in the changed phen directory. - `-od`, `--old-phen-dir`: (Optional) Directory of old phenotype, default is `./workspace/phen`. - `-ov`, `--old-version`: (Required) Old phenotype version to compare with the chnaged version, default is `latest` which are the current files in the old phen directory. 
diff --git a/tests/test_acmc.py b/tests/test_acmc.py index c8ece570f855715e2432d371fba9b976b1201b8f..7ebdaa4aad3ee42bc4818258253ace29edab629c 100644 --- a/tests/test_acmc.py +++ b/tests/test_acmc.py @@ -330,3 +330,18 @@ def test_diff(tmp_dir, monkeypatch, caplog): assert "Removed concepts ['ABDO_PAIN']" in content assert "Added concepts ['DEPRESSION', 'DID_NOT_ATTEND', 'HYPERTENSION']" in content assert "Added outputs: ['read3.csv', 'snomed.csv']" in content + + +# TEST REPO NEEDS TO BE AUTOMATED + +# Create remote repo acmc-test1 (https://git.soton.ac.uk/mjbonifa/acmc-test1.git) and acmc-test2 (https://git.soton.ac.uk/mjbonifa/acmc-test2.git) + +# Init repo from the remote acmc-test1 + +# Copy example and run map + +# Publish creating a version on the remote repo + +# Fork repo from acmc-test1 with remote acmc-test2 + +# Publish repo creating a version on the new repo