From 66f7c9e973b181bd771aabdc2fd9e9db33fa7df4 Mon Sep 17 00:00:00 2001 From: Michael Boniface <m.j.boniface@soton.ac.uk> Date: Tue, 18 Feb 2025 12:37:53 +0000 Subject: [PATCH] tidied phen init --- phen.py | 197 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 101 insertions(+), 96 deletions(-) diff --git a/phen.py b/phen.py index fca8c08..48c7d1b 100644 --- a/phen.py +++ b/phen.py @@ -42,9 +42,12 @@ class PhenValidationException(Exception): self.validation_errors = validation_errors def construct_git_url(remote_url): + """Constructs a git url for github or gitlab including a PAT token environment variable""" # check the url parsed_url = urlparse(remote_url) - + + # if github in the URL otherwise assume it's gitlab, if we want to use others such as codeberg we'd + # need to update this function if the URL scheme is different. if "github.com" in parsed_url.netloc: # get GitHub PAT from environment variable auth = os.getenv("ACMC_GITHUB_PAT") @@ -78,104 +81,106 @@ def init(phen_dir, remote_url): else: configure=True - # configure phen directories - if configure: - repo = None - # if remote then clone the repo otherwise init a local repo - if remote_url != None: - # add PAT token to the URL - git_url = construct_git_url(remote_url) - - # clone the repo - repo = git.cmd.Git() - repo.clone(git_url, phen_path) - # open repo - repo = git.Repo(phen_path) - - # check if there are any commits (new repo has no commits) - if len(repo.branches) == 0 or repo.head.is_detached: # Handle detached HEAD (e.g., after init) - print("The phen repository has no commits yet.") - commit_count = 0 - else: - # Get the total number of commits in the default branch - commit_count = sum(1 for _ in repo.iter_commits()) - print(f"Repo has previous commits: {commit_count}") - else: - # local repo, create the directories and init - phen_path.mkdir(parents=True, exist_ok=True) - print(f"Phen directory '{phen_path}' has been created.") - repo = git.Repo.init(phen_path) - commit_count = 0 + if not configure: + print(f"Exiting, phenotype not initiatised") + return - # initialise empty repos - if commit_count == 0: - # create initial commit - initial_file_path = phen_path / "README.md" - with open(initial_file_path, "w") as file: - file.write("# Initial commit\nThis is the first commit in the phen repository.\n") - repo.index.add([initial_file_path]) - repo.index.commit("Initial commit") - commit_count = 1 - - # Checkout the phens default branch, creating it if it does not exist - if DEFAULT_GIT_BRANCH in repo.branches: - main_branch = repo.heads[DEFAULT_GIT_BRANCH] - main_branch.checkout() - else: - main_branch = repo.create_head(DEFAULT_GIT_BRANCH) - main_branch.checkout() - - # if the phen path does not contain the config file then initialise the phen type - config_path = phen_path / CONFIG_FILE - if not config_path.exists(): - print("Creating configuration files") - # create codes directory - codes_path = phen_path / CODES_DIR - codes_path.mkdir(exist_ok=True) - keep_path = codes_path / '.gitkeep' - keep_path.touch(exist_ok=True) - # create maps directory - output_path = phen_path / OUTPUT_DIR - output_path.mkdir(exist_ok=True) - keep_path = output_path / '.gitkeep' - keep_path.touch(exist_ok=True) - - # set initial version based on the number of commits in the repo, depending on how the repo was created - # e.g., with a README.md, then there will be some initial commits before the phen config is added - next_commit_count = commit_count + 1 - initial_version = f"v1.0.{next_commit_count}" - - # create empty phen config file - config = { - "concept_sets": { - "version": initial_version, - "omop": { - "vocabulary_id": "", - "vocabulary_name": "", - "vocabulary_reference": "" - }, - "concept_set": [ - ] - }, - "codes": [ - ] - } - config_path = phen_path / CONFIG_FILE - with open(config_path, "w", encoding="utf-8") as f: - json.dump(config, f, indent=4) - - # add to repo and commit - repo.git.add(codes_path) - repo.git.add(output_path) - repo.git.add(all=True) - msg = "initialised the phen git repo." - repo.index.commit(msg) - print(msg) + # Initialise repo from local or remote + repo = None + # if remote then clone the repo otherwise init a local repo + if remote_url != None: + # add PAT token to the URL + git_url = construct_git_url(remote_url) + + # clone the repo + repo = git.cmd.Git() + repo.clone(git_url, phen_path) + # open repo + repo = git.Repo(phen_path) + + # check if there are any commits (new repo has no commits) + if len(repo.branches) == 0 or repo.head.is_detached: # Handle detached HEAD (e.g., after init) + print("The phen repository has no commits yet.") + commit_count = 0 else: - print(f"Phenotype configuration files already exist") - print(f"Phenotype initialised") + # Get the total number of commits in the default branch + commit_count = sum(1 for _ in repo.iter_commits()) + print(f"Repo has previous commits: {commit_count}") + else: + # local repo, create the directories and init + phen_path.mkdir(parents=True, exist_ok=True) + print(f"Phen directory '{phen_path}' has been created.") + repo = git.Repo.init(phen_path) + commit_count = 0 + + # initialise empty repos + if commit_count == 0: + # create initial commit + initial_file_path = phen_path / "README.md" + with open(initial_file_path, "w") as file: + file.write("# Initial commit\nThis is the first commit in the phen repository.\n") + repo.index.add([initial_file_path]) + repo.index.commit("Initial commit") + commit_count = 1 + + # Checkout the phens default branch, creating it if it does not exist + if DEFAULT_GIT_BRANCH in repo.branches: + main_branch = repo.heads[DEFAULT_GIT_BRANCH] + main_branch.checkout() else: - print(f"Phenotype not initiatised") + main_branch = repo.create_head(DEFAULT_GIT_BRANCH) + main_branch.checkout() + + # if the phen path does not contain the config file then initialise the phen type + config_path = phen_path / CONFIG_FILE + if config_path.exists(): + print(f"Phenotype configuration files already exist") + return + + print("Creating phen directory structure and config files") + # create codes directory + codes_path = phen_path / CODES_DIR + codes_path.mkdir(exist_ok=True) + keep_path = codes_path / '.gitkeep' + keep_path.touch(exist_ok=True) + + # create concept sets directory + output_path = phen_path / OUTPUT_DIR + output_path.mkdir(exist_ok=True) + keep_path = output_path / '.gitkeep' + keep_path.touch(exist_ok=True) + + # set initial version based on the number of commits in the repo, depending on how the repo was created + # e.g., with a README.md, then there will be some initial commits before the phen config is added + next_commit_count = commit_count + 1 + initial_version = f"v1.0.{next_commit_count}" + + # create empty phen config file + config = { + "concept_sets": { + "version": initial_version, + "omop": { + "vocabulary_id": "", + "vocabulary_name": "", + "vocabulary_reference": "" + }, + "concept_set": [ + ] + }, + "codes": [ + ] + } + config_path = phen_path / CONFIG_FILE + with open(config_path, "w", encoding="utf-8") as f: + json.dump(config, f, indent=4) + + # add to git repo and commit + repo.git.add(codes_path) + repo.git.add(output_path) + repo.git.add(all=True) + repo.index.commit("initialised the phen git repo.") + + print(f"Phenotype initialised") def validate(phen_dir): """Validates the phenotype directory is a git repo with standard structure""" -- GitLab