From dd623973362e89bdd8b0f346a8dbaabf7614a586 Mon Sep 17 00:00:00 2001 From: Michael Boniface <m.j.boniface@soton.ac.uk> Date: Tue, 18 Feb 2025 12:28:50 +0000 Subject: [PATCH] fixed the version from commit, supports an empty repo or a repo that's been created with an initial readme. works for local repo too. Set default branch to main, it's what we assume. Also checks if there's changes to the repo and if not then does not update versions. Does not support pulling and merging if there's collaboration or someone changes a remote. That will need to be sorted by the user using git commands themselves. --- phen.py | 75 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/phen.py b/phen.py index ebfcc33..fca8c08 100644 --- a/phen.py +++ b/phen.py @@ -28,6 +28,8 @@ CONFIG_FILE = 'config.json' ERROR_FILE = 'errors.csv' REPORT_FILE = 'report.md' +DEFAULT_GIT_BRANCH = 'main' + SPLIT_COL_ACTION = "split_col" CODES_COL_ACTION = "codes_col" DIVIDE_COL_ACTION = "divide_col" @@ -87,13 +89,42 @@ def init(phen_dir, remote_url): # clone the repo repo = git.cmd.Git() repo.clone(git_url, phen_path) + # open repo repo = git.Repo(phen_path) + + # check if there are any commits (new repo has no commits) + if len(repo.branches) == 0 or repo.head.is_detached: # Handle detached HEAD (e.g., after init) + print("The phen repository has no commits yet.") + commit_count = 0 + else: + # Get the total number of commits in the default branch + commit_count = sum(1 for _ in repo.iter_commits()) + print(f"Repo has previous commits: {commit_count}") else: # local repo, create the directories and init phen_path.mkdir(parents=True, exist_ok=True) print(f"Phen directory '{phen_path}' has been created.") repo = git.Repo.init(phen_path) - + commit_count = 0 + + # initialise empty repos + if commit_count == 0: + # create initial commit + initial_file_path = phen_path / "README.md" + with open(initial_file_path, "w") as file: + file.write("# Initial commit\nThis is the first commit in the phen repository.\n") + repo.index.add([initial_file_path]) + repo.index.commit("Initial commit") + commit_count = 1 + + # Checkout the phens default branch, creating it if it does not exist + if DEFAULT_GIT_BRANCH in repo.branches: + main_branch = repo.heads[DEFAULT_GIT_BRANCH] + main_branch.checkout() + else: + main_branch = repo.create_head(DEFAULT_GIT_BRANCH) + main_branch.checkout() + # if the phen path does not contain the config file then initialise the phen type config_path = phen_path / CONFIG_FILE if not config_path.exists(): @@ -107,9 +138,14 @@ def init(phen_dir, remote_url): output_path = phen_path / OUTPUT_DIR output_path.mkdir(exist_ok=True) keep_path = output_path / '.gitkeep' - keep_path.touch(exist_ok=True) - # create empty config file - initial_version = "v1.0.0" + keep_path.touch(exist_ok=True) + + # set initial version based on the number of commits in the repo, depending on how the repo was created + # e.g., with a README.md, then there will be some initial commits before the phen config is added + next_commit_count = commit_count + 1 + initial_version = f"v1.0.{next_commit_count}" + + # create empty phen config file config = { "concept_sets": { "version": initial_version, @@ -125,17 +161,15 @@ def init(phen_dir, remote_url): ] } config_path = phen_path / CONFIG_FILE - # Write the JSON data to a file with open(config_path, "w", encoding="utf-8") as f: json.dump(config, f, indent=4) - # initialise phen directory as a git repo includ .gitkeep to keep the codes dir + # add to repo and commit repo.git.add(codes_path) repo.git.add(output_path) repo.git.add(all=True) - msg = "Initialized phen git repo." - repo.index.commit(msg) - #repo.create_tag(initial_version, message=f"Release {initial_version}") + msg = "initialised the phen git repo." + repo.index.commit(msg) print(msg) else: print(f"Phenotype configuration files already exist") @@ -512,9 +546,19 @@ def publish(phen_dir): validate(phen_dir) phen_path = Path(phen_dir) - # create git repo object + # load git repo and set the branch repo = git.Repo(phen_path) + if DEFAULT_GIT_BRANCH in repo.branches: + main_branch = repo.heads[DEFAULT_GIT_BRANCH] + main_branch.checkout() + else: + raise AttributeError(f"Phen repo does not contain the default branch {DEFAULT_GIT_BRANCH}") + # check if any changes to publish + if not repo.is_dirty() and not repo.untracked_files: + print("Nothing to publish, no changes to the repo") + return + # get major version from configuration file config_path = phen_path / CONFIG_FILE config = json.load(open(config_path, "rb")) @@ -522,12 +566,7 @@ def publish(phen_dir): major_version = match.group(1) # get latest minor version from git commit count - try: - commit_count = len(list(repo.iter_commits("HEAD"))) - #commit_hash = repo.head.object.hexsha[:7] # Short hash - except git.exc.GitCommandError: - commit_count = 0 - print("No commits to repo yet") + commit_count = len(list(repo.iter_commits("HEAD"))) # set version and write to config file so consistent with repo version next_minor_version = commit_count + 1 @@ -552,6 +591,7 @@ def publish(phen_dir): # push to origin if a remote repo try: origin = repo.remotes.origin + origin.push('main') origin.push(tags=True) print("Changes pushed to 'origin'.") except AttributeError: @@ -653,9 +693,6 @@ def diff(phen_dir, phen_old_dir): df2 = df2[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count() # Check for added and removed concepts - print(file) - print(df1.index) - print(df2.index) report.write("- Removed concepts {}\n".format(list(set(df1.index) - set(df2.index)))) report.write("- Added concepts {}\n".format(list(set(df2.index) - set(df1.index)))) -- GitLab