From dd623973362e89bdd8b0f346a8dbaabf7614a586 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Tue, 18 Feb 2025 12:28:50 +0000
Subject: [PATCH] Fixed the version derived from the commit count; supports an
 empty repo or a repo that has been created with an initial README. Works for
 a local repo too. Sets the default branch to main, which is what we assume.
 Also checks whether there are changes to the repo and, if not, does not
 update versions. Does not support pulling and merging if there is
 collaboration or someone changes a remote; that will need to be sorted out
 by the user with git commands themselves.

---
 phen.py | 75 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 56 insertions(+), 19 deletions(-)

diff --git a/phen.py b/phen.py
index ebfcc33..fca8c08 100644
--- a/phen.py
+++ b/phen.py
@@ -28,6 +28,8 @@ CONFIG_FILE = 'config.json'
 ERROR_FILE = 'errors.csv'
 REPORT_FILE = 'report.md'
 
+DEFAULT_GIT_BRANCH = 'main'
+
 SPLIT_COL_ACTION = "split_col"
 CODES_COL_ACTION = "codes_col"
 DIVIDE_COL_ACTION = "divide_col"
@@ -87,13 +89,42 @@ def init(phen_dir, remote_url):
 			# clone the repo
 			repo = git.cmd.Git()
 			repo.clone(git_url, phen_path)	
+			# open repo
 			repo = git.Repo(phen_path)
+			
+			# check if there are any commits (new repo has no commits)
+			if len(repo.branches) == 0 or repo.head.is_detached:  # Handle detached HEAD (e.g., after init)
+				print("The phen repository has no commits yet.")
+				commit_count = 0
+			else:
+				# Get the total number of commits in the default branch
+				commit_count = sum(1 for _ in repo.iter_commits())
+				print(f"Repo has previous commits: {commit_count}")
 		else:
 			# local repo, create the directories and init
 			phen_path.mkdir(parents=True, exist_ok=True) 
 			print(f"Phen directory '{phen_path}' has been created.")
 			repo = git.Repo.init(phen_path)
-
+			commit_count = 0
+
+		# initialise empty repos
+		if commit_count == 0:
+			# create initial commit
+			initial_file_path = phen_path / "README.md"
+			with open(initial_file_path, "w") as file:
+				file.write("# Initial commit\nThis is the first commit in the phen repository.\n")
+			repo.index.add([initial_file_path])
+			repo.index.commit("Initial commit")
+			commit_count = 1
+
+		# Check out the phen's default branch, creating it if it does not exist
+		if DEFAULT_GIT_BRANCH in repo.branches:
+			main_branch = repo.heads[DEFAULT_GIT_BRANCH]
+			main_branch.checkout()
+		else:
+			main_branch = repo.create_head(DEFAULT_GIT_BRANCH)
+			main_branch.checkout()
+			
 		# if the phen path does not contain the config file then initialise the phen type
 		config_path = phen_path / CONFIG_FILE
 		if not config_path.exists():
@@ -107,9 +138,14 @@ def init(phen_dir, remote_url):
 			output_path = phen_path / OUTPUT_DIR
 			output_path.mkdir(exist_ok=True)
 			keep_path = output_path / '.gitkeep'
-			keep_path.touch(exist_ok=True)		
-			# create empty config file
-			initial_version = "v1.0.0"
+			keep_path.touch(exist_ok=True)
+
+			# set the initial version from the number of commits in the repo, which depends on how the repo was created
+			# e.g., if it was created with a README.md, there will already be commits before the phen config is added
+			next_commit_count =  commit_count + 1
+			initial_version = f"v1.0.{next_commit_count}"			
+			
+			# create empty phen config file
 			config = {
 				"concept_sets": {
 					"version": initial_version,
@@ -125,17 +161,15 @@ def init(phen_dir, remote_url):
 				]
 			}
 			config_path = phen_path / CONFIG_FILE
-			# Write the JSON data to a file
 			with open(config_path, "w", encoding="utf-8") as f:
 				json.dump(config, f, indent=4) 
 				
-			# initialise phen directory as a git repo includ .gitkeep to keep the codes dir
+			# add to repo and commit
 			repo.git.add(codes_path)
 			repo.git.add(output_path)				
 			repo.git.add(all=True)
-			msg = "Initialized phen git repo."				
-			repo.index.commit(msg)
-			#repo.create_tag(initial_version, message=f"Release {initial_version}")					
+			msg = "initialised the phen git repo."				
+			repo.index.commit(msg)			
 			print(msg)
 		else:
 			print(f"Phenotype configuration files already exist")
@@ -512,9 +546,19 @@ def publish(phen_dir):
 	validate(phen_dir)
 	phen_path = Path(phen_dir)
 
-	# create git repo object
+	# load git repo and set the branch
 	repo = git.Repo(phen_path)
+	if DEFAULT_GIT_BRANCH in repo.branches:
+		main_branch = repo.heads[DEFAULT_GIT_BRANCH]
+		main_branch.checkout()
+	else:
+		raise AttributeError(f"Phen repo does not contain the default branch {DEFAULT_GIT_BRANCH}")	
 
+	# check if any changes to publish
+	if not repo.is_dirty() and not repo.untracked_files:
+		print("Nothing to publish, no changes to the repo")
+		return
+	 
 	# get major version from configuration file
 	config_path = phen_path / CONFIG_FILE
 	config = json.load(open(config_path, "rb"))
@@ -522,12 +566,7 @@ def publish(phen_dir):
 	major_version = match.group(1)
 	
 	# get latest minor version from git commit count
-	try:
-		commit_count = len(list(repo.iter_commits("HEAD")))
-		#commit_hash = repo.head.object.hexsha[:7]  # Short hash
-	except git.exc.GitCommandError:
-		commit_count = 0
-		print("No commits to repo yet")
+	commit_count = len(list(repo.iter_commits("HEAD")))
 
 	# set version and write to config file so consistent with repo version
 	next_minor_version = commit_count + 1
@@ -552,6 +591,7 @@ def publish(phen_dir):
 	# push to origin if a remote repo
 	try:
 		origin = repo.remotes.origin
+		origin.push('main')
 		origin.push(tags=True)
 		print("Changes pushed to 'origin'.")
 	except AttributeError:
@@ -653,9 +693,6 @@ def diff(phen_dir, phen_old_dir):
 		df2 = df2[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
 
 		# Check for added and removed concepts
-		print(file)
-		print(df1.index)
-		print(df2.index)
 		report.write("- Removed concepts {}\n".format(list(set(df1.index) - set(df2.index))))
 		report.write("- Added concepts {}\n".format(list(set(df2.index) - set(df1.index))))
 
-- 
GitLab