From 66f7c9e973b181bd771aabdc2fd9e9db33fa7df4 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Tue, 18 Feb 2025 12:37:53 +0000
Subject: [PATCH] tidied phen init

---
 phen.py | 197 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 101 insertions(+), 96 deletions(-)

diff --git a/phen.py b/phen.py
index fca8c08..48c7d1b 100644
--- a/phen.py
+++ b/phen.py
@@ -42,9 +42,12 @@ class PhenValidationException(Exception):
 		self.validation_errors = validation_errors
 
 def construct_git_url(remote_url):
+	"""Constructs a git URL for GitHub or GitLab that embeds a PAT read from an environment variable"""
 	# check the url
 	parsed_url = urlparse(remote_url)
-	
+
+	# GitHub is detected from the URL; any other host is assumed to be GitLab. Supporting other hosts
+	# such as Codeberg would require updating this function if their URL scheme is different.
 	if "github.com" in parsed_url.netloc:
 		# get GitHub PAT from environment variable
 		auth = os.getenv("ACMC_GITHUB_PAT")
@@ -78,104 +81,106 @@ def init(phen_dir, remote_url):
 	else:
 		configure=True
 
-	# configure phen directories 
-	if configure:
-		repo = None
-		# if remote then clone the repo otherwise init a local repo
-		if remote_url != None:
-			# add PAT token to the URL
-			git_url = construct_git_url(remote_url)
-
-			# clone the repo
-			repo = git.cmd.Git()
-			repo.clone(git_url, phen_path)	
-			# open repo
-			repo = git.Repo(phen_path)
-			
-			# check if there are any commits (new repo has no commits)
-			if len(repo.branches) == 0 or repo.head.is_detached:  # Handle detached HEAD (e.g., after init)
-				print("The phen repository has no commits yet.")
-				commit_count = 0
-			else:
-				# Get the total number of commits in the default branch
-				commit_count = sum(1 for _ in repo.iter_commits())
-				print(f"Repo has previous commits: {commit_count}")
-		else:
-			# local repo, create the directories and init
-			phen_path.mkdir(parents=True, exist_ok=True) 
-			print(f"Phen directory '{phen_path}' has been created.")
-			repo = git.Repo.init(phen_path)
-			commit_count = 0
+	if not configure:
+		print(f"Exiting, phenotype not initiatised")
+		return
 
-		# initialise empty repos
-		if commit_count == 0:
-			# create initial commit
-			initial_file_path = phen_path / "README.md"
-			with open(initial_file_path, "w") as file:
-				file.write("# Initial commit\nThis is the first commit in the phen repository.\n")
-			repo.index.add([initial_file_path])
-			repo.index.commit("Initial commit")
-			commit_count = 1
-
-		# Checkout the phens default branch, creating it if it does not exist
-		if DEFAULT_GIT_BRANCH in repo.branches:
-			main_branch = repo.heads[DEFAULT_GIT_BRANCH]
-			main_branch.checkout()
-		else:
-			main_branch = repo.create_head(DEFAULT_GIT_BRANCH)
-			main_branch.checkout()
-			
-		# if the phen path does not contain the config file then initialise the phen type
-		config_path = phen_path / CONFIG_FILE
-		if not config_path.exists():
-			print("Creating configuration files")	
-			# create codes directory
-			codes_path = phen_path / CODES_DIR
-			codes_path.mkdir(exist_ok=True)
-			keep_path = codes_path / '.gitkeep'
-			keep_path.touch(exist_ok=True)
-			# create maps directory
-			output_path = phen_path / OUTPUT_DIR
-			output_path.mkdir(exist_ok=True)
-			keep_path = output_path / '.gitkeep'
-			keep_path.touch(exist_ok=True)
-
-			# set initial version based on the number of commits in the repo, depending on how the repo was created
-			# e.g., with a README.md, then there will be some initial commits before the phen config is added
-			next_commit_count =  commit_count + 1
-			initial_version = f"v1.0.{next_commit_count}"			
-			
-			# create empty phen config file
-			config = {
-				"concept_sets": {
-					"version": initial_version,
-					"omop": {
-						"vocabulary_id": "",
-						"vocabulary_name": "",
-						"vocabulary_reference": ""
-					},
-					"concept_set": [
-					]
-				},
-				"codes": [
-				]
-			}
-			config_path = phen_path / CONFIG_FILE
-			with open(config_path, "w", encoding="utf-8") as f:
-				json.dump(config, f, indent=4) 
-				
-			# add to repo and commit
-			repo.git.add(codes_path)
-			repo.git.add(output_path)				
-			repo.git.add(all=True)
-			msg = "initialised the phen git repo."				
-			repo.index.commit(msg)			
-			print(msg)
+	# Initialise repo from local or remote
+	repo = None
+	# if remote then clone the repo otherwise init a local repo	
+	if remote_url != None:
+		# add PAT token to the URL
+		git_url = construct_git_url(remote_url)
+
+		# clone the repo
+		repo = git.cmd.Git()
+		repo.clone(git_url, phen_path)	
+		# open repo
+		repo = git.Repo(phen_path)
+		
+		# check if there are any commits (new repo has no commits)
+		if len(repo.branches) == 0 or repo.head.is_detached:  # Handle detached HEAD (e.g., after init)
+			print("The phen repository has no commits yet.")
+			commit_count = 0
 		else:
-			print(f"Phenotype configuration files already exist")
-		print(f"Phenotype initialised")
+			# Get the total number of commits in the default branch
+			commit_count = sum(1 for _ in repo.iter_commits())
+			print(f"Repo has previous commits: {commit_count}")
+	else:
+		# local repo, create the directories and init
+		phen_path.mkdir(parents=True, exist_ok=True) 
+		print(f"Phen directory '{phen_path}' has been created.")
+		repo = git.Repo.init(phen_path)
+		commit_count = 0
+
+	# initialise empty repos
+	if commit_count == 0:
+		# create initial commit
+		initial_file_path = phen_path / "README.md"
+		with open(initial_file_path, "w") as file:
+			file.write("# Initial commit\nThis is the first commit in the phen repository.\n")
+		repo.index.add([initial_file_path])
+		repo.index.commit("Initial commit")
+		commit_count = 1
+
+	# Check out the phen's default branch, creating it if it does not exist
+	if DEFAULT_GIT_BRANCH in repo.branches:
+		main_branch = repo.heads[DEFAULT_GIT_BRANCH]
+		main_branch.checkout()
 	else:
-		print(f"Phenotype not initiatised")
+		main_branch = repo.create_head(DEFAULT_GIT_BRANCH)
+		main_branch.checkout()
+		
+	# if the phen path does not contain the config file then initialise the phen type
+	config_path = phen_path / CONFIG_FILE
+	if config_path.exists():
+		print(f"Phenotype configuration files already exist")		
+		return
+		
+	print("Creating phen directory structure and config files")
+	# create codes directory
+	codes_path = phen_path / CODES_DIR
+	codes_path.mkdir(exist_ok=True)
+	keep_path = codes_path / '.gitkeep'
+	keep_path.touch(exist_ok=True)
+	
+	# create concept sets directory
+	output_path = phen_path / OUTPUT_DIR
+	output_path.mkdir(exist_ok=True)
+	keep_path = output_path / '.gitkeep'
+	keep_path.touch(exist_ok=True)
+	
+	# set initial version based on the number of commits in the repo, depending on how the repo was created
+	# e.g., with a README.md, then there will be some initial commits before the phen config is added
+	next_commit_count =  commit_count + 1
+	initial_version = f"v1.0.{next_commit_count}"			
+	
+	# create empty phen config file
+	config = {
+		"concept_sets": {
+			"version": initial_version,
+			"omop": {
+				"vocabulary_id": "",
+				"vocabulary_name": "",
+				"vocabulary_reference": ""
+			},
+			"concept_set": [
+			]
+		},
+		"codes": [
+		]
+	}
+	config_path = phen_path / CONFIG_FILE
+	with open(config_path, "w", encoding="utf-8") as f:
+		json.dump(config, f, indent=4) 
+		
+	# add to git repo and commit
+	repo.git.add(codes_path)
+	repo.git.add(output_path)				
+	repo.git.add(all=True)		
+	repo.index.commit("initialised the phen git repo.")			
+		
+	print(f"Phenotype initialised")
 	
 def validate(phen_dir):
 	"""Validates the phenotype directory is a git repo with standard structure"""		
-- 
GitLab