diff --git a/phen.py b/phen.py index ebfcc338a62c8ad4d4fa80a804925a0a0350314b..fca8c085ca9c11007661a3e16945ba32b1fbf7d2 100644 --- a/phen.py +++ b/phen.py @@ -28,6 +28,8 @@ CONFIG_FILE = 'config.json' ERROR_FILE = 'errors.csv' REPORT_FILE = 'report.md' +DEFAULT_GIT_BRANCH = 'main' + SPLIT_COL_ACTION = "split_col" CODES_COL_ACTION = "codes_col" DIVIDE_COL_ACTION = "divide_col" @@ -87,13 +89,42 @@ def init(phen_dir, remote_url): # clone the repo repo = git.cmd.Git() repo.clone(git_url, phen_path) + # open repo repo = git.Repo(phen_path) + + # check if there are any commits (new repo has no commits) + if len(repo.branches) == 0 or repo.head.is_detached: # Handle detached HEAD (e.g., after init) + print("The phen repository has no commits yet.") + commit_count = 0 + else: + # Get the total number of commits in the default branch + commit_count = sum(1 for _ in repo.iter_commits()) + print(f"Repo has previous commits: {commit_count}") else: # local repo, create the directories and init phen_path.mkdir(parents=True, exist_ok=True) print(f"Phen directory '{phen_path}' has been created.") repo = git.Repo.init(phen_path) - + commit_count = 0 + + # initialise empty repos + if commit_count == 0: + # create initial commit + initial_file_path = phen_path / "README.md" + with open(initial_file_path, "w") as file: + file.write("# Initial commit\nThis is the first commit in the phen repository.\n") + repo.index.add([initial_file_path]) + repo.index.commit("Initial commit") + commit_count = 1 + + # Checkout the phens default branch, creating it if it does not exist + if DEFAULT_GIT_BRANCH in repo.branches: + main_branch = repo.heads[DEFAULT_GIT_BRANCH] + main_branch.checkout() + else: + main_branch = repo.create_head(DEFAULT_GIT_BRANCH) + main_branch.checkout() + # if the phen path does not contain the config file then initialise the phen type config_path = phen_path / CONFIG_FILE if not config_path.exists(): @@ -107,9 +138,14 @@ def init(phen_dir, remote_url): output_path = phen_path / OUTPUT_DIR output_path.mkdir(exist_ok=True) keep_path = output_path / '.gitkeep' - keep_path.touch(exist_ok=True) - # create empty config file - initial_version = "v1.0.0" + keep_path.touch(exist_ok=True) + + # set initial version based on the number of commits in the repo, depending on how the repo was created + # e.g., with a README.md, then there will be some initial commits before the phen config is added + next_commit_count = commit_count + 1 + initial_version = f"v1.0.{next_commit_count}" + + # create empty phen config file config = { "concept_sets": { "version": initial_version, @@ -125,17 +161,15 @@ def init(phen_dir, remote_url): ] } config_path = phen_path / CONFIG_FILE - # Write the JSON data to a file with open(config_path, "w", encoding="utf-8") as f: json.dump(config, f, indent=4) - # initialise phen directory as a git repo includ .gitkeep to keep the codes dir + # add to repo and commit repo.git.add(codes_path) repo.git.add(output_path) repo.git.add(all=True) - msg = "Initialized phen git repo." - repo.index.commit(msg) - #repo.create_tag(initial_version, message=f"Release {initial_version}") + msg = "initialised the phen git repo." + repo.index.commit(msg) print(msg) else: print(f"Phenotype configuration files already exist") @@ -512,9 +546,19 @@ def publish(phen_dir): validate(phen_dir) phen_path = Path(phen_dir) - # create git repo object + # load git repo and set the branch repo = git.Repo(phen_path) + if DEFAULT_GIT_BRANCH in repo.branches: + main_branch = repo.heads[DEFAULT_GIT_BRANCH] + main_branch.checkout() + else: + raise AttributeError(f"Phen repo does not contain the default branch {DEFAULT_GIT_BRANCH}") + # check if any changes to publish + if not repo.is_dirty() and not repo.untracked_files: + print("Nothing to publish, no changes to the repo") + return + # get major version from configuration file config_path = phen_path / CONFIG_FILE config = json.load(open(config_path, "rb")) @@ -522,12 +566,7 @@ def publish(phen_dir): major_version = match.group(1) # get latest minor version from git commit count - try: - commit_count = len(list(repo.iter_commits("HEAD"))) - #commit_hash = repo.head.object.hexsha[:7] # Short hash - except git.exc.GitCommandError: - commit_count = 0 - print("No commits to repo yet") + commit_count = len(list(repo.iter_commits("HEAD"))) # set version and write to config file so consistent with repo version next_minor_version = commit_count + 1 @@ -552,6 +591,7 @@ def publish(phen_dir): # push to origin if a remote repo try: origin = repo.remotes.origin + origin.push('main') origin.push(tags=True) print("Changes pushed to 'origin'.") except AttributeError: @@ -653,9 +693,6 @@ def diff(phen_dir, phen_old_dir): df2 = df2[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count() # Check for added and removed concepts - print(file) - print(df1.index) - print(df2.index) report.write("- Removed concepts {}\n".format(list(set(df1.index) - set(df2.index)))) report.write("- Added concepts {}\n".format(list(set(df2.index) - set(df1.index))))