Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
concepts-processing
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Package registry
Operate
Terraform modules
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
meldb
concepts-processing
Commits
66f7c9e9
Commit
66f7c9e9
authored
3 months ago
by
mjbonifa
Browse files
Options
Downloads
Patches
Plain Diff
tidied phen init
parent
dd623973
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
phen.py
+101
-96
101 additions, 96 deletions
phen.py
with
101 additions
and
96 deletions
phen.py
+
101
−
96
View file @
66f7c9e9
...
...
@@ -42,9 +42,12 @@ class PhenValidationException(Exception):
self
.
validation_errors
=
validation_errors
def
construct_git_url
(
remote_url
):
"""
Constructs a git URL for GitHub or GitLab that includes a PAT (personal access token) read from an environment variable
"""
# check the url
parsed_url
=
urlparse
(
remote_url
)
# if "github.com" is in the URL treat it as GitHub, otherwise assume it's GitLab; to support other hosts such as
# Codeberg this function would need to be updated if their URL scheme is different.
if
"
github.com
"
in
parsed_url
.
netloc
:
# get GitHub PAT from environment variable
auth
=
os
.
getenv
(
"
ACMC_GITHUB_PAT
"
)
...
...
@@ -78,104 +81,106 @@ def init(phen_dir, remote_url):
else
:
configure
=
True
# configure phen directories
if
configure
:
repo
=
None
# if remote then clone the repo otherwise init a local repo
if
remote_url
!=
None
:
# add PAT token to the URL
git_url
=
construct_git_url
(
remote_url
)
# clone the repo
repo
=
git
.
cmd
.
Git
()
repo
.
clone
(
git_url
,
phen_path
)
# open repo
repo
=
git
.
Repo
(
phen_path
)
# check if there are any commits (new repo has no commits)
if
len
(
repo
.
branches
)
==
0
or
repo
.
head
.
is_detached
:
# Handle detached HEAD (e.g., after init)
print
(
"
The phen repository has no commits yet.
"
)
commit_count
=
0
else
:
# Get the total number of commits in the default branch
commit_count
=
sum
(
1
for
_
in
repo
.
iter_commits
())
print
(
f
"
Repo has previous commits:
{
commit_count
}
"
)
else
:
# local repo, create the directories and init
phen_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
print
(
f
"
Phen directory
'
{
phen_path
}
'
has been created.
"
)
repo
=
git
.
Repo
.
init
(
phen_path
)
commit_count
=
0
if
not
configure
:
print
(
f
"
Exiting, phenotype not initiatised
"
)
return
# initialise empty repos
if
commit_count
==
0
:
# create initial commit
initial_file_path
=
phen_path
/
"
README.md
"
with
open
(
initial_file_path
,
"
w
"
)
as
file
:
file
.
write
(
"
# Initial commit
\n
This is the first commit in the phen repository.
\n
"
)
repo
.
index
.
add
([
initial_file_path
])
repo
.
index
.
commit
(
"
Initial commit
"
)
commit_count
=
1
# Checkout the phen's default branch, creating it if it does not exist
if
DEFAULT_GIT_BRANCH
in
repo
.
branches
:
main_branch
=
repo
.
heads
[
DEFAULT_GIT_BRANCH
]
main_branch
.
checkout
()
else
:
main_branch
=
repo
.
create_head
(
DEFAULT_GIT_BRANCH
)
main_branch
.
checkout
()
# if the phen path does not contain the config file then initialise the phen type
config_path
=
phen_path
/
CONFIG_FILE
if
not
config_path
.
exists
():
print
(
"
Creating configuration files
"
)
# create codes directory
codes_path
=
phen_path
/
CODES_DIR
codes_path
.
mkdir
(
exist_ok
=
True
)
keep_path
=
codes_path
/
'
.gitkeep
'
keep_path
.
touch
(
exist_ok
=
True
)
# create maps directory
output_path
=
phen_path
/
OUTPUT_DIR
output_path
.
mkdir
(
exist_ok
=
True
)
keep_path
=
output_path
/
'
.gitkeep
'
keep_path
.
touch
(
exist_ok
=
True
)
# set initial version based on the number of commits in the repo, depending on how the repo was created
# e.g., with a README.md, then there will be some initial commits before the phen config is added
next_commit_count
=
commit_count
+
1
initial_version
=
f
"
v1.0.
{
next_commit_count
}
"
# create empty phen config file
config
=
{
"
concept_sets
"
:
{
"
version
"
:
initial_version
,
"
omop
"
:
{
"
vocabulary_id
"
:
""
,
"
vocabulary_name
"
:
""
,
"
vocabulary_reference
"
:
""
},
"
concept_set
"
:
[
]
},
"
codes
"
:
[
]
}
config_path
=
phen_path
/
CONFIG_FILE
with
open
(
config_path
,
"
w
"
,
encoding
=
"
utf-8
"
)
as
f
:
json
.
dump
(
config
,
f
,
indent
=
4
)
# add to repo and commit
repo
.
git
.
add
(
codes_path
)
repo
.
git
.
add
(
output_path
)
repo
.
git
.
add
(
all
=
True
)
msg
=
"
initialised the phen git repo.
"
repo
.
index
.
commit
(
msg
)
print
(
msg
)
# Initialise repo from local or remote
repo
=
None
# if remote then clone the repo otherwise init a local repo
if
remote_url
!=
None
:
# add PAT token to the URL
git_url
=
construct_git_url
(
remote_url
)
# clone the repo
repo
=
git
.
cmd
.
Git
()
repo
.
clone
(
git_url
,
phen_path
)
# open repo
repo
=
git
.
Repo
(
phen_path
)
# check if there are any commits (new repo has no commits)
if
len
(
repo
.
branches
)
==
0
or
repo
.
head
.
is_detached
:
# Handle detached HEAD (e.g., after init)
print
(
"
The phen repository has no commits yet.
"
)
commit_count
=
0
else
:
print
(
f
"
Phenotype configuration files already exist
"
)
print
(
f
"
Phenotype initialised
"
)
# Get the total number of commits in the default branch
commit_count
=
sum
(
1
for
_
in
repo
.
iter_commits
())
print
(
f
"
Repo has previous commits:
{
commit_count
}
"
)
else
:
# local repo, create the directories and init
phen_path
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
print
(
f
"
Phen directory
'
{
phen_path
}
'
has been created.
"
)
repo
=
git
.
Repo
.
init
(
phen_path
)
commit_count
=
0
# initialise empty repos
if
commit_count
==
0
:
# create initial commit
initial_file_path
=
phen_path
/
"
README.md
"
with
open
(
initial_file_path
,
"
w
"
)
as
file
:
file
.
write
(
"
# Initial commit
\n
This is the first commit in the phen repository.
\n
"
)
repo
.
index
.
add
([
initial_file_path
])
repo
.
index
.
commit
(
"
Initial commit
"
)
commit_count
=
1
# Checkout the phen's default branch, creating it if it does not exist
if
DEFAULT_GIT_BRANCH
in
repo
.
branches
:
main_branch
=
repo
.
heads
[
DEFAULT_GIT_BRANCH
]
main_branch
.
checkout
()
else
:
print
(
f
"
Phenotype not initiatised
"
)
main_branch
=
repo
.
create_head
(
DEFAULT_GIT_BRANCH
)
main_branch
.
checkout
()
# if the phen path does not contain the config file then initialise the phen type
config_path
=
phen_path
/
CONFIG_FILE
if
config_path
.
exists
():
print
(
f
"
Phenotype configuration files already exist
"
)
return
print
(
"
Creating phen directory structure and config files
"
)
# create codes directory
codes_path
=
phen_path
/
CODES_DIR
codes_path
.
mkdir
(
exist_ok
=
True
)
keep_path
=
codes_path
/
'
.gitkeep
'
keep_path
.
touch
(
exist_ok
=
True
)
# create concept sets directory
output_path
=
phen_path
/
OUTPUT_DIR
output_path
.
mkdir
(
exist_ok
=
True
)
keep_path
=
output_path
/
'
.gitkeep
'
keep_path
.
touch
(
exist_ok
=
True
)
# set initial version based on the number of commits in the repo, depending on how the repo was created
# e.g., with a README.md, then there will be some initial commits before the phen config is added
next_commit_count
=
commit_count
+
1
initial_version
=
f
"
v1.0.
{
next_commit_count
}
"
# create empty phen config file
config
=
{
"
concept_sets
"
:
{
"
version
"
:
initial_version
,
"
omop
"
:
{
"
vocabulary_id
"
:
""
,
"
vocabulary_name
"
:
""
,
"
vocabulary_reference
"
:
""
},
"
concept_set
"
:
[
]
},
"
codes
"
:
[
]
}
config_path
=
phen_path
/
CONFIG_FILE
with
open
(
config_path
,
"
w
"
,
encoding
=
"
utf-8
"
)
as
f
:
json
.
dump
(
config
,
f
,
indent
=
4
)
# add to git repo and commit
repo
.
git
.
add
(
codes_path
)
repo
.
git
.
add
(
output_path
)
repo
.
git
.
add
(
all
=
True
)
repo
.
index
.
commit
(
"
initialised the phen git repo.
"
)
print
(
f
"
Phenotype initialised
"
)
def
validate
(
phen_dir
):
"""
Validates that the phenotype directory is a git repo with the standard structure
"""
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment