Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
concepts-processing
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Package registry
Operate
Terraform modules
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
meldb
concepts-processing
Commits
e5cbdbd2
Commit
e5cbdbd2
authored
4 months ago
by
mjbonifa
Browse files
Options
Downloads
Patches
Plain Diff
changed phen/output directory to phen/map and added concept-sets directory
parent
66f7c9e9
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
phen.py
+24
-23
24 additions, 23 deletions
phen.py
with
24 additions
and
23 deletions
phen.py
+
24
−
23
View file @
e5cbdbd2
...
@@ -22,8 +22,12 @@ pd.set_option("mode.chained_assignment", None)
...
@@ -22,8 +22,12 @@ pd.set_option("mode.chained_assignment", None)
PHEN_DIR
=
'
phen
'
PHEN_DIR
=
'
phen
'
DEFAULT_PHEN_PATH
=
Path
(
'
build
'
)
/
PHEN_DIR
DEFAULT_PHEN_PATH
=
Path
(
'
build
'
)
/
PHEN_DIR
CODES_DIR
=
'
codes
'
CODES_DIR
=
'
codes
'
OUTPUT_DIR
=
'
output
'
MAP_DIR
=
'
map
'
CONCEPT_SET_DIR
=
'
concept-set
'
DEFAULT_PHEN_DIR_LIST
=
[
CODES_DIR
,
MAP_DIR
,
CONCEPT_SET_DIR
]
CONFIG_FILE
=
'
config.json
'
CONFIG_FILE
=
'
config.json
'
ERROR_FILE
=
'
errors.csv
'
ERROR_FILE
=
'
errors.csv
'
REPORT_FILE
=
'
report.md
'
REPORT_FILE
=
'
report.md
'
...
@@ -64,6 +68,12 @@ def construct_git_url(remote_url):
...
@@ -64,6 +68,12 @@ def construct_git_url(remote_url):
new_netloc
=
f
"
{
auth
}
@
{
parsed_url
.
netloc
}
"
new_netloc
=
f
"
{
auth
}
@
{
parsed_url
.
netloc
}
"
return
urlunparse
((
parsed_url
.
scheme
,
new_netloc
,
parsed_url
.
path
,
parsed_url
.
params
,
parsed_url
.
query
,
parsed_url
.
fragment
))
return
urlunparse
((
parsed_url
.
scheme
,
new_netloc
,
parsed_url
.
path
,
parsed_url
.
params
,
parsed_url
.
query
,
parsed_url
.
fragment
))
def create_empty_git_dir(path):
    """Create a directory holding a ``.gitkeep`` file so that it's tracked in git.

    Args:
        path: Directory to create, given as a ``pathlib.Path`` or anything
            ``Path()`` accepts (e.g. a string). Missing parent directories
            are created too. Calling this on a directory that already
            exists, with or without a ``.gitkeep``, is not an error.
    """
    # Accept plain strings as well as Path objects.
    path = Path(path)
    # parents=True avoids a FileNotFoundError when an intermediate
    # directory has not been created yet.
    path.mkdir(parents=True, exist_ok=True)
    (path / '.gitkeep').touch(exist_ok=True)
def
init
(
phen_dir
,
remote_url
):
def
init
(
phen_dir
,
remote_url
):
"""
Initial phenotype directory as git repo with standard structure
"""
"""
Initial phenotype directory as git repo with standard structure
"""
print
(
f
"
Initialising Phenotype in directory:
{
phen_dir
}
"
)
print
(
f
"
Initialising Phenotype in directory:
{
phen_dir
}
"
)
...
@@ -138,17 +148,8 @@ def init(phen_dir, remote_url):
...
@@ -138,17 +148,8 @@ def init(phen_dir, remote_url):
return
return
print
(
"
Creating phen directory structure and config files
"
)
print
(
"
Creating phen directory structure and config files
"
)
# create codes directory
for
d
in
DEFAULT_PHEN_DIR_LIST
:
codes_path
=
phen_path
/
CODES_DIR
create_empty_git_dir
(
phen_path
/
d
)
codes_path
.
mkdir
(
exist_ok
=
True
)
keep_path
=
codes_path
/
'
.gitkeep
'
keep_path
.
touch
(
exist_ok
=
True
)
# create concept sets directory
output_path
=
phen_path
/
OUTPUT_DIR
output_path
.
mkdir
(
exist_ok
=
True
)
keep_path
=
output_path
/
'
.gitkeep
'
keep_path
.
touch
(
exist_ok
=
True
)
# set initial version based on the number of commits in the repo, depending on how the repo was created
# set initial version based on the number of commits in the repo, depending on how the repo was created
# e.g., with a README.md, then there will be some initial commits before the phen config is added
# e.g., with a README.md, then there will be some initial commits before the phen config is added
...
@@ -175,8 +176,8 @@ def init(phen_dir, remote_url):
...
@@ -175,8 +176,8 @@ def init(phen_dir, remote_url):
json
.
dump
(
config
,
f
,
indent
=
4
)
json
.
dump
(
config
,
f
,
indent
=
4
)
# add to git repo and commit
# add to git repo and commit
repo
.
git
.
add
(
codes_path
)
for
d
in
DEFAULT_PHEN_DIR_LIST
:
repo
.
git
.
add
(
output_path
)
repo
.
git
.
add
(
phen_path
/
d
)
repo
.
git
.
add
(
all
=
True
)
repo
.
git
.
add
(
all
=
True
)
repo
.
index
.
commit
(
"
initialised the phen git repo.
"
)
repo
.
index
.
commit
(
"
initialised the phen git repo.
"
)
...
@@ -529,10 +530,10 @@ def map(phen_dir,
...
@@ -529,10 +530,10 @@ def map(phen_dir,
else
:
else
:
output_filename
=
target_code_type
+
'
_no_translate.csv
'
output_filename
=
target_code_type
+
'
_no_translate.csv
'
output
_path
=
phen_path
/
OUTPUT
_DIR
/
output_filename
map
_path
=
phen_path
/
MAP
_DIR
/
output_filename
out
.
to_csv
(
output
_path
,
index
=
False
)
out
.
to_csv
(
map
_path
,
index
=
False
)
print
(
"
Saved t
o
"
,
output
_path
)
print
(
"
Saved t
ranslations to
"
,
map
_path
)
# Save Error File
# Save Error File
error_path
=
phen_path
/
ERROR_FILE
error_path
=
phen_path
/
ERROR_FILE
...
@@ -660,12 +661,12 @@ def diff(phen_dir, phen_old_dir):
...
@@ -660,12 +661,12 @@ def diff(phen_dir, phen_old_dir):
raise
ValueError
(
f
"
Unsupported filetype provided for report file
{
str
(
report_path
.
resolve
())
}
"
)
raise
ValueError
(
f
"
Unsupported filetype provided for report file
{
str
(
report_path
.
resolve
())
}
"
)
# Get maps files from phenotype
# Get maps files from phenotype
old_
output
_path
=
old_phen_path
/
OUTPUT
_DIR
old_
map
_path
=
old_phen_path
/
MAP
_DIR
new_
output
_path
=
new_phen_path
/
OUTPUT
_DIR
new_
map
_path
=
new_phen_path
/
MAP
_DIR
# List files from output directories
# List files from output directories
old_output_files
=
[
file
.
name
for
file
in
old_
output
_path
.
iterdir
()
if
file
.
is_file
()
and
not
file
.
name
.
startswith
(
'
.
'
)]
old_output_files
=
[
file
.
name
for
file
in
old_
map
_path
.
iterdir
()
if
file
.
is_file
()
and
not
file
.
name
.
startswith
(
'
.
'
)]
new_output_files
=
[
file
.
name
for
file
in
new_
output
_path
.
iterdir
()
if
file
.
is_file
()
and
not
file
.
name
.
startswith
(
'
.
'
)]
new_output_files
=
[
file
.
name
for
file
in
new_
map
_path
.
iterdir
()
if
file
.
is_file
()
and
not
file
.
name
.
startswith
(
'
.
'
)]
# Convert the lists to sets for easy comparison
# Convert the lists to sets for easy comparison
old_output_set
=
set
(
old_output_files
)
old_output_set
=
set
(
old_output_files
)
...
@@ -689,8 +690,8 @@ def diff(phen_dir, phen_old_dir):
...
@@ -689,8 +690,8 @@ def diff(phen_dir, phen_old_dir):
report
.
write
(
f
"
\n\n
## Compare concepts
{
str
(
old_phen_path
.
resolve
())
}
to
{
str
(
new_phen_path
.
resolve
())
}
\n\n
"
)
report
.
write
(
f
"
\n\n
## Compare concepts
{
str
(
old_phen_path
.
resolve
())
}
to
{
str
(
new_phen_path
.
resolve
())
}
\n\n
"
)
# Compare common outputs between versions
# Compare common outputs between versions
for
file
in
common_outputs
:
for
file
in
common_outputs
:
old_output
=
old_
output
_path
/
file
old_output
=
old_
map
_path
/
file
new_output
=
new_
output
_path
/
file
new_output
=
new_
map
_path
/
file
df1
=
pd
.
read_csv
(
old_output
)
df1
=
pd
.
read_csv
(
old_output
)
df1
=
df1
[[
"
CONCEPT
"
,
"
CONCEPT_SET
"
]].
groupby
(
"
CONCEPT_SET
"
).
count
()
df1
=
df1
[[
"
CONCEPT
"
,
"
CONCEPT_SET
"
]].
groupby
(
"
CONCEPT_SET
"
).
count
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment