Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
concepts-processing
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Package registry
Operate
Terraform modules
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
meldb
concepts-processing
Commits
8c0be40f
Commit
8c0be40f
authored
4 months ago
by
mjbonifa
Browse files
Options
Downloads
Patches
Plain Diff
Added `phen validate` as a command to validate the configuration in a specified directory.
parent
c156c2d0
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
acmc.py
+9
-0
9 additions, 0 deletions
acmc.py
phen.py
+69
-61
69 additions, 61 deletions
phen.py
with
78 additions
and
61 deletions
acmc.py
+
9
−
0
View file @
8c0be40f
...
@@ -26,6 +26,10 @@ def phen_init(args):
...
@@ -26,6 +26,10 @@ def phen_init(args):
"""
Handle the `phen init` command.
"""
"""
Handle the `phen init` command.
"""
phen
.
init
(
args
.
phen_dir
)
phen
.
init
(
args
.
phen_dir
)
def phen_validate(args):
    """
    Handle the `phen validate` command.

    Reads the phenotype directory from the parsed command-line arguments
    (`args.phen_dir`) and passes it to `phen.validate`.
    """
    target_dir = args.phen_dir
    phen.validate(target_dir)
def
phen_map
(
args
):
def
phen_map
(
args
):
"""
Handle the `phen map` command.
"""
"""
Handle the `phen map` command.
"""
phen
.
map
(
args
.
phen_dir
,
phen
.
map
(
args
.
phen_dir
,
...
@@ -74,6 +78,11 @@ def main():
...
@@ -74,6 +78,11 @@ def main():
phen_init_parser
.
add_argument
(
"
-d
"
,
"
--phen-dir
"
,
type
=
str
,
default
=
phen
.
DEFAULT_PHEN_PATH
.
resolve
,
help
=
"
Phenotype directory
"
)
phen_init_parser
.
add_argument
(
"
-d
"
,
"
--phen-dir
"
,
type
=
str
,
default
=
phen
.
DEFAULT_PHEN_PATH
.
resolve
,
help
=
"
Phenotype directory
"
)
phen_init_parser
.
set_defaults
(
func
=
phen_init
)
phen_init_parser
.
set_defaults
(
func
=
phen_init
)
# phen validate
phen_validate_parser
=
phen_subparsers
.
add_parser
(
"
validate
"
,
help
=
"
Validate phenotype configuration
"
)
phen_validate_parser
.
add_argument
(
"
-d
"
,
"
--phen-dir
"
,
type
=
str
,
default
=
phen
.
DEFAULT_PHEN_PATH
.
resolve
,
help
=
"
Phenotype directory
"
)
phen_validate_parser
.
set_defaults
(
func
=
phen_validate
)
# phen map
# phen map
phen_map_parser
=
phen_subparsers
.
add_parser
(
"
map
"
,
help
=
"
Process phen configuration file
"
)
phen_map_parser
=
phen_subparsers
.
add_parser
(
"
map
"
,
help
=
"
Process phen configuration file
"
)
phen_map_parser
.
add_argument
(
"
-d
"
,
"
--phen-dir
"
,
type
=
str
,
default
=
phen
.
DEFAULT_PHEN_PATH
.
resolve
,
help
=
"
Phenotype directory
"
)
phen_map_parser
.
add_argument
(
"
-d
"
,
"
--phen-dir
"
,
type
=
str
,
default
=
phen
.
DEFAULT_PHEN_PATH
.
resolve
,
help
=
"
Phenotype directory
"
)
...
...
This diff is collapsed.
Click to expand it.
phen.py
+
69
−
61
View file @
8c0be40f
...
@@ -97,6 +97,69 @@ def init(phen_dir):
...
@@ -97,6 +97,69 @@ def init(phen_dir):
print
(
f
"
Phenotype initialised
"
)
print
(
f
"
Phenotype initialised
"
)
def validate(phen_dir):
    """
    Validate the phenotype configuration stored in a phenotype directory.

    The directory must contain the configuration file (`CONFIG_FILE`) and the
    source codes directory (`CODES_DIR`). The JSON configuration is loaded and
    every entry under "codes" is checked:

    * the entry's folder exists as a directory below the codes directory,
    * every listed coding file exists,
    * every column is either a key of `code_types` or 'metadata',
    * every referenced concept set is declared under "concept_sets",
    * every action (when present) is listed in `COL_ACTIONS`.

    Args:
        phen_dir: Path of the phenotype directory (anything `Path` accepts).

    Raises:
        NotADirectoryError: `phen_dir` is not a directory.
        FileNotFoundError: the configuration file or the source codes
            directory does not exist.
        Exception: the configuration file is not a `.json` file, or at least
            one validation error was found (the error list is printed first).
    """
    print(f"Validating phenotype configuration {phen_dir}")
    phen_path = Path(phen_dir)
    if not phen_path.is_dir():
        raise NotADirectoryError(f"Error: '{phen_path}' is not a directory")

    config_path = phen_path / CONFIG_FILE
    if not config_path.is_file():
        raise FileNotFoundError(f"Error: phen configuration file '{config_path}' does not exist.")

    codes_path = phen_path / CODES_DIR
    if not codes_path.is_dir():
        # Report the path that was actually checked; interpolating any other
        # name here would turn this error into a NameError.
        raise FileNotFoundError(f"Error: source codes directory {codes_path} does not exist.")

    # Load configuration file (only JSON is supported)
    if config_path.suffix != ".json":
        raise Exception(f"Unsupported configuration filetype: {str(config_path.resolve())}")
    # Context manager so the file handle is closed even if parsing fails
    with open(config_path, "rb") as config_file:
        mapping = json.load(config_file)

    concept_sets = mapping["concept_sets"]
    concept_codes = mapping["codes"]

    validation_errors = []
    concept_set_names = [item['concept_set_name'] for item in concept_sets['concept_set']]

    for item in concept_codes:
        # check concept codes path is a directory
        concept_code_dir_path = codes_path / item['folder']
        if not concept_code_dir_path.is_dir():
            validation_errors.append(f"Folder directory {str(concept_code_dir_path.resolve())} is not a directory")

        # files are still checked when the folder is missing, so that every
        # problem is reported in a single run
        for file in item["files"]:
            # check concept code file exists
            concept_code_file_path = concept_code_dir_path / file['file']
            if not concept_code_file_path.exists():
                validation_errors.append(f"Coding file {str(concept_code_file_path.resolve())} does not exist")

            # check columns specified are a supported medical coding type
            for column in file['columns']:
                if column not in code_types and column != 'metadata':
                    validation_errors.append(f"Column type {column} for file {concept_code_file_path} is not supported")

            # check concept_set defined for the mapping
            for concept_set_mapping in file['concept_set']:
                if concept_set_mapping not in concept_set_names:
                    validation_errors.append(f"Concept set name {concept_set_mapping} for file {concept_code_file_path} does not exist in concept set list")

            # check the actions are supported ('actions' is optional)
            if 'actions' in file:
                for action in file['actions']:
                    if action not in COL_ACTIONS:
                        validation_errors.append(f"Action {action} is not supported")

    if validation_errors:
        print(validation_errors)
        raise Exception(f"Configuration file {str(config_path.resolve())} failed validation")

    print("Phenotype configuration validated successfully")
def
read_table_file
(
path
,
excel_sheet
=
None
):
def
read_table_file
(
path
,
excel_sheet
=
None
):
"""
"""
Load Code List File
Load Code List File
...
@@ -115,7 +178,6 @@ def read_table_file(path, excel_sheet=None):
...
@@ -115,7 +178,6 @@ def read_table_file(path, excel_sheet=None):
return
df
return
df
def
preprocess_code
(
out
,
codes
,
checker
,
output_col
,
df_meta
,
verify
=
True
):
def
preprocess_code
(
out
,
codes
,
checker
,
output_col
,
df_meta
,
verify
=
True
):
codes
=
codes
.
astype
(
str
)
# convert to string
codes
=
codes
.
astype
(
str
)
# convert to string
codes
=
codes
.
str
.
strip
()
# remove excess spaces
codes
=
codes
.
str
.
strip
()
# remove excess spaces
...
@@ -226,46 +288,6 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[], translate=Tru
...
@@ -226,46 +288,6 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[], translate=Tru
out
=
pd
.
concat
([
out
,
codes
])
out
=
pd
.
concat
([
out
,
codes
])
return
out
return
out
def validate_config(codes_path, mapping):
    """
    Collect validation errors for a loaded phenotype configuration.

    Every entry of mapping["codes"] is checked: its folder must be a
    directory below `codes_path`, each listed file must exist, each column
    must be in `code_types` or be 'metadata', each referenced concept set
    must be declared under mapping["concept_sets"], and each action (when
    the file declares any) must be in `COL_ACTIONS`.

    Returns:
        A list of error message strings, empty when no problem was found.
    """
    set_config, folder_entries = mapping["concept_sets"], mapping["codes"]

    errors = []
    declared_sets = [entry['concept_set_name'] for entry in set_config['concept_set']]

    for folder_entry in folder_entries:
        # the folder named in the configuration must exist under codes_path
        folder_path = codes_path / folder_entry['folder']
        if not folder_path.is_dir():
            errors.append(f"Folder directory {str(folder_path.resolve())} is not a directory")

        for file_entry in folder_entry["files"]:
            # the coding file itself must exist
            file_path = folder_path / file_entry['file']
            if not file_path.exists():
                errors.append(f"Coding file {str(file_path.resolve())} does not exist")

            # columns must be a supported medical coding type or 'metadata'
            errors.extend(
                f"Column type {column} for file {file_path} is not supported"
                for column in file_entry['columns']
                if not (column in code_types or column == 'metadata')
            )

            # concept sets referenced by the file must be declared
            errors.extend(
                f"Concept set name {set_name} for file {file_path} does not exist in concept set list"
                for set_name in file_entry['concept_set']
                if set_name not in declared_sets
            )

            # actions are optional; when present each one must be supported
            if 'actions' not in file_entry:
                continue
            errors.extend(
                f"Action {action} is not supported"
                for action in file_entry['actions']
                if action not in COL_ACTIONS
            )

    return errors
def
sql_row_exist
(
conn
,
table
,
column
,
value
):
def
sql_row_exist
(
conn
,
table
,
column
,
value
):
# Execute and check if a result exists
# Execute and check if a result exists
cur
=
conn
.
cursor
()
cur
=
conn
.
cursor
()
...
@@ -290,28 +312,14 @@ def map(phen_dir,
...
@@ -290,28 +312,14 @@ def map(phen_dir,
else
:
else
:
print
(
"
Not verifying codes.
"
)
print
(
"
Not verifying codes.
"
)
phen_path
=
Path
(
phen_dir
)
# Validate the configuration
if
not
phen_path
.
is_dir
():
validate
(
phen_dir
)
raise
NotADirectoryError
(
f
"
Error:
'
{
phen_path
}
'
is not a directory
"
)
phen_path
=
Path
(
phen_dir
)
config_path
=
phen_path
/
CONFIG_FILE
config_path
=
phen_path
/
CONFIG_FILE
if
not
config_path
.
is_file
():
raise
FileNotFoundError
(
f
"
Error: phen configuration file
'
{
config_path
}
'
does not exist.
"
)
codes_path
=
phen_path
/
CODES_DIR
codes_path
=
phen_path
/
CODES_DIR
if
not
codes_path
.
is_dir
():
raise
FileNotFoundError
(
f
"
Error: source codes directory
{
source_codes_dir
}
does not exist.
"
)
# Load configuration File
if
config_path
.
suffix
==
"
.json
"
:
mapping
=
json
.
load
(
open
(
config_path
,
"
rb
"
))
mapping
=
json
.
load
(
open
(
config_path
,
"
rb
"
))
validation_errors
=
validate_config
(
codes_path
,
mapping
)
if
len
(
validation_errors
)
>
0
:
print
(
validation_errors
)
raise
Exception
(
f
"
Configuration file
{
str
(
config_path
.
resolve
())
}
failed validation
"
)
else
:
raise
Exception
(
f
"
Unsupported configuration filetype:
{
str
(
config_path
.
resolve
())
}
"
)
summary_config
=
mapping
[
"
concept_sets
"
]
summary_config
=
mapping
[
"
concept_sets
"
]
folders
=
mapping
[
"
codes
"
]
folders
=
mapping
[
"
codes
"
]
out
=
pd
.
DataFrame
([])
# Create Output dataframe to append to
out
=
pd
.
DataFrame
([])
# Create Output dataframe to append to
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment