Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
C
concepts-processing
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Package registry
Operate
Terraform modules
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
meldb
concepts-processing
Commits
3c0f4c4a
Commit
3c0f4c4a
authored
5 months ago
by
Jakub Dylag
Browse files
Options
Downloads
Patches
Plain Diff
Rename "code" -> "CONCEPT" and "MELDB_concept" -> "CONCEPT_SET"
parent
4d8cbf8a
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
main.py
+9
-9
9 additions, 9 deletions
main.py
publish.py
+2
-2
2 additions, 2 deletions
publish.py
report.py
+4
-4
4 additions, 4 deletions
report.py
with
15 additions
and
15 deletions
main.py
+
9
−
9
View file @
3c0f4c4a
...
...
@@ -134,11 +134,11 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[], no_translate=
#Append to out df
if
len
(
codes
)
>
0
:
codes
=
pd
.
DataFrame
({
"
code
"
:
codes
"
CONCEPT
"
:
codes
})
codes
=
codes
.
join
(
df_meta
)
for
concept
in
concepts
:
codes
[
"
MELDB_concept
"
]
=
np
.
repeat
(
concept
.
strip
(),
len
(
codes
))
codes
[
"
CONCEPT_SET
"
]
=
np
.
repeat
(
concept
.
strip
(),
len
(
codes
))
out
=
pd
.
concat
([
out
,
codes
])
return
out
...
...
@@ -200,7 +200,7 @@ def omop_publish_concept_sets(out, db_path, vocab_output, vocab_type):
conn
=
sqlite3
.
connect
(
db_path
)
cur
=
conn
.
cursor
()
for
concept_set_name
,
grp
in
out
.
groupby
(
"
MELDB_concept
"
):
for
concept_set_name
,
grp
in
out
.
groupby
(
"
CONCEPT_SET
"
):
#Create Concept_Set
if
not
sql_row_exist
(
conn
,
"
CONCEPT_SET
"
,
"
concept_set_name
"
,
concept_set_name
):
cur
.
execute
(
f
"
INSERT INTO CONCEPT_SET (concept_set_name, vocabulary_id) VALUES (
'
{
concept_set_name
}
'
,
'
MELDB
'
);
"
)
...
...
@@ -214,7 +214,7 @@ def omop_publish_concept_sets(out, db_path, vocab_output, vocab_type):
concept_set_id
=
cur
.
fetchone
()[
0
]
#Get corresponding Concept_id (OMOP) for each Concept_code (e.g. SNOMED)
concept_codes
=
"'"
+
"'
,
'"
.
join
(
list
(
grp
[
"
code
"
].
astype
(
str
)))
+
"'"
concept_codes
=
"'"
+
"'
,
'"
.
join
(
list
(
grp
[
"
CONCEPT
"
].
astype
(
str
)))
+
"'"
query
=
f
"
SELECT concept_id FROM CONCEPT WHERE vocabulary_id = ? AND concept_code IN (
{
concept_codes
}
);
"
cur
.
execute
(
query
,
(
vocab_type
,
))
df_out
=
pd
.
DataFrame
(
cur
.
fetchall
(),
columns
=
[
"
concept_id
"
])
...
...
@@ -329,8 +329,8 @@ def run_all(mapping_file, target_code_type,
#Final Processing
out
=
out
.
reset_index
(
drop
=
True
)
out
=
out
.
drop_duplicates
(
subset
=
[
"
MELDB_concept
"
,
"
code
"
])
out
=
out
.
sort_values
(
by
=
[
"
MELDB_concept
"
,
"
code
"
])
out
=
out
.
drop_duplicates
(
subset
=
[
"
CONCEPT_SET
"
,
"
CONCEPT
"
])
out
=
out
.
sort_values
(
by
=
[
"
CONCEPT_SET
"
,
"
CONCEPT
"
])
#Merge with Concept Types in Summary Excel File
summary_config
=
mapping
[
"
concepts
"
]
...
...
@@ -346,9 +346,9 @@ def run_all(mapping_file, target_code_type,
summary_cols_all
+=
v
summary_df
=
summary_df
[
summary_cols_all
]
#select all relevant columns
summary_df
=
summary_df
.
rename
(
columns
=
{
summary_config
[
"
columns
"
][
"
concept_name
"
]:
"
MELDB_concept
"
})
summary_df
=
summary_df
.
rename
(
columns
=
{
summary_config
[
"
columns
"
][
"
concept_name
"
]:
"
CONCEPT_SET
"
})
summary_df
=
summary_df
.
drop_duplicates
()
#remove duplicates
out
=
out
.
merge
(
summary_df
,
how
=
"
left
"
,
on
=
'
MELDB_concept
'
)
out
=
out
.
merge
(
summary_df
,
how
=
"
left
"
,
on
=
'
CONCEPT_SET
'
)
# Save Output File
print
(
bcolors
.
HEADER
,
"
---
"
*
5
,
"
OUTPUT
"
,
"
---
"
*
5
,
bcolors
.
ENDC
)
...
...
@@ -379,7 +379,7 @@ def run_all(mapping_file, target_code_type,
if
os
.
path
.
exists
(
log_errors_path
):
error_df
=
pd
.
read_csv
(
log_errors_path
)
error_df
=
error_df
.
drop_duplicates
()
#Remove Duplicates from Error file
error_df
=
error_df
.
sort_values
(
by
=
[
"
SOURCE
"
,
"
CODE_TYPE
"
,
"
CODE
"
])
error_df
=
error_df
.
sort_values
(
by
=
[
"
SOURCE
"
,
"
VOCABULARY
"
,
"
CODE
"
])
error_df
.
to_csv
(
log_errors_path
,
index
=
False
)
...
...
This diff is collapsed.
Click to expand it.
publish.py
+
2
−
2
View file @
3c0f4c4a
...
...
@@ -9,8 +9,8 @@ def main(config):
else
:
raise
Exception
(
"
Concepts file must be
'
.csv
'
filetype
"
)
for
name
,
concept
in
df
.
groupby
(
"
MELDB_concept
"
):
concept
=
concept
.
sort_values
(
by
=
"
code
"
)
#sort rows
for
name
,
concept
in
df
.
groupby
(
"
CONCEPT_SET
"
):
concept
=
concept
.
sort_values
(
by
=
"
CONCEPT
"
)
#sort rows
concept
=
concept
.
dropna
(
how
=
'
all
'
,
axis
=
1
)
#remove empty cols
concept
=
concept
.
reindex
(
sorted
(
concept
.
columns
),
axis
=
1
)
#sort cols alphabetically
...
...
This diff is collapsed.
Click to expand it.
report.py
+
4
−
4
View file @
3c0f4c4a
...
...
@@ -111,9 +111,9 @@ def test_concept_changes(config, report):
report
.
write
(
f
"
`
{
out1
}
` to `
{
out2
}
`
\n
"
)
df1
=
pd
.
read_csv
(
out1
)
df1
=
df1
[[
"
code
"
,
"
MELDB_concept
"
]].
groupby
(
"
MELDB_concept
"
).
count
()
df1
=
df1
[[
"
CONCEPT
"
,
"
CONCEPT_SET
"
]].
groupby
(
"
CONCEPT_SET
"
).
count
()
df2
=
pd
.
read_csv
(
out2
)
df2
=
df2
[[
"
code
"
,
"
MELDB_concept
"
]].
groupby
(
"
MELDB_concept
"
).
count
()
df2
=
df2
[[
"
CONCEPT
"
,
"
CONCEPT_SET
"
]].
groupby
(
"
CONCEPT_SET
"
).
count
()
#Added/Removed Concepts
report
.
write
(
"
- Removed Concepts {}
\n
"
.
format
(
list
(
set
(
df1
.
index
)
-
set
(
df2
.
index
))))
...
...
@@ -121,10 +121,10 @@ def test_concept_changes(config, report):
#Changed Concepts
diff
=
df2
-
df1
#diff in counts
diff
=
diff
[(
~
(
diff
[
"
code
"
]
==
0.0
))
&
diff
[
"
code
"
].
notna
()]
#get non-zero counts
diff
=
diff
[(
~
(
diff
[
"
CONCEPT
"
]
==
0.0
))
&
diff
[
"
CONCEPT
"
].
notna
()]
#get non-zero counts
s
=
"
\n
"
for
concept
,
row
in
diff
.
iterrows
():
s
+=
"
\t
- {} {}
\n
"
.
format
(
concept
,
row
[
"
code
"
])
s
+=
"
\t
- {} {}
\n
"
.
format
(
concept
,
row
[
"
CONCEPT
"
])
report
.
write
(
"
- Changed Concepts {}
\n\n
"
.
format
(
s
))
# ✅ ❌
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment