Skip to content
Snippets Groups Projects
Commit 3c0f4c4a authored by Jakub Dylag
Browse files

Rename "code" -> "CONCEPT" and "MELDB_concept" -> "CONCEPT_SET"

parent 4d8cbf8a
No related branches found
No related tags found
No related merge requests found
......@@ -134,11 +134,11 @@ def map_file(df, target_code_type, out, concepts, meta_columns=[], no_translate=
#Append to out df
if len(codes) > 0:
codes = pd.DataFrame({
"code":codes
"CONCEPT":codes
})
codes = codes.join(df_meta)
for concept in concepts:
codes["MELDB_concept"] = np.repeat(concept.strip(), len(codes))
codes["CONCEPT_SET"] = np.repeat(concept.strip(), len(codes))
out = pd.concat([out, codes])
return out
......@@ -200,7 +200,7 @@ def omop_publish_concept_sets(out, db_path, vocab_output, vocab_type):
conn = sqlite3.connect(db_path)
cur = conn.cursor()
for concept_set_name, grp in out.groupby("MELDB_concept"):
for concept_set_name, grp in out.groupby("CONCEPT_SET"):
#Create Concept_Set
if not sql_row_exist(conn, "CONCEPT_SET", "concept_set_name", concept_set_name):
cur.execute(f"INSERT INTO CONCEPT_SET (concept_set_name, vocabulary_id) VALUES ('{concept_set_name}', 'MELDB');")
......@@ -214,7 +214,7 @@ def omop_publish_concept_sets(out, db_path, vocab_output, vocab_type):
concept_set_id = cur.fetchone()[0]
#Get corresponding Concept_id (OMOP) for each Concept_code (e.g. SNOMED)
concept_codes = "'"+"', '".join(list(grp["code"].astype(str)))+"'"
concept_codes = "'"+"', '".join(list(grp["CONCEPT"].astype(str)))+"'"
query = f"SELECT concept_id FROM CONCEPT WHERE vocabulary_id = ? AND concept_code IN ({concept_codes});"
cur.execute(query, (vocab_type, ))
df_out = pd.DataFrame(cur.fetchall(), columns=["concept_id"])
......@@ -329,8 +329,8 @@ def run_all(mapping_file, target_code_type,
#Final Processing
out = out.reset_index(drop=True)
out = out.drop_duplicates(subset=["MELDB_concept", "code"])
out = out.sort_values(by=["MELDB_concept", "code"])
out = out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
out = out.sort_values(by=["CONCEPT_SET", "CONCEPT"])
#Merge with Concept Types in Summary Excel File
summary_config = mapping["concepts"]
......@@ -346,9 +346,9 @@ def run_all(mapping_file, target_code_type,
summary_cols_all += v
summary_df = summary_df[summary_cols_all] #select all relevant columns
summary_df = summary_df.rename(columns={summary_config["columns"]["concept_name"]: "MELDB_concept"})
summary_df = summary_df.rename(columns={summary_config["columns"]["concept_name"]: "CONCEPT_SET"})
summary_df = summary_df.drop_duplicates() #remove duplicates
out = out.merge(summary_df, how="left", on='MELDB_concept')
out = out.merge(summary_df, how="left", on='CONCEPT_SET')
# Save Output File
print(bcolors.HEADER, "---"*5, "OUTPUT", "---"*5, bcolors.ENDC)
......@@ -379,7 +379,7 @@ def run_all(mapping_file, target_code_type,
if os.path.exists(log_errors_path):
error_df = pd.read_csv(log_errors_path)
error_df = error_df.drop_duplicates() #Remove Duplicates from Error file
error_df = error_df.sort_values(by=["SOURCE", "CODE_TYPE", "CODE"])
error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CODE"])
error_df.to_csv(log_errors_path, index=False)
......
......@@ -9,8 +9,8 @@ def main(config):
else:
raise Exception("Concepts file must be '.csv' filetype")
for name, concept in df.groupby("MELDB_concept"):
concept = concept.sort_values(by="code") #sort rows
for name, concept in df.groupby("CONCEPT_SET"):
concept = concept.sort_values(by="CONCEPT") #sort rows
concept = concept.dropna(how='all', axis=1) #remove empty cols
concept = concept.reindex(sorted(concept.columns), axis=1) #sort cols alphabetically
......
......@@ -111,9 +111,9 @@ def test_concept_changes(config, report):
report.write(f"`{out1}` to `{out2}`\n")
df1 = pd.read_csv(out1)
df1 = df1[["code","MELDB_concept"]].groupby("MELDB_concept").count()
df1 = df1[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
df2 = pd.read_csv(out2)
df2 = df2[["code","MELDB_concept"]].groupby("MELDB_concept").count()
df2 = df2[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
#Added/Removed Concepts
report.write("- Removed Concepts {}\n".format(list(set(df1.index) - set(df2.index))))
......@@ -121,10 +121,10 @@ def test_concept_changes(config, report):
#Changed Concepts
diff = df2 - df1 #diff in counts
diff = diff[(~(diff["code"] == 0.0)) & diff["code"].notna()] #get non-zero counts
diff = diff[(~(diff["CONCEPT"] == 0.0)) & diff["CONCEPT"].notna()] #get non-zero counts
s = "\n"
for concept, row in diff.iterrows():
s += "\t - {} {}\n".format(concept, row["code"])
s += "\t - {} {}\n".format(concept, row["CONCEPT"])
report.write("- Changed Concepts {}\n\n".format(s))
# ✅ ❌
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment