Compare revisions: meldb/concepts-processing
Commits on Source (18)
......@@ -58,7 +58,7 @@ def _phen_validate(args: argparse.Namespace):
def _phen_map(args: argparse.Namespace):
"""Handle the `phen map` command."""
phen.map(args.phen_dir, args.target_coding)
phen.map(args.phen_dir, args.target_coding, args.not_translate, args.no_metadata)
def _phen_export(args: argparse.Namespace):
......@@ -78,7 +78,7 @@ def _phen_copy(args: argparse.Namespace):
def _phen_diff(args: argparse.Namespace):
"""Handle the `phen diff` command."""
phen.diff(args.phen_dir, args.version, args.old_phen_dir, args.old_version)
phen.diff(args.phen_dir, args.version, args.old_phen_dir, args.old_version, args.not_check_config)
def main():
......@@ -217,6 +217,18 @@ def main():
choices=parse.SUPPORTED_CODE_TYPES,
help=f"Specify the target coding {parse.SUPPORTED_CODE_TYPES}",
)
phen_map_parser.add_argument(
"--not-translate",
action='store_true',
default=False,
help="(Optional) Prevent any phenotype translation using NHS TRUD vocabularies.",
)
phen_map_parser.add_argument(
"--no-metadata",
action='store_true',
default=False,
help="(Optional) Prevent copying of metadata columns to output.",
)
phen_map_parser.set_defaults(func=_phen_map)
# phen export
......@@ -323,6 +335,12 @@ def main():
required=True,
help="Old phenotype version to compare with the changed version",
)
phen_diff_parser.add_argument(
"--not-check-config",
action='store_true',
default=False,
help="(Optional) Prevent loading and comparing config file, in the case where one does not exist",
)
phen_diff_parser.set_defaults(func=_phen_diff)
# Parse arguments
......
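Aside (illustrative, not part of the diff): the three new switches are plain argparse store_true flags, so the dashes become underscores on the parsed namespace, which is why the handlers above read args.not_translate, args.no_metadata and args.not_check_config. A minimal standard-library sketch:

import argparse

parser = argparse.ArgumentParser(prog="phen-map-sketch")
parser.add_argument("--not-translate", action="store_true", default=False)
parser.add_argument("--no-metadata", action="store_true", default=False)
args = parser.parse_args(["--not-translate"])
assert args.not_translate is True and args.no_metadata is False
# main.py then forwards the parsed values, e.g.:
# phen.map(args.phen_dir, args.target_coding, args.not_translate, args.no_metadata)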
......@@ -192,7 +192,7 @@ class Read2(Proto):
class Read3(Proto):
def __init__(self):
super().__init__("Read3", trud.PROCESSED_PATH / "read3.parquet")
super().__init__("read3", trud.PROCESSED_PATH / "read3.parquet")
self.checks = [
(
......@@ -231,7 +231,7 @@ class Read3(Proto):
f"QA Alphanumeric Dot",
codes=codes,
codes_file=codes_file,
check_regex=codes.str.match(r"^[a-zA-Z0-9.]+$"),
mask=None,
code_type=self.name,
)
)
......@@ -246,7 +246,7 @@ class Read3(Proto):
f"QA In Database",
codes=codes,
codes_file=codes_file,
check_regex=self.in_database(codes, self.db, self.name),
mask=None,
code_type=self.name,
)
)
......
......@@ -127,21 +127,30 @@ CONFIG_SCHEMA = {
"type": "dict",
"schema": {
"name": {"type": "string", "required": True},
"file": {
"type": "dict",
"required": False,
"files": {
"type": "list",
"required": True,
"schema": {
"path": {"type": "string", "required": True},
"columns": {"type": "dict", "required": True},
"category": {
"type": "string"
}, # Optional but must be string if present
"actions": {
"type": "dict",
"schema": {"divide_col": {"type": "string"}},
"type": "dict",
"schema": {
"path": {"type": "string", "required": True},
"columns": {"type": "dict", "required": True},
"category": {
"type": "string"
}, # Optional but must be string if present
"actions": {
"type": "dict",
"schema": {
"divide_col": {"type": "string"},
"split_col": {"type": "string"},
"codes_col": {"type": "string"}
},
},
},
},
},
"metadata": {"type": "dict", "required": False},
},
},
},
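Aside (illustrative only; the shape mirrors the YAML examples further down this diff): under the reworked schema each concept set now carries a files list instead of a single file dict, with optional per-file category/actions and an optional set-level metadata dict. As a Python literal:

concept_set_entry = {
    "name": "HYPERTENSION",
    "files": [
        {
            "path": "hanlon/Read_codes_for_diagnoses.csv",
            "columns": {"read2": "Read Code"},
            "category": "2",                      # optional, but must be a string if present
            "actions": {"divide_col": "MMCode"},  # divide_col / split_col / codes_col
        }
    ],
    "metadata": {},  # optional; copied onto output rows unless --no-metadata is passed
}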
......@@ -518,38 +527,39 @@ def validate(phen_dir: str):
concept_set_names.append(item["name"])
# check codes definition
for item in phenotype["concept_sets"]:
# check concepte code file exists
concept_code_file_path = concepts_path / item["file"]["path"]
if not concept_code_file_path.exists():
validation_errors.append(
f"Coding file {str(concept_code_file_path.resolve())} does not exist"
)
# check concepte code file is not empty
if concept_code_file_path.stat().st_size == 0:
validation_errors.append(
f"Coding file {str(concept_code_file_path.resolve())} is an empty file"
)
# check code file type is supported
if concept_code_file_path.suffix not in CODE_FILE_TYPES:
raise ValueError(
f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types"
)
for files in phenotype["concept_sets"]:
for item in files["files"]:
# check concept code file exists
concept_code_file_path = concepts_path / item["path"]
if not concept_code_file_path.exists():
validation_errors.append(
f"Coding file {str(concept_code_file_path.resolve())} does not exist"
)
# check columns specified are a supported medical coding type
for column in item["file"]["columns"]:
if column not in code_types:
# check concept code file is not empty
if concept_code_file_path.stat().st_size == 0:
validation_errors.append(
f"Column type {column} for file {concept_code_file_path} is not supported"
f"Coding file {str(concept_code_file_path.resolve())} is an empty file"
)
# check the actions are supported
if "actions" in item["file"]:
for action in item["file"]["actions"]:
if action not in COL_ACTIONS:
validation_errors.append(f"Action {action} is not supported")
# check code file type is supported
if concept_code_file_path.suffix not in CODE_FILE_TYPES:
raise ValueError(
f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types"
)
# check columns specified are a supported medical coding type
for column in item["columns"]:
if column not in code_types:
validation_errors.append(
f"Column type {column} for file {concept_code_file_path} is not supported"
)
# check the actions are supported
if "actions" in item:
for action in item["actions"]:
if action not in COL_ACTIONS:
validation_errors.append(f"Action {action} is not supported")
if len(validation_errors) > 0:
_logger.error(validation_errors)
......@@ -588,12 +598,12 @@ def _process_actions(df: pd.DataFrame, concept_set: dict) -> pd.DataFrame:
# Perform Structural Changes to file before preprocessing
_logger.debug("Processing file structural actions")
if (
"actions" in concept_set["file"]
and "split_col" in concept_set["file"]["actions"]
and "codes_col" in concept_set["file"]["actions"]
"actions" in concept_set
and "split_col" in concept_set["actions"]
and "codes_col" in concept_set["actions"]
):
split_col = concept_set["file"]["actions"]["split_col"]
codes_col = concept_set["file"]["actions"]["codes_col"]
split_col = concept_set["actions"]["split_col"]
codes_col = concept_set["actions"]["codes_col"]
_logger.debug(
"Action: Splitting",
split_col,
......@@ -621,12 +631,12 @@ def _preprocess_source_concepts(
# Preprocess codes
code_types = parse.CodeTypeParser().code_types
for code_type in concept_set["file"]["columns"]:
for code_type in concept_set["columns"]:
parser = code_types[code_type]
_logger.info(f"Processing {code_type} codes for {code_file_path}")
# get codes by column name
source_col_name = concept_set["file"]["columns"][code_type]
source_col_name = concept_set["columns"][code_type]
codes = df[source_col_name].dropna()
codes = codes.astype(str) # convert to string
codes = codes.str.strip() # remove excess spaces
......@@ -653,7 +663,7 @@ def _preprocess_source_concepts(
# Translate Df with multiple codes into single code type Series
def translate_codes(
source_df: pd.DataFrame, target_code_type: str, concept_name: str
source_df: pd.DataFrame, target_code_type: str, concept_name: str, not_translate:bool
) -> pd.DataFrame:
"""Translates each source code type the source coding list into a target type and returns all conversions as a concept set"""
......@@ -678,7 +688,7 @@ def translate_codes(
_logger.debug(
f"Target code type {target_code_type} is the same as source code type {len(source_df)}, copying codes rather than translating"
)
else:
elif not not_translate:
# get the translation filename using source to target code types
filename = f"{source_code_type}_to_{target_code_type}.parquet"
map_path = trud.PROCESSED_PATH / filename
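Aside (illustrative, with example code types): the new not_translate flag only gates the lookup of the TRUD translation table; codes already in the target type are still copied through. A small sketch of the branch above:

source_code_type, target_code_type = "read2", "snomed"  # example types only
not_translate = True
if source_code_type == target_code_type:
    pass  # copied rather than translated, regardless of the flag
elif not not_translate:
    filename = f"{source_code_type}_to_{target_code_type}.parquet"  # "read2_to_snomed.parquet"
else:
    pass  # --not-translate: cross-type codes are skipped rather than translated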
......@@ -725,7 +735,7 @@ def _write_code_errors(code_errors: list, code_errors_path: Path):
"SOURCE": err.codes_file,
"CAUSE": err.message,
}
for err in code_errors
for err in code_errors if err.mask is not None
]
)
......@@ -773,7 +783,7 @@ def write_vocab_version(phen_path: Path):
)
def map(phen_dir: str, target_code_type: str):
def map(phen_dir: str, target_code_type: str, not_translate:bool, no_metadata:bool):
_logger.info(f"Processing phenotype: {phen_dir}")
# Validate configuration
......@@ -797,15 +807,15 @@ def map(phen_dir: str, target_code_type: str):
)
if target_code_type is not None:
_map_target_code_type(phen_path, phenotype, target_code_type)
_map_target_code_type(phen_path, phenotype, target_code_type, not_translate, no_metadata)
else:
for t in phenotype["map"]:
_map_target_code_type(phen_path, phenotype, t)
_map_target_code_type(phen_path, phenotype, t, not_translate, no_metadata)
_logger.info(f"Phenotype processed successfully")
def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str):
def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool, no_metadata:bool):
_logger.debug(f"Target coding format: {target_code_type}")
concepts_path = phen_path / CONCEPTS_DIR
# Create output dataframe
......@@ -813,67 +823,87 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
code_errors = []
# Process each folder in codes section
for concept_set in phenotype["concept_sets"]:
_logger.debug(f"--- {concept_set['file']} ---")
# Load code file
codes_file_path = Path(concepts_path / concept_set["file"]["path"])
df = _read_table_file(codes_file_path)
# process structural actions
df = _process_actions(df, concept_set)
# preprocessing and validate of source concepts
_logger.debug("Processing and validating source concept codes")
df, errors = _preprocess_source_concepts(
df,
concept_set,
codes_file_path,
)
# create df with just the source code columns
source_column_names = list(concept_set["file"]["columns"].keys())
source_df = df[source_column_names]
for files in phenotype["concept_sets"]:
concept_set_name = files["name"]
if "metadata" in files:
concept_set_metadata = files["metadata"]
else:
concept_set_metadata = {}
for concept_set in files["files"]:
_logger.debug(f"--- {concept_set} ---")
# Load code file
codes_file_path = Path(concepts_path / concept_set["path"])
df = _read_table_file(codes_file_path)
# process structural actions
df = _process_actions(df, concept_set)
# preprocessing and validate of source concepts
_logger.debug("Processing and validating source concept codes")
df, errors = _preprocess_source_concepts(
df,
concept_set,
codes_file_path,
)
_logger.debug(source_df.columns)
_logger.debug(source_df.head())
# create df with just the source code columns
source_column_names = list(concept_set["columns"].keys())
source_df = df[source_column_names]
_logger.debug(
f"Length of errors from _preprocess_source_concepts {len(errors)}"
)
if len(errors) > 0:
code_errors.extend(errors)
_logger.debug(f" Length of code_errors {len(code_errors)}")
_logger.debug(source_df.columns)
_logger.debug(source_df.head())
# Map source concepts codes to target codes
# if processing a source coding list with categorical data
if (
"actions" in concept_set["file"]
and "divide_col" in concept_set["file"]["actions"]
and len(df) > 0
):
divide_col = concept_set["file"]["actions"]["divide_col"]
_logger.debug(f"Action: Dividing Table by {divide_col}")
_logger.debug(f"column into: {df[divide_col].unique()}")
df_grp = df.groupby(divide_col)
for cat, grp in df_grp:
if cat == concept_set["file"]["category"]:
grp = grp.drop(columns=[divide_col]) # delete categorical column
source_df = grp[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set["name"],
)
out = pd.concat([out, trans_out])
else:
source_df = df[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set["name"],
_logger.debug(
f"Length of errors from _preprocess_source_concepts {len(errors)}"
)
out = pd.concat([out, trans_out])
if len(errors) > 0:
code_errors.extend(errors)
_logger.debug(f" Length of code_errors {len(code_errors)}")
# Map source concepts codes to target codes
# if processing a source coding list with categorical data
if (
"actions" in concept_set
and "divide_col" in concept_set["actions"]
and len(df) > 0
):
divide_col = concept_set["actions"]["divide_col"]
_logger.debug(f"Action: Dividing Table by {divide_col}")
_logger.debug(f"column into: {df[divide_col].unique()}")
df_grp = df.groupby(divide_col)
for cat, grp in df_grp:
if cat == concept_set["category"]:
grp = grp.drop(
columns=[divide_col]
) # delete categorical column
source_df = grp[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set_name,
not_translate=not_translate,
)
trans_out = add_metadata(
codes=trans_out,
metadata=concept_set_metadata,
no_metadata=no_metadata,
)
out = pd.concat([out, trans_out])
else:
source_df = df[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set_name,
not_translate=not_translate,
)
trans_out = add_metadata(
codes=trans_out,
metadata=concept_set_metadata,
no_metadata=no_metadata,
)
out = pd.concat([out, trans_out])
if len(code_errors) > 0:
_logger.error(f"The map processing has {len(code_errors)} errors")
......@@ -894,48 +924,51 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
out = out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
out = out.sort_values(by=["CONCEPT_SET", "CONCEPT"])
out_count = len(out.index)
# out_count = len(out.index)
# added metadata
# Loop over each source_concept_type and perform the left join on all columns apart from source code columns
result_list = []
source_column_names = list(concept_set["file"]["columns"].keys())
for source_concept_type in source_column_names:
# Filter output based on the current source_concept_type
out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type]
filtered_count = len(out_filtered_df.index)
# Remove the source type columns except the current type will leave the metadata and the join
remove_types = [
type for type in source_column_names if type != source_concept_type
]
metadata_df = df.drop(columns=remove_types)
metadata_df = metadata_df.rename(
columns={source_concept_type: "SOURCE_CONCEPT"}
)
metadata_df_count = len(metadata_df.index)
# Perform the left join with df2 on SOURCE_CONCEPT to add the metadata
result = pd.merge(out_filtered_df, metadata_df, how="left", on="SOURCE_CONCEPT")
result_count = len(result.index)
_logger.debug(
f"Adding metadata for {source_concept_type}: out_count {out_count}, filtered_count {filtered_count}, metadata_df_count {metadata_df_count}, result_count {result_count}"
)
# Append the result to the result_list
result_list.append(result)
# result_list = []
# for files in phenotype["concept_sets"]:
# concept_set_name = files["name"]
# for concept_set in files["files"]:
# source_column_names = list(concept_set["columns"].keys())
# for source_concept_type in source_column_names:
# # Filter output based on the current source_concept_type
# out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type]
# filtered_count = len(out_filtered_df.index)
# # Remove the source type columns except the current type will leave the metadata and the join
# remove_types = [
# type for type in source_column_names if type != source_concept_type
# ]
# metadata_df = df.drop(columns=remove_types)
# metadata_df = metadata_df.rename(
# columns={source_concept_type: "SOURCE_CONCEPT"}
# )
# metadata_df_count = len(metadata_df.index)
# Perform the left join with df2 on SOURCE_CONCEPT to add the metadata
# result = pd.merge(out_filtered_df, metadata_df, how="left", on="SOURCE_CONCEPT")
# result_count = len(result.index)
# _logger.debug(
# f"Adding metadata for {source_concept_type}: out_count {out_count}, filtered_count {filtered_count}, metadata_df_count {metadata_df_count}, result_count {result_count}"
# )
# # Append the result to the result_list
# result_list.append(result)
# Concatenate all the results into a single DataFrame
final_out = pd.concat(result_list, ignore_index=True)
final_out = final_out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
_logger.debug(
f"Check metadata processing counts: before {len(out.index)} : after {len(final_out.index)}"
)
# final_out = pd.concat(result_list, ignore_index=True)
# final_out = final_out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
# _logger.debug(
# f"Check metadata processing counts: before {len(out.index)} : after {len(final_out.index)}"
# )
# Save output to map directory
output_filename = target_code_type + ".csv"
map_path = phen_path / MAP_DIR / output_filename
final_out.to_csv(map_path, index=False)
out.to_csv(map_path, index=False)
_logger.info(f"Saved mapped concepts to {str(map_path.resolve())}")
# save concept sets as separate files
......@@ -950,7 +983,7 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
concept_set_path.mkdir(parents=True, exist_ok=True)
# write each concept as a separate file
for name, concept in final_out.groupby("CONCEPT_SET"):
for name, concept in out.groupby("CONCEPT_SET"):
concept = concept.sort_values(by="CONCEPT") # sort rows
concept = concept.dropna(how="all", axis=1) # remove empty cols
concept = concept.reindex(
......@@ -965,6 +998,22 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
_logger.info(f"Phenotype processed target code type {target_code_type}")
# Add metadata dict to each row of Df codes
def add_metadata(
codes: pd.DataFrame, metadata: dict, no_metadata:bool,
) -> pd.DataFrame:
"""Add concept set metadata, stored as a dictionary, to each concept row"""
if not no_metadata:
for meta_name, meta_value in metadata.items():
codes[meta_name] = meta_value
_logger.debug(
f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}"
)
return codes
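Aside (usage sketch, not part of the diff; the rows and metadata keys are made up): add_metadata stamps every row of a translated concept-set frame with the set-level metadata columns, and returns the frame unchanged when --no-metadata is passed:

import pandas as pd

codes = pd.DataFrame({"CONCEPT": ["G20..", "G24.."], "CONCEPT_SET": ["HYPERTENSION"] * 2})
codes = add_metadata(codes, metadata={"source": "hanlon"}, no_metadata=False)
# every row now carries a "source" column; with no_metadata=True nothing is added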
def _generate_version_tag(
repo: git.Repo, increment: str = DEFAULT_VERSION_INC, use_v_prefix: bool = False
) -> str:
......@@ -1165,7 +1214,7 @@ def copy(phen_dir: str, target_dir: str, version: str):
def extract_concepts(config_data: dict) -> Tuple[dict, Set[str]]:
"""Extracts concepts as {name: file_path} dictionary and a name set."""
concepts_dict = {
item["name"]: item["file"]["path"]
item["name"]: [file["path"] for file in item["files"]]
for item in config_data["phenotype"]["concept_sets"]
}
name_set = set(concepts_dict.keys())
......@@ -1190,7 +1239,7 @@ def diff_config(old_config: dict, new_config: dict) -> str:
old_concepts, old_names = extract_concepts(old_config)
new_concepts, new_names = extract_concepts(new_config)
# Check added and removed names
# Check added and removed concept set names
added_names = new_names - old_names # Names that appear in new but not in old
removed_names = old_names - new_names # Names that were in old but not in new
......@@ -1331,37 +1380,42 @@ def diff_phen(
old_phen_path: Path,
old_version: str,
report_path: Path,
not_check_config:bool,
):
"""Compare the differences between two versions of a phenotype"""
# validate phenotypes
_logger.debug(f"Validating for diff old path: {str(old_phen_path.resolve())}")
validate(str(old_phen_path.resolve()))
_logger.debug(f"Validating for diff new path: {str(new_phen_path.resolve())}")
validate(str(new_phen_path.resolve()))
# get old and new config
old_config_path = old_phen_path / CONFIG_FILE
with old_config_path.open("r") as file:
old_config = yaml.safe_load(file)
new_config_path = new_phen_path / CONFIG_FILE
with new_config_path.open("r") as file:
new_config = yaml.safe_load(file)
# write report heading
report = f"# Phenotype Comparison Report\n"
report += f"## Original phenotype\n"
report += f" - {old_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {old_version}\n"
report += f" - {str(old_phen_path.resolve())}\n"
report += f"## Changed phenotype:\n"
report += f" - {new_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {new_version}\n"
report += f" - {str(new_phen_path.resolve())}\n"
# Step 1: check differences configuration files
# Convert list of dicts into a dict: {name: file}
report += diff_config(old_config, new_config)
if not not_check_config:
# validate phenotypes
_logger.debug(f"Validating for diff old path: {str(old_phen_path.resolve())}")
validate(str(old_phen_path.resolve()))
_logger.debug(f"Validating for diff new path: {str(new_phen_path.resolve())}")
validate(str(new_phen_path.resolve()))
# get old and new config
old_config_path = old_phen_path / CONFIG_FILE
with old_config_path.open("r") as file:
old_config = yaml.safe_load(file)
new_config_path = new_phen_path / CONFIG_FILE
with new_config_path.open("r") as file:
new_config = yaml.safe_load(file)
# write report
report += f"## Original phenotype\n"
report += f" - {old_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {old_version}\n"
report += f" - {str(old_phen_path.resolve())}\n"
report += f"## Changed phenotype:\n"
report += f" - {new_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {new_version}\n"
report += f" - {str(new_phen_path.resolve())}\n"
# Convert list of dicts into a dict: {name: file}
report += diff_config(old_config, new_config)
# Step 2: check differences between map files
# List files from output directories
......@@ -1378,7 +1432,7 @@ def diff_phen(
_logger.info(f"Phenotypes diff'd successfully")
def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str):
def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str, not_check_config:bool):
# make tmp directory .acmc
timestamp = time.strftime("%Y%m%d_%H%M%S")
temp_dir = Path(f".acmc/diff_{timestamp}")
......@@ -1439,7 +1493,7 @@ def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str):
report_filename = f"{version}_{old_version}_diff.md"
report_path = changed_phen_path / report_filename
# diff old with new
diff_phen(changed_path, version, old_path, old_version, report_path)
diff_phen(changed_path, version, old_path, old_version, report_path, not_check_config)
finally:
# clean up tmp directory
......
......@@ -760,7 +760,7 @@
<section id="SUPPORTED_CODE_TYPES">
<div class="attr variable">
<span class="name">SUPPORTED_CODE_TYPES</span> =
<span class="default_value">{&#39;opcs4&#39;, &#39;icd10&#39;, &#39;atc&#39;, &#39;snomed&#39;, &#39;read2&#39;, &#39;read3&#39;}</span>
<span class="default_value">{&#39;atc&#39;, &#39;read2&#39;, &#39;read3&#39;, &#39;opcs4&#39;, &#39;snomed&#39;, &#39;icd10&#39;}</span>
</div>
......
Source diffs for two files could not be displayed: they are too large.
......@@ -111,6 +111,9 @@ The `phen` command is used phenotype-related operations.
- `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
- `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen).
- `--not-translate`: (Optional) Prevent any phenotype translation using NHS TRUD vocabularies, so only concepts already in the target coding will be mapped (see the sketch after this option list).
- `--no-metadata`: (Optional) Prevent copying of metadata columns to output.
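A short sketch of the equivalent programmatic call (the import path is an assumption; main.py simply forwards the parsed flags as shown in the cli diff above):

from acmc import phen  # hypothetical import path; adjust to the project's package name

phen.map(
    "./workspace/phen",   # --phen-dir (default noted above)
    "read2",              # --target-coding
    not_translate=True,   # --not-translate
    no_metadata=True,     # --no-metadata
)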
- **Publish Phenotype Configuration**
......
......@@ -8,9 +8,8 @@ phenotype:
- "read2"
- "read3"
concept_sets:
- name: "ABDO_PAIN"
file:
path: "clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv"
- name: "ABDO_PAIN"
files:
- path: "clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv"
columns:
read2: "code"
......@@ -8,13 +8,13 @@ phenotype:
- "read2"
- "read3"
concept_sets:
- name: "CVD_EVENTS"
file:
path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
- name: "CVD_EVENTS"
files:
- path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
- name: "DID_NOT_ATTEND"
file:
path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
icd10: "code"
- name: "DID_NOT_ATTEND"
files:
- path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
\ No newline at end of file
......@@ -10,29 +10,29 @@ phenotype:
- "snomed"
concept_sets:
- name: "CVD_EVENTS"
file:
path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
files:
- path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
- name: "DID_NOT_ATTEND"
file:
path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
files:
- path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
- name: "HYPERTENSION"
file:
path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "2"
actions:
divide_col: "MMCode"
files:
- path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "2"
actions:
divide_col: "MMCode"
- name: "DEPRESSION"
file:
path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "3"
actions:
divide_col: "MMCode"
files:
- path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "3"
actions:
divide_col: "MMCode"
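Aside (illustrative sketch of the divide_col action configured above, using made-up rows): the coding table is grouped on the divide column and only the group whose key matches each set's category string is kept, as in _map_target_code_type earlier in this diff:

import pandas as pd

df = pd.DataFrame({"Read Code": ["G20..", "E112."], "MMCode": ["2", "3"]})  # example rows
for cat, grp in df.groupby("MMCode"):
    if cat == "2":  # category: "2" keeps only the HYPERTENSION group
        hypertension_codes = grp.drop(columns=["MMCode"])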
......@@ -128,26 +128,6 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
main.main()
assert "Phenotype published successfully" in caplog.text
# copy phenotype'
with caplog.at_level(logging.DEBUG):
monkeypatch.setattr(
sys,
"argv",
[
"main.py",
"phen",
"copy",
"-d",
str(phen_path.resolve()),
"-td",
str(tmp_dir.resolve()),
"-v",
"0.0.1",
],
)
main.main()
assert "Phenotype copied successfully" in caplog.text
# diff phenotype
with caplog.at_level(logging.DEBUG):
old_path = tmp_dir / "0.0.1"
......