Skip to content
Snippets Groups Projects
Commit e549bf4b authored by mjbonifa's avatar mjbonifa
Browse files

fix: added map definition to the config.yaml so that we know which maps are...

fix: added a map definition to the config.yaml so that we know which maps are expected for the phenotype. It is still possible to run just one of them using the -t option, but it must be specified in the config file. This means a user can run acmc phen map and all the required codes are created. It also reduces the chance of inconsistency between the map files generated between versions. It does not remove it entirely, because it is still possible for a user to run with only a subset of the coding types, but that should be discouraged. We retain the option for phenotype development because you might not want to run everything all of the time due to the time it takes. Closes #40.
parent 3ac3a386
Branches
No related tags found
No related merge requests found
......@@ -215,7 +215,7 @@ Expected Output:
Use the following `acmc` command to generate the phenotype in `read2` format:
```bash
acmc phen map -t read2
acmc phen map
```
Expected Output:
......
......@@ -3,7 +3,7 @@ import logging
from pathlib import Path
import acmc
from acmc import trud, omop, phen, logging_config as lc
from acmc import trud, omop, phen, parse, logging_config as lc
# setup logging
logger = lc.setup_logger()
......@@ -167,17 +167,8 @@ def main():
phen_map_parser.add_argument(
"-t",
"--target-coding",
required=True,
choices=["read2", "read3", "icd10", "snomed", "opcs4"],
help="Specify the target coding (read2, read3, icd10, snomed, opcs4)",
)
phen_map_parser.add_argument(
"-o",
"--output",
choices=["csv", "omop"],
nargs="+", # allows one or more values
default=["csv"], # default to CSV if not specified
help="Specify output format(s): 'csv', 'omop', or both (default: csv)",
choices=parse.SUPPORTED_CODE_TYPES,
help=f"Specify the target coding {parse.SUPPORTED_CODE_TYPES}",
)
phen_map_parser.set_defaults(func=phen_map)
......
......@@ -8,6 +8,9 @@ from acmc import trud, logging_config as lc
# setup logging
logger = lc.setup_logger()
# Define allowed values
SUPPORTED_CODE_TYPES = {"read2", "read3", "icd10", "snomed", "opcs4", "atc"}
class CodesError:
"""A class used in InvalidCodesException to report an error if a code parser check fails"""
......
......@@ -70,6 +70,15 @@ CONFIG_SCHEMA = {
},
},
},
"map": {
"type": "list",
"schema": {
"type": "string",
"allowed": list(
parse.SUPPORTED_CODE_TYPES
), # Ensure only predefined values are allowed
},
},
"concept_sets": {
"type": "list",
"required": True,
......@@ -260,6 +269,7 @@ def init(phen_dir, remote_url):
"vocabulary_name": "",
"vocabulary_reference": "",
},
"translate": [],
"concept_sets": [],
}
}
......@@ -603,7 +613,6 @@ def write_vocab_version(phen_path):
def map(phen_dir, target_code_type):
logger.info(f"Processing phenotype: {phen_dir}")
logger.debug(f"Target coding format: {target_code_type}")
# Validate configuration
validate(phen_dir)
......@@ -611,13 +620,33 @@ def map(phen_dir, target_code_type):
# initialise paths
phen_path = Path(phen_dir)
config_path = phen_path / CONFIG_FILE
codes_path = phen_path / CODES_DIR
# load configuration
with config_path.open("r") as file:
config = yaml.safe_load(file)
phenotype = config["phenotype"]
if len(phenotype["map"]) == 0:
raise ValueError(f"No map codes defined in the phenotype configuration")
if target_code_type is not None and target_code_type not in phenotype["map"]:
raise ValueError(
f"Target code type {target_code_type} not in phenotype configuration map {phenotype['map']}"
)
if target_code_type is not None:
map_target_code_type(phen_path, phenotype, target_code_type)
else:
for t in phenotype["map"]:
map_target_code_type(phen_path, phenotype, t)
logger.info(f"Phenotype processed successfully")
def map_target_code_type(phen_path, phenotype, target_code_type):
logger.debug(f"Target coding format: {target_code_type}")
codes_path = phen_path / CODES_DIR
# Create output dataframe
out = pd.DataFrame([])
code_errors = []
......@@ -717,7 +746,7 @@ def map(phen_dir, target_code_type):
write_vocab_version(phen_path)
logger.info(f"Phenotype processed successfully")
logger.info(f"Phenotype processed target code type {target_code_type}")
def publish(phen_dir):
......
......@@ -96,9 +96,8 @@ The `phen` command is used for phenotype-related operations.
acmc phen map -d <PHENOTYPE_DIRECTORY> -t <TARGET_CODING>
```
- `-t`, `--target-coding`: Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
- `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
- `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen).
- `-o`, `--output`: Output format(s) (`csv`, `omop`, or both), default is 'csv'.
- **Publish Phenotype Configuration**
......
......@@ -4,6 +4,9 @@ phenotype:
vocabulary_id: "ACMC_Example_1"
vocabulary_name: "ACMC example 1 phenotype"
vocabulary_reference: "https://git.soton.ac.uk/meldb/concepts-processing/-/tree/main/examples"
map:
- "read2"
- "read3"
concept_sets:
- name: "ABDO_PAIN"
file:
......
......@@ -4,6 +4,9 @@ phenotype:
vocabulary_id: "ACMC_Example_2"
vocabulary_name: "ACMC example 2 phenotype"
vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
map:
- "read2"
- "read3"
concept_sets:
- name: "CVD_EVENTS"
file:
......
......@@ -4,6 +4,10 @@ phenotype:
vocabulary_id: "ACMC_Example_3"
vocabulary_name: "ACMC example 3 phenotype"
vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
map:
- "read2"
- "read3"
- "snomed"
concept_sets:
- name: "CVD_EVENTS"
file:
......
......@@ -91,7 +91,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
assert "Phenotype validated successfully" in caplog.text
# map phenotype
for code_type in ["read2", "read3", "snomed"]:
for code_type in ["read2", "read3"]:
with caplog.at_level(logging.DEBUG):
monkeypatch.setattr(
sys,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment