diff --git a/README.md b/README.md index 5dfb6cce5e92e99ced354cc462488fb9bfd6934d..2175c98051a9edc3ab93c8947c6cab233a955fe4 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ Expected Output: Use the following `acmc` command to generate the phenotype in `read2` format: ```bash -acmc phen map -t read2 +acmc phen map ``` Expected Output: diff --git a/acmc/main.py b/acmc/main.py index 89df65dda2d1da11eef38260f2d760a6bbf9846c..b702518e204a56d92a09bd97d0102cde67a78200 100644 --- a/acmc/main.py +++ b/acmc/main.py @@ -3,7 +3,7 @@ import logging from pathlib import Path import acmc -from acmc import trud, omop, phen, logging_config as lc +from acmc import trud, omop, phen, parse, logging_config as lc # setup logging logger = lc.setup_logger() @@ -167,17 +167,8 @@ def main(): phen_map_parser.add_argument( "-t", "--target-coding", - required=True, - choices=["read2", "read3", "icd10", "snomed", "opcs4"], - help="Specify the target coding (read2, read3, icd10, snomed, opcs4)", - ) - phen_map_parser.add_argument( - "-o", - "--output", - choices=["csv", "omop"], - nargs="+", # allows one or more values - default=["csv"], # default to CSV if not specified - help="Specify output format(s): 'csv', 'omop', or both (default: csv)", + choices=parse.SUPPORTED_CODE_TYPES, + help=f"Specify the target coding {parse.SUPPORTED_CODE_TYPES}", ) phen_map_parser.set_defaults(func=phen_map) diff --git a/acmc/omop.py b/acmc/omop.py index 4b8cc4c49868d990467d8cb52642a5689bc2823f..dd5a461dc7be73d19d3666490c892aa63382234f 100644 --- a/acmc/omop.py +++ b/acmc/omop.py @@ -16,7 +16,7 @@ logger = logging_config.setup_logger() # constants VOCAB_PATH = Path("./vocab/omop") -OMOP_CDM_Version ="54" +OMOP_CDM_Version = "54" OMOP_DB_FILENAME = f"omop_{OMOP_CDM_Version}.sqlite" DB_PATH = VOCAB_PATH / OMOP_DB_FILENAME VERSION_FILE = "omop_version.yaml" @@ -26,7 +26,7 @@ EXPORT_FILE = "omop_export.db" vocabularies = { "source": "OHDSI Athena", "url": "https://athena.ohdsi.org/vocabulary/list", - "cdm_version": OMOP_CDM_Version, + "cdm_version": OMOP_CDM_Version, "version": "", "vocabularies": [ {"id": 1, "name": "SNOMED"}, # No license required diff --git a/acmc/parse.py b/acmc/parse.py index ca5b3e1dc847994cab714142d1bfc946eb15bc64..798a10ba743f3d5e939e9ebd7037eb43bf21aaba 100644 --- a/acmc/parse.py +++ b/acmc/parse.py @@ -8,6 +8,9 @@ from acmc import trud, logging_config as lc # setup logging logger = lc.setup_logger() +# Define allowed values +SUPPORTED_CODE_TYPES = {"read2", "read3", "icd10", "snomed", "opcs4", "atc"} + class CodesError: """A class used in InvalidCodesException to report an error if a code parser check fails""" diff --git a/acmc/phen.py b/acmc/phen.py index a92bdb295986dbcec2dad4d4c9b8c46cb5f50a18..a9131b59ba704088478d7e697401707edb472468 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -70,6 +70,15 @@ CONFIG_SCHEMA = { }, }, }, + "map": { + "type": "list", + "schema": { + "type": "string", + "allowed": list( + parse.SUPPORTED_CODE_TYPES + ), # Ensure only predefined values are allowed + }, + }, "concept_sets": { "type": "list", "required": True, @@ -260,6 +269,7 @@ def init(phen_dir, remote_url): "vocabulary_name": "", "vocabulary_reference": "", }, + "translate": [], "concept_sets": [], } } @@ -603,7 +613,6 @@ def write_vocab_version(phen_path): def map(phen_dir, target_code_type): logger.info(f"Processing phenotype: {phen_dir}") - logger.debug(f"Target coding format: {target_code_type}") # Validate configuration validate(phen_dir) @@ -611,13 +620,33 @@ def map(phen_dir, target_code_type): # initialise paths phen_path = Path(phen_dir) config_path = phen_path / CONFIG_FILE - codes_path = phen_path / CODES_DIR # load configuration with config_path.open("r") as file: config = yaml.safe_load(file) phenotype = config["phenotype"] + if len(phenotype["map"]) == 0: + raise ValueError(f"No map codes defined in the phenotype configuration") + + if target_code_type is not None and target_code_type not in phenotype["map"]: + raise ValueError( + f"Target code type {target_code_type} not in phenotype configuration map {phenotype['map']}" + ) + + if target_code_type is not None: + map_target_code_type(phen_path, phenotype, target_code_type) + else: + for t in phenotype["map"]: + map_target_code_type(phen_path, phenotype, t) + + logger.info(f"Phenotype processed successfully") + + +def map_target_code_type(phen_path, phenotype, target_code_type): + + logger.debug(f"Target coding format: {target_code_type}") + codes_path = phen_path / CODES_DIR # Create output dataframe out = pd.DataFrame([]) code_errors = [] @@ -717,7 +746,7 @@ def map(phen_dir, target_code_type): write_vocab_version(phen_path) - logger.info(f"Phenotype processed successfully") + logger.info(f"Phenotype processed target code type {target_code_type}") def publish(phen_dir): diff --git a/docs/usage.md b/docs/usage.md index 1527165ea12c7f7d696f8ba70c011c35eb2e92a5..57271cf0f12b54054159c184e56c0e00d38da5f4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -96,9 +96,8 @@ The `phen` command is used phenotype-related operations. acmc phen map -d <PHENOTYPE_DIRECTORY> -t <TARGET_CODING> -o <OUTPUT_FORMAT> ``` - - `-t`, `--target-coding`: Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`). + - `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`). - `-d`, `--phen-dir`: (Optional) Directory of phenotype configuration (the default is ./build/phen). - - `-o`, `--output`: Output format(s) (`csv`, `omop`, or both), default is 'csv'. - **Publish Phenotype Configuration** diff --git a/examples/config1.yaml b/examples/config1.yaml index 19fd9c686ff25da888c1f7ce39d122ebc6b479db..09d0e807b3ff433682ea69a0642bebdec87bdd1e 100644 --- a/examples/config1.yaml +++ b/examples/config1.yaml @@ -4,6 +4,9 @@ phenotype: vocabulary_id: "ACMC_Example_1" vocabulary_name: "ACMC example 1 phenotype" vocabulary_reference: "https://git.soton.ac.uk/meldb/concepts-processing/-/tree/main/examples" + map: + - "read2" + - "read3" concept_sets: - name: "ABDO_PAIN" file: diff --git a/examples/config2.yaml b/examples/config2.yaml index 33d6df4eb06d24fc4f7e2e6940b56bfec50ead37..4c6252efb6fa4c7eec6520ddfc963e40d32e9ccc 100644 --- a/examples/config2.yaml +++ b/examples/config2.yaml @@ -4,6 +4,9 @@ phenotype: vocabulary_id: "ACMC_Example_2" vocabulary_name: "ACMC example 2 phenotype" vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example" + map: + - "read2" + - "read3" concept_sets: - name: "CVD_EVENTS" file: diff --git a/examples/config3.yaml b/examples/config3.yaml index 926ab602479bc7a83af4a39ff584cee14daccdce..764d7d8dd614132d4fed584d42b824a45694be9d 100644 --- a/examples/config3.yaml +++ b/examples/config3.yaml @@ -4,6 +4,10 @@ phenotype: vocabulary_id: "ACMC_Example_3" vocabulary_name: "ACMC example 3 phenotype" vocabulary_reference: "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example" + map: + - "read2" + - "read3" + - "snomed" concept_sets: - name: "CVD_EVENTS" file: diff --git a/tests/test_acmc.py b/tests/test_acmc.py index 6533847661e2c1b64a9194613fb5280bed5c1013..c4cb94efcc2630ff0285489525049d0fa41ab047 100644 --- a/tests/test_acmc.py +++ b/tests/test_acmc.py @@ -91,7 +91,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file): assert "Phenotype validated successfully" in caplog.text # map phenotype - for code_type in ["read2", "read3", "snomed"]: + for code_type in ["read2", "read3"]: with caplog.at_level(logging.DEBUG): monkeypatch.setattr( sys,