Compare revisions

mjbonifa · mjbonifa · e396067b · e396067b · e396067b · e396067b
--- a/acmc/main.py
+++ b/acmc/main.py
@@ -78,7 +78,13 @@ def _phen_copy(args: argparse.Namespace):

 def _phen_diff(args: argparse.Namespace):
    """Handle the `phen diff` command."""
-    phen.diff(args.phen_dir, args.version, args.old_phen_dir, args.old_version, args.not_check_config)
+    phen.diff(
+        args.phen_dir,
+        args.version,
+        args.old_phen_dir,
+        args.old_version,
+        args.not_check_config,
+    )


 def main():
@@ -219,16 +225,16 @@ def main():
    )
    phen_map_parser.add_argument(
        "--not-translate",
-        action='store_true',
+        action="store_true",
        default=False,
        help="(Optional) Prevent any phenotype translation using NHS TRUD vocabularies.",
-    ) 
+    )
    phen_map_parser.add_argument(
        "--no-metadata",
-        action='store_true',
+        action="store_true",
        default=False,
        help="(Optional) Prevent copying of metadata columns to output.",
-    ) 
+    )
    phen_map_parser.set_defaults(func=_phen_map)

    # phen export
@@ -337,10 +343,10 @@ def main():
    )
    phen_diff_parser.add_argument(
        "--not-check-config",
-        action='store_true',
+        action="store_true",
        default=False,
        help="(Optional) Prevent loading and comparing config file, in the case where one does not exist",
-    )   
+    )
    phen_diff_parser.set_defaults(func=_phen_diff)

    # Parse arguments

--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -143,10 +143,9 @@ CONFIG_SCHEMA = {
                                        "schema": {
                                            "divide_col": {"type": "string"},
                                            "split_col": {"type": "string"},
-                                            "codes_col": {"type": "string"}
+                                            "codes_col": {"type": "string"},
                                        },
                                    },
-                                    
                                },
                            },
                        },
@@ -663,7 +662,10 @@ def _preprocess_source_concepts(

 # Translate Df with multiple codes into single code type Series
 def translate_codes(
-    source_df: pd.DataFrame, target_code_type: str, concept_name: str, not_translate:bool
+    source_df: pd.DataFrame,
+    target_code_type: str,
+    concept_name: str,
+    not_translate: bool,
 ) -> pd.DataFrame:
    """Translates each source code type the source coding list into a target type and returns all conversions as a concept set"""

@@ -735,7 +737,8 @@ def _write_code_errors(code_errors: list, code_errors_path: Path):
                "SOURCE": err.codes_file,
                "CAUSE": err.message,
            }
-            for err in code_errors if err.mask is not None
+            for err in code_errors
+            if err.mask is not None
        ]
    )

@@ -783,7 +786,7 @@ def write_vocab_version(phen_path: Path):
        )


-def map(phen_dir: str, target_code_type: str, not_translate:bool, no_metadata:bool):
+def map(phen_dir: str, target_code_type: str, not_translate: bool, no_metadata: bool):
    _logger.info(f"Processing phenotype: {phen_dir}")

    # Validate configuration
@@ -807,7 +810,9 @@ def map(phen_dir: str, target_code_type: str, not_translate:bool, no_metadata:bo
        )

    if target_code_type is not None:
-        _map_target_code_type(phen_path, phenotype, target_code_type, not_translate, no_metadata)
+        _map_target_code_type(
+            phen_path, phenotype, target_code_type, not_translate, no_metadata
+        )
    else:
        for t in phenotype["map"]:
            _map_target_code_type(phen_path, phenotype, t, not_translate, no_metadata)
@@ -815,7 +820,13 @@ def map(phen_dir: str, target_code_type: str, not_translate:bool, no_metadata:bo
    _logger.info(f"Phenotype processed successfully")


-def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool, no_metadata:bool):
+def _map_target_code_type(
+    phen_path: Path,
+    phenotype: dict,
+    target_code_type: str,
+    not_translate: bool,
+    no_metadata: bool,
+):
    _logger.debug(f"Target coding format: {target_code_type}")
    concepts_path = phen_path / CONCEPTS_DIR
    # Create output dataframe
@@ -1000,10 +1011,12 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st

 # Add metadata dict to each row of Df codes
 def add_metadata(
-    codes: pd.DataFrame, metadata: dict, no_metadata:bool,
+    codes: pd.DataFrame,
+    metadata: dict,
+    no_metadata: bool,
 ) -> pd.DataFrame:
    """Add concept set metadata, stored as a dictionary, to each concept row"""
-    
+
    if not no_metadata:
        for meta_name, meta_value in metadata.items():
            codes[meta_name] = meta_value
@@ -1012,7 +1025,7 @@ def add_metadata(
            )

    return codes
-    
+

 def _generate_version_tag(
    repo: git.Repo, increment: str = DEFAULT_VERSION_INC, use_v_prefix: bool = False
@@ -1380,7 +1393,7 @@ def diff_phen(
    old_phen_path: Path,
    old_version: str,
    report_path: Path,
-    not_check_config:bool,
+    not_check_config: bool,
 ):
    """Compare the differences between two versions of a phenotype"""

@@ -1413,7 +1426,6 @@ def diff_phen(
        report += f"  - {new_version}\n"
        report += f"  - {str(new_phen_path.resolve())}\n"

-        
        # Convert list of dicts into a dict: {name: file}
        report += diff_config(old_config, new_config)

@@ -1432,7 +1444,13 @@ def diff_phen(
    _logger.info(f"Phenotypes diff'd successfully")


-def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str, not_check_config:bool):
+def diff(
+    phen_dir: str,
+    version: str,
+    old_phen_dir: str,
+    old_version: str,
+    not_check_config: bool,
+):
    # make tmp directory .acmc
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    temp_dir = Path(f".acmc/diff_{timestamp}")
@@ -1449,9 +1467,9 @@ def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str, not_c
            f"Old phenotype directory does not exist: {str(old_phen_path.resolve())}"
        )

-    t_path = old_phen_path / "config.yml"
-    with t_path.open("r") as file:
-        c = yaml.safe_load(file)
+    #    t_path = old_phen_path / "config.yml"
+    #    with t_path.open("r") as file:
+    #        c = yaml.safe_load(file)

    try:
        # Create the directory
@@ -1493,10 +1511,11 @@ def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str, not_c
        report_filename = f"{version}_{old_version}_diff.md"
        report_path = changed_phen_path / report_filename
        # diff old with new
-        diff_phen(changed_path, version, old_path, old_version, report_path, not_check_config)
+        diff_phen(
+            changed_path, version, old_path, old_version, report_path, not_check_config
+        )

    finally:
        # clean up tmp directory
        if temp_dir.exists():
            shutil.rmtree(temp_dir)
-            print(f"Temporary directory removed: {temp_dir}")
--- a/docs/api/acmc.html
+++ b/docs/api/acmc.html
--- a/docs/api/acmc/logging_config.html
+++ b/docs/api/acmc/logging_config.html
--- a/docs/api/acmc/main.html
+++ b/docs/api/acmc/main.html
--- a/docs/api/acmc/omop.html
+++ b/docs/api/acmc/omop.html
--- a/docs/api/acmc/parse.html
+++ b/docs/api/acmc/parse.html
--- a/docs/api/acmc/phen.html
+++ b/docs/api/acmc/phen.html
--- a/docs/api/acmc/trud.html
+++ b/docs/api/acmc/trud.html
--- a/docs/api/acmc/util.html
+++ b/docs/api/acmc/util.html
--- a/docs/api/search.js
+++ b/docs/api/search.js
--- a/tests/test_acmc.py
+++ b/tests/test_acmc.py
@@ -58,6 +58,7 @@ def test_phen_init_local_specified(tmp_dir, monkeypatch, caplog):
 def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
    print(f"Temporary directory: {tmp_dir}")  # Prints path for debugging

+    # init phenotype in temp directory
    with caplog.at_level(logging.DEBUG):
        phen_path = tmp_dir / "phen"
        phen_path = phen_path.resolve()
@@ -69,9 +70,8 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
        main.main()
    assert "Phenotype initialised successfully" in caplog.text

+    # copy examples across, then validate phenotype
    with caplog.at_level(logging.DEBUG):
-        # validate phenotype
-        # copy examples across
        shutil.rmtree(phen_path / phen.CONCEPTS_DIR)
        ex_path = Path("./examples").resolve()
        for item in ex_path.iterdir():
@@ -91,7 +91,7 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
        main.main()
    assert "Phenotype validated successfully" in caplog.text

-    # map phenotype
+    # map phenotype once per code type (read2, then read3)
    for code_type in ["read2", "read3"]:
        with caplog.at_level(logging.DEBUG):
            monkeypatch.setattr(
@@ -130,7 +130,6 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):

    # diff phenotype
    with caplog.at_level(logging.DEBUG):
-        old_path = tmp_dir / "0.0.1"
        monkeypatch.setattr(
            sys,
            "argv",
@@ -140,6 +139,8 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
                "diff",
                "-d",
                str(phen_path.resolve()),
+                "-od",
+                str(phen_path.resolve()),
                "-ov",
                "0.0.1",
            ],
No results found