Commit 3a7d715c authored by mjbonifa

Started the pre-commit hook work, but it seems more complex as it requires downloads from GitHub etc. with usernames and passwords. #21
parent cae3acc7
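The commit message refers to pre-commit fetching hook repositories over the network: pre-commit reads a .pre-commit-config.yaml at the repository root and clones each listed repo from GitHub on first run, which is where usernames and passwords can come into play. The reformatting in the diff below matches black's style, so a minimal config sketch might look like this (the black hook and the pinned rev are illustrative assumptions, not part of this commit):

    repos:
      - repo: https://github.com/psf/black  # cloned from GitHub on first run
        rev: 24.1.1                         # assumed pin; substitute a real tag
        hooks:
          - id: black

After that, pre-commit install wires the hook into .git/hooks, and pre-commit run --all-files applies it to the whole tree.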
@@ -44,6 +44,7 @@ COL_ACTIONS = [SPLIT_COL_ACTION, CODES_COL_ACTION, DIVIDE_COL_ACTION]
 CODE_FILE_TYPES = [".xlsx", ".xls", ".csv"]
 
+
 class PhenValidationException(Exception):
     """Custom exception class raised when validation errors in phenotype configuration file"""
@@ -308,7 +309,9 @@ def validate(phen_dir):
             # check code file type is supported
             if concept_code_file_path.suffix not in CODE_FILE_TYPES:
-                raise ValueError(f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types")
+                raise ValueError(
+                    f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types"
+                )
 
             # check columns specified are a supported medical coding type
             for column in item["file"]["columns"]:
@@ -321,9 +324,7 @@ def validate(phen_dir):
             if "actions" in item["file"]:
                 for action in item["file"]["actions"]:
                     if action not in COL_ACTIONS:
-                        validation_errors.append(
-                            f"Action {action} is not supported"
-                        )
+                        validation_errors.append(f"Action {action} is not supported")
             else:
                 validation_errors.append(
@@ -356,7 +357,9 @@ def read_table_file(path, excel_sheet=None):
     elif path.suffix == ".dta":
         df = pd.read_stata(path, dtype=str)
     else:
-        raise ValueError(f"Unsupported filetype {codes_file_path.suffix}, only support{CODE_FILE_TYPES} code file types")
+        raise ValueError(
+            f"Unsupported filetype {codes_file_path.suffix}, only support{CODE_FILE_TYPES} code file types"
+        )
 
     return df
@@ -394,7 +397,10 @@ def preprocess_codes(df, concept_set, code_file_path, target_code_type=None):
     # TODO: Is there a better way of processing this action as it's distributed across
     # different parts of the programme.
-    if "actions" in concept_set["file"] and "divide_col" in concept_set["file"]["actions"]:
+    if (
+        "actions" in concept_set["file"]
+        and "divide_col" in concept_set["file"]["actions"]
+    ):
         divide_col_df = df[concept_set["file"]["actions"]["divide_col"]]
     else:
         divide_col_df = pd.DataFrame()
@@ -582,29 +588,23 @@ def map(phen_dir, target_code_type):
         # Map
         # if processing a source coding list with categorical data
-        if "actions" in concept_set["file"] and "divide_col" in concept_set["file"]["actions"] and len(df) > 0:
+        if (
+            "actions" in concept_set["file"]
+            and "divide_col" in concept_set["file"]["actions"]
+            and len(df) > 0
+        ):
             divide_col = concept_set["file"]["actions"]["divide_col"]
             logger.debug(f"Action: Dividing Table by {divide_col}")
             logger.debug(f"column into: {df[divide_col].unique()}")
             df_grp = df.groupby(divide_col)
             for cat, grp in df_grp:
                 if cat == concept_set["file"]["category"]:
-                    grp = grp.drop(
-                        columns=[divide_col]
-                    )  # delete categorical column
+                    grp = grp.drop(columns=[divide_col])  # delete categorical column
                     out = map_file(
-                        grp,
-                        target_code_type,
-                        out,
-                        concept_name=concept_set['name']
+                        grp, target_code_type, out, concept_name=concept_set["name"]
                     )
         else:
-            out = map_file(
-                df,
-                target_code_type,
-                out,
-                concept_name=concept_set['name']
-            )
+            out = map_file(df, target_code_type, out, concept_name=concept_set["name"])
 
         if len(code_errors) > 0:
             logger.error(f"The map processing has {len(code_errors)} errors")
@@ -847,9 +847,7 @@ def diff(phen_dir, phen_old_dir):
     new_config = new_phen_path / CONFIG_FILE
     with new_config.open("r") as file:
         new_config = yaml.safe_load(file)
-    report.write(
-        f"\n\n# Report for version {new_config['phenotype']['version']}\n\n"
-    )
+    report.write(f"\n\n# Report for version {new_config['phenotype']['version']}\n\n")
     report.write(f"- Removed outputs: {list(removed_outputs)}\n")
     report.write(f"- Added outputs: {list(added_outputs)}\n")
     report.write(f"- Common outputs: {list(common_outputs)}\n")