Compare revisions: meldb/concepts-processing
Commits on Source (18)
......@@ -58,7 +58,7 @@ def _phen_validate(args: argparse.Namespace):
def _phen_map(args: argparse.Namespace):
"""Handle the `phen map` command."""
phen.map(args.phen_dir, args.target_coding)
phen.map(args.phen_dir, args.target_coding, args.not_translate, args.no_metadata)
def _phen_export(args: argparse.Namespace):
......@@ -78,7 +78,7 @@ def _phen_copy(args: argparse.Namespace):
def _phen_diff(args: argparse.Namespace):
"""Handle the `phen diff` command."""
phen.diff(args.phen_dir, args.version, args.old_phen_dir, args.old_version)
phen.diff(args.phen_dir, args.version, args.old_phen_dir, args.old_version, args.not_check_config)
def main():
......@@ -217,6 +217,18 @@ def main():
choices=parse.SUPPORTED_CODE_TYPES,
help=f"Specify the target coding {parse.SUPPORTED_CODE_TYPES}",
)
phen_map_parser.add_argument(
"--not-translate",
action='store_true',
default=False,
help="(Optional) Prevent any phenotype translation using NHS TRUD vocabularies.",
)
phen_map_parser.add_argument(
"--no-metadata",
action='store_true',
default=False,
help="(Optional) Prevent copying of metadata columns to output.",
)
phen_map_parser.set_defaults(func=_phen_map)
# phen export
......@@ -323,6 +335,12 @@ def main():
required=True,
help="Old phenotype version to compare with the changed version",
)
phen_diff_parser.add_argument(
"--not-check-config",
action='store_true',
default=False,
help="(Optional) Prevent loading and comparing config file, in the case where one does not exist",
)
phen_diff_parser.set_defaults(func=_phen_diff)
# Parse arguments
......
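Aside (illustrative, not part of the diff): the three new switches are plain argparse store_true flags, so the dashes become underscores on the parsed namespace, which is why the handlers above read args.not_translate, args.no_metadata and args.not_check_config. A minimal standard-library sketch:

import argparse

parser = argparse.ArgumentParser(prog="phen-map-sketch")
parser.add_argument("--not-translate", action="store_true", default=False)
parser.add_argument("--no-metadata", action="store_true", default=False)
args = parser.parse_args(["--not-translate"])
assert args.not_translate is True and args.no_metadata is False
# main.py then forwards the parsed values, e.g.:
# phen.map(args.phen_dir, args.target_coding, args.not_translate, args.no_metadata)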
......@@ -192,7 +192,7 @@ class Read2(Proto):
class Read3(Proto):
def __init__(self):
super().__init__("Read3", trud.PROCESSED_PATH / "read3.parquet")
super().__init__("read3", trud.PROCESSED_PATH / "read3.parquet")
self.checks = [
(
......@@ -231,7 +231,7 @@ class Read3(Proto):
f"QA Alphanumeric Dot",
codes=codes,
codes_file=codes_file,
check_regex=codes.str.match(r"^[a-zA-Z0-9.]+$"),
mask=None,
code_type=self.name,
)
)
......@@ -246,7 +246,7 @@ class Read3(Proto):
f"QA In Database",
codes=codes,
codes_file=codes_file,
check_regex=self.in_database(codes, self.db, self.name),
mask=None,
code_type=self.name,
)
)
......
......@@ -127,21 +127,30 @@ CONFIG_SCHEMA = {
"type": "dict",
"schema": {
"name": {"type": "string", "required": True},
"file": {
"type": "dict",
"required": False,
"files": {
"type": "list",
"required": True,
"schema": {
"path": {"type": "string", "required": True},
"columns": {"type": "dict", "required": True},
"category": {
"type": "string"
}, # Optional but must be string if present
"actions": {
"type": "dict",
"schema": {"divide_col": {"type": "string"}},
"type": "dict",
"schema": {
"path": {"type": "string", "required": True},
"columns": {"type": "dict", "required": True},
"category": {
"type": "string"
}, # Optional but must be string if present
"actions": {
"type": "dict",
"schema": {
"divide_col": {"type": "string"},
"split_col": {"type": "string"},
"codes_col": {"type": "string"}
},
},
},
},
},
"metadata": {"type": "dict", "required": False},
},
},
},
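Aside (illustrative only; the shape mirrors the YAML examples further down this diff): under the reworked schema each concept set now carries a files list instead of a single file dict, with optional per-file category/actions and an optional set-level metadata dict. As a Python literal:

concept_set_entry = {
    "name": "HYPERTENSION",
    "files": [
        {
            "path": "hanlon/Read_codes_for_diagnoses.csv",
            "columns": {"read2": "Read Code"},
            "category": "2",                      # optional, but must be a string if present
            "actions": {"divide_col": "MMCode"},  # divide_col / split_col / codes_col
        }
    ],
    "metadata": {},  # optional; copied onto output rows unless --no-metadata is passed
}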
......@@ -518,38 +527,39 @@ def validate(phen_dir: str):
concept_set_names.append(item["name"])
# check codes definition
for item in phenotype["concept_sets"]:
# check concepte code file exists
concept_code_file_path = concepts_path / item["file"]["path"]
if not concept_code_file_path.exists():
validation_errors.append(
f"Coding file {str(concept_code_file_path.resolve())} does not exist"
)
# check concepte code file is not empty
if concept_code_file_path.stat().st_size == 0:
validation_errors.append(
f"Coding file {str(concept_code_file_path.resolve())} is an empty file"
)
# check code file type is supported
if concept_code_file_path.suffix not in CODE_FILE_TYPES:
raise ValueError(
f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types"
)
for files in phenotype["concept_sets"]:
for item in files["files"]:
# check concept code file exists
concept_code_file_path = concepts_path / item["path"]
if not concept_code_file_path.exists():
validation_errors.append(
f"Coding file {str(concept_code_file_path.resolve())} does not exist"
)
# check columns specified are a supported medical coding type
for column in item["file"]["columns"]:
if column not in code_types:
# check concept code file is not empty
if concept_code_file_path.stat().st_size == 0:
validation_errors.append(
f"Column type {column} for file {concept_code_file_path} is not supported"
f"Coding file {str(concept_code_file_path.resolve())} is an empty file"
)
# check the actions are supported
if "actions" in item["file"]:
for action in item["file"]["actions"]:
if action not in COL_ACTIONS:
validation_errors.append(f"Action {action} is not supported")
# check code file type is supported
if concept_code_file_path.suffix not in CODE_FILE_TYPES:
raise ValueError(
f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types"
)
# check columns specified are a supported medical coding type
for column in item["columns"]:
if column not in code_types:
validation_errors.append(
f"Column type {column} for file {concept_code_file_path} is not supported"
)
# check the actions are supported
if "actions" in item:
for action in item["actions"]:
if action not in COL_ACTIONS:
validation_errors.append(f"Action {action} is not supported")
if len(validation_errors) > 0:
_logger.error(validation_errors)
......@@ -588,12 +598,12 @@ def _process_actions(df: pd.DataFrame, concept_set: dict) -> pd.DataFrame:
# Perform Structural Changes to file before preprocessing
_logger.debug("Processing file structural actions")
if (
"actions" in concept_set["file"]
and "split_col" in concept_set["file"]["actions"]
and "codes_col" in concept_set["file"]["actions"]
"actions" in concept_set
and "split_col" in concept_set["actions"]
and "codes_col" in concept_set["actions"]
):
split_col = concept_set["file"]["actions"]["split_col"]
codes_col = concept_set["file"]["actions"]["codes_col"]
split_col = concept_set["actions"]["split_col"]
codes_col = concept_set["actions"]["codes_col"]
_logger.debug(
"Action: Splitting",
split_col,
......@@ -621,12 +631,12 @@ def _preprocess_source_concepts(
# Preprocess codes
code_types = parse.CodeTypeParser().code_types
for code_type in concept_set["file"]["columns"]:
for code_type in concept_set["columns"]:
parser = code_types[code_type]
_logger.info(f"Processing {code_type} codes for {code_file_path}")
# get codes by column name
source_col_name = concept_set["file"]["columns"][code_type]
source_col_name = concept_set["columns"][code_type]
codes = df[source_col_name].dropna()
codes = codes.astype(str) # convert to string
codes = codes.str.strip() # remove excess spaces
......@@ -653,7 +663,7 @@ def _preprocess_source_concepts(
# Translate Df with multiple codes into single code type Series
def translate_codes(
source_df: pd.DataFrame, target_code_type: str, concept_name: str
source_df: pd.DataFrame, target_code_type: str, concept_name: str, not_translate:bool
) -> pd.DataFrame:
"""Translates each source code type the source coding list into a target type and returns all conversions as a concept set"""
......@@ -678,7 +688,7 @@ def translate_codes(
_logger.debug(
f"Target code type {target_code_type} is the same as source code type {len(source_df)}, copying codes rather than translating"
)
else:
elif not not_translate:
# get the translation filename using source to target code types
filename = f"{source_code_type}_to_{target_code_type}.parquet"
map_path = trud.PROCESSED_PATH / filename
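Aside (illustrative, with example code types): the new not_translate flag only gates the lookup of the TRUD translation table; codes already in the target type are still copied through. A small sketch of the branch above:

source_code_type, target_code_type = "read2", "snomed"  # example types only
not_translate = True
if source_code_type == target_code_type:
    pass  # copied rather than translated, regardless of the flag
elif not not_translate:
    filename = f"{source_code_type}_to_{target_code_type}.parquet"  # "read2_to_snomed.parquet"
else:
    pass  # --not-translate: cross-type codes are skipped rather than translated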
......@@ -725,7 +735,7 @@ def _write_code_errors(code_errors: list, code_errors_path: Path):
"SOURCE": err.codes_file,
"CAUSE": err.message,
}
for err in code_errors
for err in code_errors if err.mask is not None
]
)
......@@ -773,7 +783,7 @@ def write_vocab_version(phen_path: Path):
)
def map(phen_dir: str, target_code_type: str):
def map(phen_dir: str, target_code_type: str, not_translate:bool, no_metadata:bool):
_logger.info(f"Processing phenotype: {phen_dir}")
# Validate configuration
......@@ -797,15 +807,15 @@ def map(phen_dir: str, target_code_type: str):
)
if target_code_type is not None:
_map_target_code_type(phen_path, phenotype, target_code_type)
_map_target_code_type(phen_path, phenotype, target_code_type, not_translate, no_metadata)
else:
for t in phenotype["map"]:
_map_target_code_type(phen_path, phenotype, t)
_map_target_code_type(phen_path, phenotype, t, not_translate, no_metadata)
_logger.info(f"Phenotype processed successfully")
def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str):
def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str, not_translate:bool, no_metadata:bool):
_logger.debug(f"Target coding format: {target_code_type}")
concepts_path = phen_path / CONCEPTS_DIR
# Create output dataframe
......@@ -813,67 +823,87 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
code_errors = []
# Process each folder in codes section
for concept_set in phenotype["concept_sets"]:
_logger.debug(f"--- {concept_set['file']} ---")
# Load code file
codes_file_path = Path(concepts_path / concept_set["file"]["path"])
df = _read_table_file(codes_file_path)
# process structural actions
df = _process_actions(df, concept_set)
# preprocessing and validate of source concepts
_logger.debug("Processing and validating source concept codes")
df, errors = _preprocess_source_concepts(
df,
concept_set,
codes_file_path,
)
# create df with just the source code columns
source_column_names = list(concept_set["file"]["columns"].keys())
source_df = df[source_column_names]
for files in phenotype["concept_sets"]:
concept_set_name = files["name"]
if "metadata" in files:
concept_set_metadata = files["metadata"]
else:
concept_set_metadata = {}
for concept_set in files["files"]:
_logger.debug(f"--- {concept_set} ---")
# Load code file
codes_file_path = Path(concepts_path / concept_set["path"])
df = _read_table_file(codes_file_path)
# process structural actions
df = _process_actions(df, concept_set)
# preprocessing and validate of source concepts
_logger.debug("Processing and validating source concept codes")
df, errors = _preprocess_source_concepts(
df,
concept_set,
codes_file_path,
)
_logger.debug(source_df.columns)
_logger.debug(source_df.head())
# create df with just the source code columns
source_column_names = list(concept_set["columns"].keys())
source_df = df[source_column_names]
_logger.debug(
f"Length of errors from _preprocess_source_concepts {len(errors)}"
)
if len(errors) > 0:
code_errors.extend(errors)
_logger.debug(f" Length of code_errors {len(code_errors)}")
_logger.debug(source_df.columns)
_logger.debug(source_df.head())
# Map source concepts codes to target codes
# if processing a source coding list with categorical data
if (
"actions" in concept_set["file"]
and "divide_col" in concept_set["file"]["actions"]
and len(df) > 0
):
divide_col = concept_set["file"]["actions"]["divide_col"]
_logger.debug(f"Action: Dividing Table by {divide_col}")
_logger.debug(f"column into: {df[divide_col].unique()}")
df_grp = df.groupby(divide_col)
for cat, grp in df_grp:
if cat == concept_set["file"]["category"]:
grp = grp.drop(columns=[divide_col]) # delete categorical column
source_df = grp[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set["name"],
)
out = pd.concat([out, trans_out])
else:
source_df = df[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set["name"],
_logger.debug(
f"Length of errors from _preprocess_source_concepts {len(errors)}"
)
out = pd.concat([out, trans_out])
if len(errors) > 0:
code_errors.extend(errors)
_logger.debug(f" Length of code_errors {len(code_errors)}")
# Map source concepts codes to target codes
# if processing a source coding list with categorical data
if (
"actions" in concept_set
and "divide_col" in concept_set["actions"]
and len(df) > 0
):
divide_col = concept_set["actions"]["divide_col"]
_logger.debug(f"Action: Dividing Table by {divide_col}")
_logger.debug(f"column into: {df[divide_col].unique()}")
df_grp = df.groupby(divide_col)
for cat, grp in df_grp:
if cat == concept_set["category"]:
grp = grp.drop(
columns=[divide_col]
) # delete categorical column
source_df = grp[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set_name,
not_translate=not_translate,
)
trans_out = add_metadata(
codes=trans_out,
metadata=concept_set_metadata,
no_metadata=no_metadata,
)
out = pd.concat([out, trans_out])
else:
source_df = df[source_column_names]
trans_out = translate_codes(
source_df,
target_code_type=target_code_type,
concept_name=concept_set_name,
not_translate=not_translate,
)
trans_out = add_metadata(
codes=trans_out,
metadata=concept_set_metadata,
no_metadata=no_metadata,
)
out = pd.concat([out, trans_out])
if len(code_errors) > 0:
_logger.error(f"The map processing has {len(code_errors)} errors")
......@@ -894,48 +924,51 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
out = out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
out = out.sort_values(by=["CONCEPT_SET", "CONCEPT"])
out_count = len(out.index)
# out_count = len(out.index)
# added metadata
# Loop over each source_concept_type and perform the left join on all columns apart from source code columns
result_list = []
source_column_names = list(concept_set["file"]["columns"].keys())
for source_concept_type in source_column_names:
# Filter output based on the current source_concept_type
out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type]
filtered_count = len(out_filtered_df.index)
# Remove the source type columns except the current type will leave the metadata and the join
remove_types = [
type for type in source_column_names if type != source_concept_type
]
metadata_df = df.drop(columns=remove_types)
metadata_df = metadata_df.rename(
columns={source_concept_type: "SOURCE_CONCEPT"}
)
metadata_df_count = len(metadata_df.index)
# Perform the left join with df2 on SOURCE_CONCEPT to add the metadata
result = pd.merge(out_filtered_df, metadata_df, how="left", on="SOURCE_CONCEPT")
result_count = len(result.index)
_logger.debug(
f"Adding metadata for {source_concept_type}: out_count {out_count}, filtered_count {filtered_count}, metadata_df_count {metadata_df_count}, result_count {result_count}"
)
# Append the result to the result_list
result_list.append(result)
# result_list = []
# for files in phenotype["concept_sets"]:
# concept_set_name = files["name"]
# for concept_set in files["files"]:
# source_column_names = list(concept_set["columns"].keys())
# for source_concept_type in source_column_names:
# # Filter output based on the current source_concept_type
# out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type]
# filtered_count = len(out_filtered_df.index)
# # Remove the source type columns except the current type will leave the metadata and the join
# remove_types = [
# type for type in source_column_names if type != source_concept_type
# ]
# metadata_df = df.drop(columns=remove_types)
# metadata_df = metadata_df.rename(
# columns={source_concept_type: "SOURCE_CONCEPT"}
# )
# metadata_df_count = len(metadata_df.index)
# Perform the left join with df2 on SOURCE_CONCEPT to add the metadata
# result = pd.merge(out_filtered_df, metadata_df, how="left", on="SOURCE_CONCEPT")
# result_count = len(result.index)
# _logger.debug(
# f"Adding metadata for {source_concept_type}: out_count {out_count}, filtered_count {filtered_count}, metadata_df_count {metadata_df_count}, result_count {result_count}"
# )
# # Append the result to the result_list
# result_list.append(result)
# Concatenate all the results into a single DataFrame
final_out = pd.concat(result_list, ignore_index=True)
final_out = final_out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
_logger.debug(
f"Check metadata processing counts: before {len(out.index)} : after {len(final_out.index)}"
)
# final_out = pd.concat(result_list, ignore_index=True)
# final_out = final_out.drop_duplicates(subset=["CONCEPT_SET", "CONCEPT"])
# _logger.debug(
# f"Check metadata processing counts: before {len(out.index)} : after {len(final_out.index)}"
# )
# Save output to map directory
output_filename = target_code_type + ".csv"
map_path = phen_path / MAP_DIR / output_filename
final_out.to_csv(map_path, index=False)
out.to_csv(map_path, index=False)
_logger.info(f"Saved mapped concepts to {str(map_path.resolve())}")
# save concept sets as separate files
......@@ -950,7 +983,7 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
concept_set_path.mkdir(parents=True, exist_ok=True)
# write each concept as a separate file
for name, concept in final_out.groupby("CONCEPT_SET"):
for name, concept in out.groupby("CONCEPT_SET"):
concept = concept.sort_values(by="CONCEPT") # sort rows
concept = concept.dropna(how="all", axis=1) # remove empty cols
concept = concept.reindex(
......@@ -965,6 +998,22 @@ def _map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: st
_logger.info(f"Phenotype processed target code type {target_code_type}")
# Add metadata dict to each row of Df codes
def add_metadata(
codes: pd.DataFrame, metadata: dict, no_metadata:bool,
) -> pd.DataFrame:
"""Add concept set metadata, stored as a dictionary, to each concept row"""
if not no_metadata:
for meta_name, meta_value in metadata.items():
codes[meta_name] = meta_value
_logger.debug(
f"Adding metadata for concept set: metadata name {meta_name}, metadata value {meta_value}"
)
return codes
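Aside (usage sketch, not part of the diff; the rows and metadata keys are made up): add_metadata stamps every row of a translated concept-set frame with the set-level metadata columns, and returns the frame unchanged when --no-metadata is passed:

import pandas as pd

codes = pd.DataFrame({"CONCEPT": ["G20..", "G24.."], "CONCEPT_SET": ["HYPERTENSION"] * 2})
codes = add_metadata(codes, metadata={"source": "hanlon"}, no_metadata=False)
# every row now carries a "source" column; with no_metadata=True nothing is added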
def _generate_version_tag(
repo: git.Repo, increment: str = DEFAULT_VERSION_INC, use_v_prefix: bool = False
) -> str:
......@@ -1165,7 +1214,7 @@ def copy(phen_dir: str, target_dir: str, version: str):
def extract_concepts(config_data: dict) -> Tuple[dict, Set[str]]:
"""Extracts concepts as {name: file_path} dictionary and a name set."""
concepts_dict = {
item["name"]: item["file"]["path"]
item["name"]: [file["path"] for file in item["files"]]
for item in config_data["phenotype"]["concept_sets"]
}
name_set = set(concepts_dict.keys())
......@@ -1190,7 +1239,7 @@ def diff_config(old_config: dict, new_config: dict) -> str:
old_concepts, old_names = extract_concepts(old_config)
new_concepts, new_names = extract_concepts(new_config)
# Check added and removed names
# Check added and removed concept set names
added_names = new_names - old_names # Names that appear in new but not in old
removed_names = old_names - new_names # Names that were in old but not in new
......@@ -1331,37 +1380,42 @@ def diff_phen(
old_phen_path: Path,
old_version: str,
report_path: Path,
not_check_config:bool,
):
"""Compare the differences between two versions of a phenotype"""
# validate phenotypes
_logger.debug(f"Validating for diff old path: {str(old_phen_path.resolve())}")
validate(str(old_phen_path.resolve()))
_logger.debug(f"Validating for diff new path: {str(new_phen_path.resolve())}")
validate(str(new_phen_path.resolve()))
# get old and new config
old_config_path = old_phen_path / CONFIG_FILE
with old_config_path.open("r") as file:
old_config = yaml.safe_load(file)
new_config_path = new_phen_path / CONFIG_FILE
with new_config_path.open("r") as file:
new_config = yaml.safe_load(file)
# write report heading
report = f"# Phenotype Comparison Report\n"
report += f"## Original phenotype\n"
report += f" - {old_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {old_version}\n"
report += f" - {str(old_phen_path.resolve())}\n"
report += f"## Changed phenotype:\n"
report += f" - {new_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {new_version}\n"
report += f" - {str(new_phen_path.resolve())}\n"
# Step 1: check differences configuration files
# Convert list of dicts into a dict: {name: file}
report += diff_config(old_config, new_config)
if not not_check_config:
# validate phenotypes
_logger.debug(f"Validating for diff old path: {str(old_phen_path.resolve())}")
validate(str(old_phen_path.resolve()))
_logger.debug(f"Validating for diff new path: {str(new_phen_path.resolve())}")
validate(str(new_phen_path.resolve()))
# get old and new config
old_config_path = old_phen_path / CONFIG_FILE
with old_config_path.open("r") as file:
old_config = yaml.safe_load(file)
new_config_path = new_phen_path / CONFIG_FILE
with new_config_path.open("r") as file:
new_config = yaml.safe_load(file)
# write report
report += f"## Original phenotype\n"
report += f" - {old_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {old_version}\n"
report += f" - {str(old_phen_path.resolve())}\n"
report += f"## Changed phenotype:\n"
report += f" - {new_config['phenotype']['omop']['vocabulary_id']}\n"
report += f" - {new_version}\n"
report += f" - {str(new_phen_path.resolve())}\n"
# Convert list of dicts into a dict: {name: file}
report += diff_config(old_config, new_config)
# Step 2: check differences between map files
# List files from output directories
......@@ -1378,7 +1432,7 @@ def diff_phen(
_logger.info(f"Phenotypes diff'd successfully")
def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str):
def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str, not_check_config:bool):
# make tmp directory .acmc
timestamp = time.strftime("%Y%m%d_%H%M%S")
temp_dir = Path(f".acmc/diff_{timestamp}")
......@@ -1439,7 +1493,7 @@ def diff(phen_dir: str, version: str, old_phen_dir: str, old_version: str):
report_filename = f"{version}_{old_version}_diff.md"
report_path = changed_phen_path / report_filename
# diff old with new
diff_phen(changed_path, version, old_path, old_version, report_path)
diff_phen(changed_path, version, old_path, old_version, report_path, not_check_config)
finally:
# clean up tmp directory
......
......@@ -760,7 +760,7 @@
<section id="SUPPORTED_CODE_TYPES">
<div class="attr variable">
<span class="name">SUPPORTED_CODE_TYPES</span> =
<span class="default_value">{&#39;opcs4&#39;, &#39;icd10&#39;, &#39;atc&#39;, &#39;snomed&#39;, &#39;read2&#39;, &#39;read3&#39;}</span>
<span class="default_value">{&#39;atc&#39;, &#39;read2&#39;, &#39;read3&#39;, &#39;opcs4&#39;, &#39;snomed&#39;, &#39;icd10&#39;}</span>
</div>
......
Source diffs for two files could not be displayed: they are too large.
......@@ -111,6 +111,9 @@ The `phen` command is used phenotype-related operations.
- `-t`, `--target-coding`: (Optional) Specify the target coding (e.g., `read2`, `read3`, `icd10`, `snomed`, `opcs4`).
- `-d`, `--phen-dir`: (Optional) Local phenotype workspace directory (default is ./workspace/phen).
- `--not-translate`: (Optional) Prevent any phenotype translation using NHS TRUD vocabularies, so only concepts already in the target coding will be mapped (see the sketch after this option list).
- `--no-metadata`: (Optional) Prevent copying of metadata columns to output.
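A short sketch of the equivalent programmatic call (the import path is an assumption; main.py simply forwards the parsed flags as shown in the cli diff above):

from acmc import phen  # hypothetical import path; adjust to the project's package name

phen.map(
    "./workspace/phen",   # --phen-dir (default noted above)
    "read2",              # --target-coding
    not_translate=True,   # --not-translate
    no_metadata=True,     # --no-metadata
)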
- **Publish Phenotype Configuration**
......
......@@ -8,9 +8,8 @@ phenotype:
- "read2"
- "read3"
concept_sets:
- name: "ABDO_PAIN"
file:
path: "clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv"
- name: "ABDO_PAIN"
files:
- path: "clinical-codes-org/Symptom code lists/Abdominal pain/res176-abdominal-pain.csv"
columns:
read2: "code"
......@@ -8,13 +8,13 @@ phenotype:
- "read2"
- "read3"
concept_sets:
- name: "CVD_EVENTS"
file:
path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
- name: "CVD_EVENTS"
files:
- path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
- name: "DID_NOT_ATTEND"
file:
path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
icd10: "code"
- name: "DID_NOT_ATTEND"
files:
- path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
\ No newline at end of file
......@@ -10,29 +10,29 @@ phenotype:
- "snomed"
concept_sets:
- name: "CVD_EVENTS"
file:
path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
files:
- path: "clinical-codes-org/Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv"
columns:
icd10: "code"
- name: "DID_NOT_ATTEND"
file:
path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
files:
- path: "clinical-codes-org/Non-attendance codes/res201-did-not-attend-appointment.csv"
columns:
read2: "code"
- name: "HYPERTENSION"
file:
path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "2"
actions:
divide_col: "MMCode"
files:
- path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "2"
actions:
divide_col: "MMCode"
- name: "DEPRESSION"
file:
path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "3"
actions:
divide_col: "MMCode"
files:
- path: "hanlon/Read_codes_for_diagnoses.csv"
columns:
read2: "Read Code"
category: "3"
actions:
divide_col: "MMCode"
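Aside (illustrative sketch of the divide_col action configured above, using made-up rows): the coding table is grouped on the divide column and only the group whose key matches each set's category string is kept, as in _map_target_code_type earlier in this diff:

import pandas as pd

df = pd.DataFrame({"Read Code": ["G20..", "E112."], "MMCode": ["2", "3"]})  # example rows
for cat, grp in df.groupby("MMCode"):
    if cat == "2":  # category: "2" keeps only the HYPERTENSION group
        hypertension_codes = grp.drop(columns=["MMCode"])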
......@@ -128,26 +128,6 @@ def test_phen_workflow(tmp_dir, monkeypatch, caplog, config_file):
main.main()
assert "Phenotype published successfully" in caplog.text
# copy phenotype'
with caplog.at_level(logging.DEBUG):
monkeypatch.setattr(
sys,
"argv",
[
"main.py",
"phen",
"copy",
"-d",
str(phen_path.resolve()),
"-td",
str(tmp_dir.resolve()),
"-v",
"0.0.1",
],
)
main.main()
assert "Phenotype copied successfully" in caplog.text
# diff phenotype
with caplog.at_level(logging.DEBUG):
old_path = tmp_dir / "0.0.1"
......