From 9254ae241f4a1cf334bc2c07f059a74efba7fd87 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Mon, 10 Mar 2025 09:47:55 +0000
Subject: [PATCH] (fix) fixed all typing errors; ready for documentation

---
 acmc/omop.py   |   5 +-
 acmc/parse.py  |  11 +-
 acmc/phen.py   |  15 ++-
 acmc/trud.py   |  15 +--
 docs/api.md    | 271 ++++++++++++++++++++++++-------------------------
 pyproject.toml |   6 +-
 6 files changed, 153 insertions(+), 170 deletions(-)

diff --git a/acmc/omop.py b/acmc/omop.py
index 95c0ca9..0c3b31f 100644
--- a/acmc/omop.py
+++ b/acmc/omop.py
@@ -96,6 +96,7 @@ def install(omop_zip_file: str, version: str):
 
     # connect to database, if it does not exist it will be created
     conn = sqlite3.connect(DB_PATH)
+
     # Iterate through files in the folder
     csv_files = list(VOCAB_PATH.glob("*.csv"))
     total_tables_count = len(csv_files)
@@ -112,7 +113,7 @@ def install(omop_zip_file: str, version: str):
             df.to_sql(filename.stem, conn, if_exists="replace", index=False)
 
             # add to the metadata
-            vocabularies["tables"].append(filename.stem)
+            vocabularies["tables"].append(filename.stem)  # type: ignore
             table_count = table_count + 1
         except Exception as e:
             raise Exception(f"Error reading file {filename}: {e}")
@@ -204,7 +205,6 @@ def vocab_exists(cursor: sqlite3.Cursor, vocab_id: str) -> bool:
 
 
 def concept_set_exist(cursor: sqlite3.Cursor, concept_set_name: str) -> bool:
-
     query = f"SELECT EXISTS (SELECT 1 FROM CONCEPT_SET WHERE concept_set_name = ?)"
     cursor.execute(query, (concept_set_name,))
 
@@ -271,7 +271,6 @@ def export(map_path: Path, export_path: Path, version: str, omop_metadata) -> Pa
         df = pd.read_csv(map_file)
 
         for concept_set_name, grp in df.groupby("CONCEPT_SET"):
-
             # create Concept_Set
             if not concept_set_exist(cur, str(concept_set_name)):
                 cur.execute(
diff --git a/acmc/parse.py b/acmc/parse.py
index 2542cea..9c0e472 100644
--- a/acmc/parse.py
+++ b/acmc/parse.py
@@ -43,12 +43,12 @@ class Proto:
         tuple[
             str,  # The description, e.g., "Not Empty"
             Callable[
-                [list],
+                [pd.DataFrame],
                 pd.Series,
-            ],  # The first lambda function: takes a list and returns a pd.Series of booleans
+            ],  # The first lambda function: takes a DataFrame and returns a pd.Series of booleans
             Callable[
-                [list, Path],
-                None,
+                [pd.DataFrame, Path],
+                pd.DataFrame,
-            ],  # The second lambda function: takes a list and a string, and returns nothing
+            ],  # The second lambda function: takes a DataFrame and a Path, and returns the processed DataFrame
         ]
     ]
@@ -338,10 +338,6 @@ class Icd10(Proto):
             # 			)
         ]
 
-    def trim_icd10(codes: pd.DataFrame) -> pd.DataFrame:
-        codes = codes.str[:4]
-        return codes
-
 
 class Snomed(Proto):
     def __init__(self):
@@ -544,7 +540,6 @@ class CodeTypeParser:
     """A class used in InvalidCodesException to report an error if a code parser check fails"""
 
     def __init__(self, trud_processed_dir: Path = trud.PROCESSED_PATH):
-
         if not trud_processed_dir.exists() or not trud_processed_dir.is_dir():
             raise FileNotFoundError(
                 f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install"
diff --git a/acmc/phen.py b/acmc/phen.py
index e9354a3..a340feb 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -20,7 +20,7 @@ import requests
 import yaml
 import semver
 from git import Repo
-from cerberus import Validator
+from cerberus import Validator  # type: ignore
 from deepdiff import DeepDiff
 from pathlib import Path
 from urllib.parse import urlparse, urlunparse
@@ -199,9 +199,7 @@ def check_delete_dir(path: Path, msg: str) -> bool:
     return deleted
 
 
-def fork(
-    phen_dir: str, upstream_url: str, upstream_version: str, new_origin_url: str = None
-):
+def fork(phen_dir: str, upstream_url: str, upstream_version: str, new_origin_url: str):
     """Forks an upstream phenotype in a remote repo at a specific version to a local director, and optionally sets to a new remote origin"
 
     Args:
@@ -315,14 +313,15 @@ def init(phen_dir: str, remote_url: str):
 
     # Initialise repo from local or remote
     repo: Repo
+
     # if remote then clone the repo otherwise init a local repo
     if remote_url != None:
         # add PAT token to the URL
         git_url = construct_git_url(remote_url)
 
         # clone the repo
-        repo = git.cmd.Git()
-        repo.clone(git_url, phen_path)
+        git_cmd = git.cmd.Git()
+        git_cmd.clone(git_url, phen_path)
 
         # open repo
         repo = Repo(phen_path)
@@ -645,7 +644,6 @@ def translate_codes(
     logger.info(f"Converting to target code type {target_code_type}")
 
     for source_code_type in source_df.columns:
-
-        # if target code type is the same as thet source code type, no translation, just appending source as target
+        # if the target code type is the same as the source code type, no translation is needed, just append the source as the target
         if source_code_type == target_code_type:
             copy_df = pd.DataFrame(
@@ -675,7 +673,7 @@ def translate_codes(
                 )
 
                 # normalise the output
-                translated_df.columns = ["SOURCE_CONCEPT", "CONCEPT"]
+                translated_df.columns = pd.Index(["SOURCE_CONCEPT", "CONCEPT"])
                 translated_df["SOURCE_CONCEPT_TYPE"] = source_code_type
 
                 # add to list of codes
@@ -891,7 +889,6 @@ def map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str
     result_list = []
     source_column_names = list(concept_set["file"]["columns"].keys())
     for source_concept_type in source_column_names:
-
         # Filter output based on the current source_concept_type
         out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type]
         filtered_count = len(out_filtered_df.index)
diff --git a/acmc/trud.py b/acmc/trud.py
index 65a3cd3..38c0974 100644
--- a/acmc/trud.py
+++ b/acmc/trud.py
@@ -47,11 +47,7 @@ def get_releases(item_id: str, API_KEY: str, latest=False) -> list:
 
 
 def download_release_file(
-    item_id: str,
-    release_ordinal: str,
-    release: dict,
-    file_json_prefix: str,
-    file_type=None,
+    item_id: str, release_ordinal: str, release: dict, file_json_prefix: str
 ) -> Path:
     """Download specified file type for a given release of an item."""
 
@@ -61,9 +57,15 @@ def download_release_file(
             f"Error: '{DOWNLOADS_PATH}' for TRUD resources is not a directory"
         )
 
-    file_type = file_type or file_json_prefix
+    file_type = file_json_prefix
     file_url = release.get(f"{file_json_prefix}FileUrl")
+    if file_url is None:
+        raise ValueError(f"File URL not found in release data: {file_json_prefix}FileUrl")
+
     file_name = release.get(f"{file_json_prefix}FileName")
+    if file_name is None:
+        raise ValueError(f"File name not found in release data: {file_json_prefix}FileName")
+
     file_destination = DOWNLOADS_PATH / file_name
 
     if not file_url or not file_name:
@@ -101,7 +103,6 @@ def validate_download_hash(file_destination: str, item_hash: str):
 
 
 def unzip_download(file_destination: str):
-
     # check folder is a directory
     if not DOWNLOADS_PATH.is_dir():
         raise NotADirectoryError(
diff --git a/docs/api.md b/docs/api.md
index a4ea298..fe7f6fd 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -6,6 +6,10 @@
 
 # \_\_main\_\_
 
+<a id="util"></a>
+
+# util
+
 <a id="logging_config"></a>
 
 # logging\_config
@@ -30,135 +34,129 @@ def set_log_level(log_level: int)
 
 Sets the log level for the acmc logger
 
-<a id="parse"></a>
+<a id="main"></a>
 
-# parse
+# main
 
-<a id="parse.CodesError"></a>
+<a id="main.trud_install"></a>
 
-## CodesError Objects
+#### trud\_install
 
 ```python
-class CodesError()
+def trud_install(args: argparse.Namespace)
 ```
 
-A class used in InvalidCodesException to report an error if a code parser check fails
+Handle the `trud install` command.
 
-<a id="parse.InvalidCodesException"></a>
+<a id="main.omop_install"></a>
 
-## InvalidCodesException Objects
+#### omop\_install
 
 ```python
-class InvalidCodesException(Exception)
+def omop_install(args: argparse.Namespace)
 ```
 
-Custom exception class raised when invalid codes are found that cannot be resolved by processing
+Handle the `omop install` command.
 
-<a id="parse.Proto"></a>
+<a id="main.omop_clear"></a>
 
-## Proto Objects
+#### omop\_clear
 
 ```python
-class Proto()
+def omop_clear(args: argparse.Namespace)
 ```
 
-Define checks as list of 3 tuple: (Message, Condition, Process)
-- Message = The name of the condition (what is printed and logged)
-- Condition = True if Passed, and False if Failed
-- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
+Handle the `omop clear` command.
 
-<a id="parse.Proto.raise_exception"></a>
+<a id="main.omop_delete"></a>
 
-#### raise\_exception
+#### omop\_delete
 
 ```python
-def raise_exception(ex: Exception)
+def omop_delete(args: argparse.Namespace)
 ```
 
-Raises an exception inside a lambda function. Python does not allow using raise statement inside lambda because lambda can only contain expressions, not statements. Using raise_exception not raise_ as it's more explict
+Handle the `omop delete` command.
 
-<a id="parse.Proto.process"></a>
+<a id="main.phen_init"></a>
 
-#### process
+#### phen\_init
 
 ```python
-def process(codes: pd.DataFrame,
-            codes_file: Path) -> Tuple[pd.DataFrame, list]
+def phen_init(args: argparse.Namespace)
 ```
 
-identify issues that do not pass and fix them with define/d process
+Handle the `phen init` command.
 
-<a id="parse.Proto.verify"></a>
+<a id="main.phen_fork"></a>
 
-#### verify
+#### phen\_fork
 
 ```python
-def verify(codes: pd.DataFrame, codes_file: Path)
+def phen_fork(args: argparse.Namespace)
 ```
 
-verify codes in codes file
+Handle the `phen fork` command.
 
-<a id="parse.Read2"></a>
+<a id="main.phen_validate"></a>
 
-## Read2 Objects
+#### phen\_validate
 
 ```python
-class Read2(Proto)
+def phen_validate(args: argparse.Namespace)
 ```
 
-This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary.
+Handle the `phen validate` command.
 
-<a id="parse.CodeTypeParser"></a>
+<a id="main.phen_map"></a>
 
-## CodeTypeParser Objects
+#### phen\_map
 
 ```python
-class CodeTypeParser()
+def phen_map(args: argparse.Namespace)
 ```
 
-A class used in InvalidCodesException to report an error if a code parser check fails
-
-<a id="trud"></a>
-
-# trud
+Handle the `phen map` command.
 
-<a id="trud.get_releases"></a>
+<a id="main.phen_export"></a>
 
-#### get\_releases
+#### phen\_export
 
 ```python
-def get_releases(item_id: str, API_KEY: str, latest=False) -> list
+def phen_export(args: argparse.Namespace)
 ```
 
-Retrieve release information for an item from the TRUD API.
+Handle the `phen export` command.
 
-<a id="trud.download_release_file"></a>
+<a id="main.phen_publish"></a>
 
-#### download\_release\_file
+#### phen\_publish
 
 ```python
-def download_release_file(item_id: str,
-                          release_ordinal: str,
-                          release: dict,
-                          file_json_prefix: str,
-                          file_type=None) -> Path
+def phen_publish(args: argparse.Namespace)
 ```
 
-Download specified file type for a given release of an item.
+Handle the `phen publish` command.
 
-<a id="trud.create_map_directories"></a>
+<a id="main.phen_copy"></a>
 
-#### create\_map\_directories
+#### phen\_copy
 
 ```python
-def create_map_directories()
+def phen_copy(args: argparse.Namespace)
 ```
 
-Create map directories.
+Handle the `phen copy` command.
 
-<a id="util"></a>
+<a id="main.phen_diff"></a>
 
-# util
+#### phen\_diff
+
+```python
+def phen_diff(args: argparse.Namespace)
+```
+
+Handle the `phen diff` command.
 
 <a id="omop"></a>
 
@@ -208,129 +206,93 @@ def delete(db_path: Path)
 
 Deletes the OMOP sql database
 
-<a id="main"></a>
-
-# main
-
-<a id="main.trud_install"></a>
-
-#### trud\_install
-
-```python
-def trud_install(args: argparse.Namespace)
-```
-
-Handle the `trud install` command.
-
-<a id="main.omop_install"></a>
-
-#### omop\_install
-
-```python
-def omop_install(args: argparse.Namespace)
-```
-
-Handle the `omop install` command.
-
-<a id="main.omop_clear"></a>
-
-#### omop\_clear
-
-```python
-def omop_clear(args: argparse.Namespace)
-```
-
-Handle the `omop clear` command.
-
-<a id="main.omop_delete"></a>
-
-#### omop\_delete
-
-```python
-def omop_delete(args: argparse.Namespace)
-```
+<a id="parse"></a>
 
-Handle the `omop delete` command.
+# parse
 
-<a id="main.phen_init"></a>
+<a id="parse.CodesError"></a>
 
-#### phen\_init
+## CodesError Objects
 
 ```python
-def phen_init(args: argparse.Namespace)
+class CodesError()
 ```
 
-Handle the `phen init` command.
+A class used in InvalidCodesException to report an error if a code parser check fails
 
-<a id="main.phen_fork"></a>
+<a id="parse.InvalidCodesException"></a>
 
-#### phen\_fork
+## InvalidCodesException Objects
 
 ```python
-def phen_fork(args: argparse.Namespace)
+class InvalidCodesException(Exception)
 ```
 
-Handle the `phen fork` command.
+Custom exception class raised when invalid codes are found that cannot be resolved by processing
 
-<a id="main.phen_validate"></a>
+<a id="parse.Proto"></a>
 
-#### phen\_validate
+## Proto Objects
 
 ```python
-def phen_validate(args: argparse.Namespace)
+class Proto()
 ```
 
-Handle the `phen validate` command.
+Define checks as a list of 3-tuples: (Message, Condition, Process); an illustrative sketch follows below.
+- Message = The name of the condition (what is printed and logged)
+- Condition = True if passed, and False if failed
+- Process = Aims to resolve all issues that stop the condition from passing (do not change the index!)
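+
+For illustration only, one entry in this checks list might look like the sketch below; the message, the `code` column name, and the lambdas are illustrative and are not the actual checks defined in `parse.py`:
+
+```python
+# Illustrative 3-tuple: (Message, Condition, Process)
+check = (
+    "Not Empty",
+    # Condition: a boolean Series, True where the code value is present and non-blank
+    lambda codes: codes["code"].notna() & (codes["code"].astype(str).str.strip() != ""),
+    # Process: attempt to resolve failures, here by dropping rows with an empty code
+    lambda codes, codes_file: codes.dropna(subset=["code"]),
+)
+```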
 
-<a id="main.phen_map"></a>
+<a id="parse.Proto.raise_exception"></a>
 
-#### phen\_map
+#### raise\_exception
 
 ```python
-def phen_map(args: argparse.Namespace)
+def raise_exception(ex: Exception)
 ```
 
-Handle the `phen map` command.
+Raises an exception inside a lambda function. Python does not allow a raise statement inside a lambda because a lambda can only contain expressions, not statements. The name raise_exception is used rather than raise_ as it is more explicit
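+
+A minimal sketch of the pattern this enables; the wrapper and the validation lambda are illustrative rather than copied from `parse.py`:
+
+```python
+def raise_exception(ex: Exception):
+    """Raise an exception on behalf of a lambda, which cannot contain a raise statement."""
+    raise ex
+
+# `raise` is a statement and so is illegal inside a lambda,
+# but calling a function that raises is an expression and is allowed:
+check = lambda value: value if value is not None else raise_exception(ValueError("value is None"))
+```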
 
-<a id="main.phen_export"></a>
+<a id="parse.Proto.process"></a>
 
-#### phen\_export
+#### process
 
 ```python
-def phen_export(args: argparse.Namespace)
+def process(codes: pd.DataFrame,
+            codes_file: Path) -> Tuple[pd.DataFrame, list]
 ```
 
-Handle the `phen copy` command.
+Identify issues that do not pass and fix them with the defined process
 
-<a id="main.phen_publish"></a>
+<a id="parse.Proto.verify"></a>
 
-#### phen\_publish
+#### verify
 
 ```python
-def phen_publish(args: argparse.Namespace)
+def verify(codes: pd.DataFrame, codes_file: Path)
 ```
 
-Handle the `phen publish` command.
+Verify the codes in the codes file
 
-<a id="main.phen_copy"></a>
+<a id="parse.Read2"></a>
 
-#### phen\_copy
+## Read2 Objects
 
 ```python
-def phen_copy(args: argparse.Namespace)
+class Read2(Proto)
 ```
 
-Handle the `phen copy` command.
+This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary.
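+
+A usage sketch, assuming `Read2` can be constructed without arguments (as `Snomed` is in `parse.py`) and that the codes are loaded into a pandas DataFrame; the file path and column layout are illustrative:
+
+```python
+import pandas as pd
+from pathlib import Path
+from acmc import parse
+
+codes_file = Path("concepts/read2_codes.csv")  # illustrative path
+codes = pd.read_csv(codes_file, dtype=str)
+
+parser = parse.Read2()
+# verify checks the codes against the parser's rules
+parser.verify(codes, codes_file)
+# process attempts to fix failing codes and returns them with a list of unresolved errors
+codes, errors = parser.process(codes, codes_file)
+```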
 
-<a id="main.phen_diff"></a>
+<a id="parse.CodeTypeParser"></a>
 
-#### phen\_diff
+## CodeTypeParser Objects
 
 ```python
-def phen_diff(args: argparse.Namespace)
+class CodeTypeParser()
 ```
 
-Handle the `phen diff` command.
+A class that initialises the code type parsers from the TRUD processed directory
 
 <a id="phen"></a>
 
@@ -395,10 +357,8 @@ Checks on the command line if a user wants to delete a directory
 #### fork
 
 ```python
-def fork(phen_dir: str,
-         upstream_url: str,
-         upstream_version: str,
-         new_origin_url: str = None)
+def fork(phen_dir: str, upstream_url: str, upstream_version: str,
+         new_origin_url: str)
 ```
 
-Forks an upstream phenotype in a remote repo at a specific version to a local director, and optionally sets to a new remote origin"
+Forks an upstream phenotype in a remote repo at a specific version to a local directory and sets a new remote origin
@@ -543,3 +503,38 @@ def diff_phen(new_phen_path: Path, new_version: str, old_phen_path: Path,
 
 Compare the differences between two versions of a phenotype
 
+<a id="trud"></a>
+
+# trud
+
+<a id="trud.get_releases"></a>
+
+#### get\_releases
+
+```python
+def get_releases(item_id: str, API_KEY: str, latest=False) -> list
+```
+
+Retrieve release information for an item from the TRUD API.
+
+<a id="trud.download_release_file"></a>
+
+#### download\_release\_file
+
+```python
+def download_release_file(item_id: str, release_ordinal: str, release: dict,
+                          file_json_prefix: str) -> Path
+```
+
+Download specified file type for a given release of an item.
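+
+A usage sketch combining the two calls above; the item id, the way the API key is supplied, and the `"archive"` prefix are illustrative assumptions, and the release dict must provide `<prefix>FileUrl` and `<prefix>FileName` entries for the download to succeed:
+
+```python
+import os
+from acmc import trud
+
+item_id = "394"                       # illustrative TRUD item id
+api_key = os.environ["TRUD_API_KEY"]  # illustrative way of supplying the key
+
+releases = trud.get_releases(item_id, api_key, latest=True)
+for ordinal, release in enumerate(releases, start=1):
+    # "archive" is an assumed file_json_prefix, so the release is expected to
+    # contain "archiveFileUrl" and "archiveFileName" keys
+    downloaded = trud.download_release_file(item_id, str(ordinal), release, "archive")
+    print(f"Downloaded {downloaded}")
+```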
+
+<a id="trud.create_map_directories"></a>
+
+#### create\_map\_directories
+
+```python
+def create_map_directories()
+```
+
+Create map directories.
+
diff --git a/pyproject.toml b/pyproject.toml
index 8132caa..2797c27 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,10 +71,6 @@ dependencies = [
     "mypy",
     "pdoc",
     "pydoc-markdown",
-    "sphinx",
-    "sphinx-rtd-theme",
-    "myst-parser",
-    "sphinx-markdown-builder",
 ]
 
 [tool.hatch.envs.dev.scripts]
@@ -82,7 +78,7 @@ format = "black ."
 type = "mypy ."
 docs = "pydoc-markdown > docs/api.md"
 htmldocs = "pdoc -o docs/api -d markdown --docformat markdown --include-undocumented acmc"
-precommit = "hatch format && type mypy && hatch docs"
+precommit = "hatch run format && hatch run type && hatch run docs"
 
 [tool.hatch.build]
 include = ["acmc/**"]  # Ensure only the acmc package is included
-- 
GitLab