diff --git a/acmc/omop.py b/acmc/omop.py index 95c0ca9ed557c819363429aea8cd3d054e9a7294..0c3b31f02cf2b115f1f0ce400b5ebca5671dc725 100644 --- a/acmc/omop.py +++ b/acmc/omop.py @@ -96,6 +96,7 @@ def install(omop_zip_file: str, version: str): # connect to database, if it does not exist it will be created conn = sqlite3.connect(DB_PATH) + # Iterate through files in the folder csv_files = list(VOCAB_PATH.glob("*.csv")) total_tables_count = len(csv_files) @@ -112,7 +113,7 @@ def install(omop_zip_file: str, version: str): df.to_sql(filename.stem, conn, if_exists="replace", index=False) # add to the metadata - vocabularies["tables"].append(filename.stem) + list(vocabularies["tables"]).append(filename.stem) table_count = table_count + 1 except Exception as e: raise Exception(f"Error reading file {filename}: {e}") @@ -204,7 +205,6 @@ def vocab_exists(cursor: sqlite3.Cursor, vocab_id: str) -> bool: def concept_set_exist(cursor: sqlite3.Cursor, concept_set_name: str) -> bool: - query = f"SELECT EXISTS (SELECT 1 FROM CONCEPT_SET WHERE concept_set_name = ?)" cursor.execute(query, (concept_set_name,)) @@ -271,7 +271,6 @@ def export(map_path: Path, export_path: Path, version: str, omop_metadata) -> Pa df = pd.read_csv(map_file) for concept_set_name, grp in df.groupby("CONCEPT_SET"): - # create Concept_Set if not concept_set_exist(cur, str(concept_set_name)): cur.execute( diff --git a/acmc/parse.py b/acmc/parse.py index 2542ceab4fd3230365c3a8146e6bb1c78f2f9e90..9c0e47212b016d82182b475fc3bfd18588e09c3d 100644 --- a/acmc/parse.py +++ b/acmc/parse.py @@ -43,12 +43,12 @@ class Proto: tuple[ str, # The description, e.g., "Not Empty" Callable[ - [list], + [pd.DataFrame], pd.Series, ], # The first lambda function: takes a list and returns a pd.Series of booleans Callable[ - [list, Path], - None, + [pd.DataFrame, Path], + pd.DataFrame, ], # The second lambda function: takes a list and a string, and returns nothing ] ] @@ -338,10 +338,6 @@ class Icd10(Proto): # ) ] - def trim_icd10(codes: pd.DataFrame) -> pd.DataFrame: - codes = codes.str[:4] - return codes - class Snomed(Proto): def __init__(self): @@ -544,7 +540,6 @@ class CodeTypeParser: """A class used in InvalidCodesException to report an error if a code parser check fails""" def __init__(self, trud_processed_dir: Path = trud.PROCESSED_PATH): - if not trud_processed_dir.exists() or not trud_processed_dir.is_dir(): raise FileNotFoundError( f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install" diff --git a/acmc/phen.py b/acmc/phen.py index e9354a3722df43bda556509851821232542d60e9..a340febf3866855717b7880872add8d61504d09c 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -20,7 +20,7 @@ import requests import yaml import semver from git import Repo -from cerberus import Validator +from cerberus import Validator # type: ignore from deepdiff import DeepDiff from pathlib import Path from urllib.parse import urlparse, urlunparse @@ -199,9 +199,7 @@ def check_delete_dir(path: Path, msg: str) -> bool: return deleted -def fork( - phen_dir: str, upstream_url: str, upstream_version: str, new_origin_url: str = None -): +def fork(phen_dir: str, upstream_url: str, upstream_version: str, new_origin_url: str): """Forks an upstream phenotype in a remote repo at a specific version to a local director, and optionally sets to a new remote origin" Args: @@ -315,14 +313,15 @@ def init(phen_dir: str, remote_url: str): # Initialise repo from local or remote repo: Repo + # if remote 
then clone the repo otherwise init a local repo if remote_url != None: # add PAT token to the URL git_url = construct_git_url(remote_url) # clone the repo - repo = git.cmd.Git() - repo.clone(git_url, phen_path) + git_cmd = git.cmd.Git() + git_cmd.clone(git_url, phen_path) # open repo repo = Repo(phen_path) @@ -645,7 +644,6 @@ def translate_codes( logger.info(f"Converting to target code type {target_code_type}") for source_code_type in source_df.columns: - # if target code type is the same as thet source code type, no translation, just appending source as target if source_code_type == target_code_type: copy_df = pd.DataFrame( @@ -675,7 +673,7 @@ def translate_codes( ) # normalise the output - translated_df.columns = ["SOURCE_CONCEPT", "CONCEPT"] + translated_df.columns = pd.Index(["SOURCE_CONCEPT", "CONCEPT"]) translated_df["SOURCE_CONCEPT_TYPE"] = source_code_type # add to list of codes @@ -891,7 +889,6 @@ def map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str result_list = [] source_column_names = list(concept_set["file"]["columns"].keys()) for source_concept_type in source_column_names: - # Filter output based on the current source_concept_type out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type] filtered_count = len(out_filtered_df.index) diff --git a/acmc/trud.py b/acmc/trud.py index 65a3cd3af62439600d7cb5990c0654e81c4d71d9..38c0974149bf481a0db89febec63900f27a59c2d 100644 --- a/acmc/trud.py +++ b/acmc/trud.py @@ -47,11 +47,7 @@ def get_releases(item_id: str, API_KEY: str, latest=False) -> list: def download_release_file( - item_id: str, - release_ordinal: str, - release: dict, - file_json_prefix: str, - file_type=None, + item_id: str, release_ordinal: str, release: dict, file_json_prefix: str ) -> Path: """Download specified file type for a given release of an item.""" @@ -61,9 +57,15 @@ def download_release_file( f"Error: '{DOWNLOADS_PATH}' for TRUD resources is not a directory" ) - file_type = file_type or file_json_prefix + file_type = file_json_prefix file_url = release.get(f"{file_json_prefix}FileUrl") + if file_url == None: + raise ValueError(f"File url not in json data {file_json_prefix}FileUrl") + file_name = release.get(f"{file_json_prefix}FileName") + if file_name == None: + raise ValueError(f"File name not in json data {file_json_prefix}FileName") + file_destination = DOWNLOADS_PATH / file_name if not file_url or not file_name: @@ -101,7 +103,6 @@ def validate_download_hash(file_destination: str, item_hash: str): def unzip_download(file_destination: str): - # check folder is a directory if not DOWNLOADS_PATH.is_dir(): raise NotADirectoryError( diff --git a/docs/api.md b/docs/api.md index a4ea298367eded018099fa70106b80ed6ee5b679..fe7f6fdd1561d82e13f11d38fbb842b5dcbcb87e 100644 --- a/docs/api.md +++ b/docs/api.md @@ -6,6 +6,10 @@ # \_\_main\_\_ +<a id="util"></a> + +# util + <a id="logging_config"></a> # logging\_config @@ -30,135 +34,129 @@ def set_log_level(log_level: int) Sets the log level for the acmc logger -<a id="parse"></a> +<a id="main"></a> -# parse +# main -<a id="parse.CodesError"></a> +<a id="main.trud_install"></a> -## CodesError Objects +#### trud\_install ```python -class CodesError() +def trud_install(args: argparse.Namespace) ``` -A class used in InvalidCodesException to report an error if a code parser check fails +Handle the `trud install` command. 
-<a id="parse.InvalidCodesException"></a> +<a id="main.omop_install"></a> -## InvalidCodesException Objects +#### omop\_install ```python -class InvalidCodesException(Exception) +def omop_install(args: argparse.Namespace) ``` -Custom exception class raised when invalid codes are found that cannot be resolved by processing +Handle the `omop install` command. -<a id="parse.Proto"></a> +<a id="main.omop_clear"></a> -## Proto Objects +#### omop\_clear ```python -class Proto() +def omop_clear(args: argparse.Namespace) ``` -Define checks as list of 3 tuple: (Message, Condition, Process) -- Message = The name of the condition (what is printed and logged) -- Condition = True if Passed, and False if Failed -- Process = Aims to resolve all issues that stop condition from passing (Do not change index!) +Handle the `omop clear` command. -<a id="parse.Proto.raise_exception"></a> +<a id="main.omop_delete"></a> -#### raise\_exception +#### omop\_delete ```python -def raise_exception(ex: Exception) +def omop_delete(args: argparse.Namespace) ``` -Raises an exception inside a lambda function. Python does not allow using raise statement inside lambda because lambda can only contain expressions, not statements. Using raise_exception not raise_ as it's more explict +Handle the `omop delete` command. -<a id="parse.Proto.process"></a> +<a id="main.phen_init"></a> -#### process +#### phen\_init ```python -def process(codes: pd.DataFrame, - codes_file: Path) -> Tuple[pd.DataFrame, list] +def phen_init(args: argparse.Namespace) ``` -identify issues that do not pass and fix them with define/d process +Handle the `phen init` command. -<a id="parse.Proto.verify"></a> +<a id="main.phen_fork"></a> -#### verify +#### phen\_fork ```python -def verify(codes: pd.DataFrame, codes_file: Path) +def phen_fork(args: argparse.Namespace) ``` -verify codes in codes file +Handle the `phen fork` command. -<a id="parse.Read2"></a> +<a id="main.phen_validate"></a> -## Read2 Objects +#### phen\_validate ```python -class Read2(Proto) +def phen_validate(args: argparse.Namespace) ``` -This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary. +Handle the `phen validate` command. -<a id="parse.CodeTypeParser"></a> +<a id="main.phen_map"></a> -## CodeTypeParser Objects +#### phen\_map ```python -class CodeTypeParser() +def phen_map(args: argparse.Namespace) ``` -A class used in InvalidCodesException to report an error if a code parser check fails - -<a id="trud"></a> - -# trud +Handle the `phen map` command. -<a id="trud.get_releases"></a> +<a id="main.phen_export"></a> -#### get\_releases +#### phen\_export ```python -def get_releases(item_id: str, API_KEY: str, latest=False) -> list +def phen_export(args: argparse.Namespace) ``` -Retrieve release information for an item from the TRUD API. +Handle the `phen copy` command. -<a id="trud.download_release_file"></a> +<a id="main.phen_publish"></a> -#### download\_release\_file +#### phen\_publish ```python -def download_release_file(item_id: str, - release_ordinal: str, - release: dict, - file_json_prefix: str, - file_type=None) -> Path +def phen_publish(args: argparse.Namespace) ``` -Download specified file type for a given release of an item. +Handle the `phen publish` command. 
-<a id="trud.create_map_directories"></a> +<a id="main.phen_copy"></a> -#### create\_map\_directories +#### phen\_copy ```python -def create_map_directories() +def phen_copy(args: argparse.Namespace) ``` -Create map directories. +Handle the `phen copy` command. -<a id="util"></a> +<a id="main.phen_diff"></a> -# util +#### phen\_diff + +```python +def phen_diff(args: argparse.Namespace) +``` + +Handle the `phen diff` command. <a id="omop"></a> @@ -208,129 +206,93 @@ def delete(db_path: Path) Deletes the OMOP sql database -<a id="main"></a> - -# main - -<a id="main.trud_install"></a> - -#### trud\_install - -```python -def trud_install(args: argparse.Namespace) -``` - -Handle the `trud install` command. - -<a id="main.omop_install"></a> - -#### omop\_install - -```python -def omop_install(args: argparse.Namespace) -``` - -Handle the `omop install` command. - -<a id="main.omop_clear"></a> - -#### omop\_clear - -```python -def omop_clear(args: argparse.Namespace) -``` - -Handle the `omop clear` command. - -<a id="main.omop_delete"></a> - -#### omop\_delete - -```python -def omop_delete(args: argparse.Namespace) -``` +<a id="parse"></a> -Handle the `omop delete` command. +# parse -<a id="main.phen_init"></a> +<a id="parse.CodesError"></a> -#### phen\_init +## CodesError Objects ```python -def phen_init(args: argparse.Namespace) +class CodesError() ``` -Handle the `phen init` command. +A class used in InvalidCodesException to report an error if a code parser check fails -<a id="main.phen_fork"></a> +<a id="parse.InvalidCodesException"></a> -#### phen\_fork +## InvalidCodesException Objects ```python -def phen_fork(args: argparse.Namespace) +class InvalidCodesException(Exception) ``` -Handle the `phen fork` command. +Custom exception class raised when invalid codes are found that cannot be resolved by processing -<a id="main.phen_validate"></a> +<a id="parse.Proto"></a> -#### phen\_validate +## Proto Objects ```python -def phen_validate(args: argparse.Namespace) +class Proto() ``` -Handle the `phen validate` command. +Define checks as list of 3 tuple: (Message, Condition, Process) +- Message = The name of the condition (what is printed and logged) +- Condition = True if Passed, and False if Failed +- Process = Aims to resolve all issues that stop condition from passing (Do not change index!) -<a id="main.phen_map"></a> +<a id="parse.Proto.raise_exception"></a> -#### phen\_map +#### raise\_exception ```python -def phen_map(args: argparse.Namespace) +def raise_exception(ex: Exception) ``` -Handle the `phen map` command. +Raises an exception inside a lambda function. Python does not allow using raise statement inside lambda because lambda can only contain expressions, not statements. Using raise_exception not raise_ as it's more explict -<a id="main.phen_export"></a> +<a id="parse.Proto.process"></a> -#### phen\_export +#### process ```python -def phen_export(args: argparse.Namespace) +def process(codes: pd.DataFrame, + codes_file: Path) -> Tuple[pd.DataFrame, list] ``` -Handle the `phen copy` command. +identify issues that do not pass and fix them with define/d process -<a id="main.phen_publish"></a> +<a id="parse.Proto.verify"></a> -#### phen\_publish +#### verify ```python -def phen_publish(args: argparse.Namespace) +def verify(codes: pd.DataFrame, codes_file: Path) ``` -Handle the `phen publish` command. 
+verify codes in codes file -<a id="main.phen_copy"></a> +<a id="parse.Read2"></a> -#### phen\_copy +## Read2 Objects ```python -def phen_copy(args: argparse.Namespace) +class Read2(Proto) ``` -Handle the `phen copy` command. +This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary. -<a id="main.phen_diff"></a> +<a id="parse.CodeTypeParser"></a> -#### phen\_diff +## CodeTypeParser Objects ```python -def phen_diff(args: argparse.Namespace) +class CodeTypeParser() ``` -Handle the `phen diff` command. +A class used in InvalidCodesException to report an error if a code parser check fails <a id="phen"></a> @@ -395,10 +357,8 @@ Checks on the command line if a user wants to delete a directory #### fork ```python -def fork(phen_dir: str, - upstream_url: str, - upstream_version: str, - new_origin_url: str = None) +def fork(phen_dir: str, upstream_url: str, upstream_version: str, + new_origin_url: str) ``` Forks an upstream phenotype in a remote repo at a specific version to a local director, and optionally sets to a new remote origin" @@ -543,3 +503,38 @@ def diff_phen(new_phen_path: Path, new_version: str, old_phen_path: Path, Compare the differences between two versions of a phenotype +<a id="trud"></a> + +# trud + +<a id="trud.get_releases"></a> + +#### get\_releases + +```python +def get_releases(item_id: str, API_KEY: str, latest=False) -> list +``` + +Retrieve release information for an item from the TRUD API. + +<a id="trud.download_release_file"></a> + +#### download\_release\_file + +```python +def download_release_file(item_id: str, release_ordinal: str, release: dict, + file_json_prefix: str) -> Path +``` + +Download specified file type for a given release of an item. + +<a id="trud.create_map_directories"></a> + +#### create\_map\_directories + +```python +def create_map_directories() +``` + +Create map directories. + diff --git a/pyproject.toml b/pyproject.toml index 8132caa2df8d868646fba082f79520c8a04c0cb7..2797c27f55d70e29524664cc07cfa4a0b57d12cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,10 +71,6 @@ dependencies = [ "mypy", "pdoc", "pydoc-markdown", - "sphinx", - "sphinx-rtd-theme", - "myst-parser", - "sphinx-markdown-builder", ] [tool.hatch.envs.dev.scripts] @@ -82,7 +78,7 @@ format = "black ." type = "mypy ." docs = "pydoc-markdown > docs/api.md" htmldocs = "pdoc -o docs/api -d markdown --docformat markdown --include-undocumented acmc" -precommit = "hatch format && type mypy && hatch docs" +precommit = "hatch run format && hatch run type && hatch run docs" [tool.hatch.build] include = ["acmc/**"] # Ensure only the acmc package is included
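
For readers following the `Proto.checks` annotation change in `acmc/parse.py` above (conditions now typed as `Callable[[pd.DataFrame], pd.Series]` and processes as `Callable[[pd.DataFrame, Path], pd.DataFrame]`), the sketch below shows one check tuple of that shape and a driver loop mirroring the behaviour described in the `Proto.process` docstring. The "Not Empty" message, both lambdas, and the `apply_checks` helper are illustrative placeholders under those assumptions, not code taken from the repository.

    import pandas as pd
    from pathlib import Path

    # Placeholder check of the annotated shape: (message, condition, process).
    example_checks = [
        (
            "Not Empty",
            # condition: takes the codes DataFrame, returns a boolean Series
            lambda codes: pd.Series([len(codes.index) > 0]),
            # process: takes the codes DataFrame and the codes file path,
            # returns the (possibly corrected) DataFrame; a no-op stand-in here
            lambda codes, codes_file: codes,
        ),
    ]

    def apply_checks(codes: pd.DataFrame, codes_file: Path) -> pd.DataFrame:
        # Hypothetical driver mirroring the documented Proto.process behaviour:
        # evaluate each condition and, where it fails, run the matching
        # process to repair the frame before continuing.
        for message, condition, process in example_checks:
            if not condition(codes).all():
                codes = process(codes, codes_file)
        return codes

For example, `apply_checks(pd.DataFrame({"code": ["A01"]}), Path("codes.csv"))` returns the frame unchanged, since the placeholder condition passes on a non-empty DataFrame.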