Commit 9254ae24 authored by mjbonifa

(fix) fixed all typing and ready for documentation

parent a6329d38
@@ -96,6 +96,7 @@ def install(omop_zip_file: str, version: str):
# connect to database, if it does not exist it will be created
conn = sqlite3.connect(DB_PATH)
# Iterate through files in the folder
csv_files = list(VOCAB_PATH.glob("*.csv"))
total_tables_count = len(csv_files)
@@ -112,7 +113,7 @@ def install(omop_zip_file: str, version: str):
df.to_sql(filename.stem, conn, if_exists="replace", index=False)
# add to the metadata
vocabularies["tables"].append(filename.stem)
list(vocabularies["tables"]).append(filename.stem)
table_count = table_count + 1
except Exception as e:
raise Exception(f"Error reading file {filename}: {e}")
@@ -204,7 +205,6 @@ def vocab_exists(cursor: sqlite3.Cursor, vocab_id: str) -> bool:
def concept_set_exist(cursor: sqlite3.Cursor, concept_set_name: str) -> bool:
query = f"SELECT EXISTS (SELECT 1 FROM CONCEPT_SET WHERE concept_set_name = ?)"
cursor.execute(query, (concept_set_name,))
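As a side note, the `?` placeholder does the parameter binding here, so the `f`-prefix on the query string is redundant. A self-contained sketch of the same EXISTS check:

```python
# Sketch of the EXISTS check with a parameterised query; assumes a CONCEPT_SET
# table with a concept_set_name column, as in the hunk above.
import sqlite3

def concept_set_exists(cursor: sqlite3.Cursor, concept_set_name: str) -> bool:
    query = "SELECT EXISTS (SELECT 1 FROM CONCEPT_SET WHERE concept_set_name = ?)"
    cursor.execute(query, (concept_set_name,))
    # fetchone() returns a one-element tuple containing 0 or 1
    return bool(cursor.fetchone()[0])
```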
@@ -271,7 +271,6 @@ def export(map_path: Path, export_path: Path, version: str, omop_metadata) -> Pa
df = pd.read_csv(map_file)
for concept_set_name, grp in df.groupby("CONCEPT_SET"):
# create Concept_Set
if not concept_set_exist(cur, str(concept_set_name)):
cur.execute(
......
@@ -43,12 +43,12 @@ class Proto:
tuple[
str, # The description, e.g., "Not Empty"
Callable[
[list],
[pd.DataFrame],
pd.Series,
], # The first callable: takes a pd.DataFrame of codes and returns a pd.Series of booleans
Callable[
[list, Path],
None,
[pd.DataFrame, Path],
pd.DataFrame,
], # The second callable: takes a pd.DataFrame and a Path, and returns the processed pd.DataFrame
]
]
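To make the revised typing concrete, here is one illustrative check tuple under the new signature; the `"code"` column and the drop-rows fix are hypothetical, not taken from the repo.

```python
# One illustrative (description, condition, process) check under the new types:
# both callables now take a pd.DataFrame of codes instead of a list.
from pathlib import Path
from typing import Callable
import pandas as pd

Check = tuple[
    str,                                           # description, e.g. "Not Empty"
    Callable[[pd.DataFrame], pd.Series],           # condition: per-row pass/fail booleans
    Callable[[pd.DataFrame, Path], pd.DataFrame],  # process: returns the corrected frame
]

not_empty: Check = (
    "Not Empty",
    lambda codes: codes["code"].notna(),                      # hypothetical "code" column
    lambda codes, codes_file: codes.dropna(subset=["code"]),  # drop failing rows
)
```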
@@ -338,10 +338,6 @@ class Icd10(Proto):
# )
]
def trim_icd10(codes: pd.DataFrame) -> pd.DataFrame:
codes = codes.str[:4]
return codes
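The removed trim_icd10 helper relied on pandas string slicing; as a standalone sketch of that idea applied to a single column:

```python
# Standalone sketch of the trimming idea: keep only the first four characters
# of each ICD-10 code using pandas string slicing on a Series (one column).
import pandas as pd

codes = pd.Series(["A010", "A0101", "B9621"])  # hypothetical ICD-10 codes
trimmed = codes.str[:4]
print(trimmed.tolist())  # ['A010', 'A010', 'B962']
```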
class Snomed(Proto):
def __init__(self):
@@ -544,7 +540,6 @@ class CodeTypeParser:
"""A class used in InvalidCodesException to report an error if a code parser check fails"""
def __init__(self, trud_processed_dir: Path = trud.PROCESSED_PATH):
if not trud_processed_dir.exists() or not trud_processed_dir.is_dir():
raise FileNotFoundError(
f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install"
......
@@ -20,7 +20,7 @@ import requests
import yaml
import semver
from git import Repo
from cerberus import Validator
from cerberus import Validator # type: ignore
from deepdiff import DeepDiff
from pathlib import Path
from urllib.parse import urlparse, urlunparse
@@ -199,9 +199,7 @@ def check_delete_dir(path: Path, msg: str) -> bool:
return deleted
def fork(
phen_dir: str, upstream_url: str, upstream_version: str, new_origin_url: str = None
):
def fork(phen_dir: str, upstream_url: str, upstream_version: str, new_origin_url: str):
"""Forks an upstream phenotype in a remote repo at a specific version to a local director, and optionally sets to a new remote origin"
Args:
@@ -315,14 +313,15 @@ def init(phen_dir: str, remote_url: str):
# Initialise repo from local or remote
repo: Repo
# if remote then clone the repo otherwise init a local repo
if remote_url != None:
# add PAT token to the URL
git_url = construct_git_url(remote_url)
# clone the repo
repo = git.cmd.Git()
repo.clone(git_url, phen_path)
git_cmd = git.cmd.Git()
git_cmd.clone(git_url, phen_path)
# open repo
repo = Repo(phen_path)
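A hedged sketch of the clone-then-open pattern above (GitPython). The real construct_git_url helper embeds a PAT token into the remote URL; a no-op stand-in is used here so the snippet is self-contained, and the URL and path are hypothetical.

```python
# Hedged sketch of cloning with git.cmd.Git and then opening the working copy
# as a Repo object. construct_git_url is a stand-in for the project helper.
from pathlib import Path
import git
from git import Repo

def construct_git_url(remote_url: str) -> str:
    return remote_url  # stand-in; the real helper injects the PAT token

remote_url = "https://example.com/group/phenotype.git"  # hypothetical
phen_path = Path("workspace/phenotype")                 # hypothetical

git_url = construct_git_url(remote_url)
git.cmd.Git().clone(git_url, str(phen_path))  # run `git clone` via the command wrapper
repo = Repo(phen_path)                        # then open the working copy as a Repo
```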
@@ -645,7 +644,6 @@ def translate_codes(
logger.info(f"Converting to target code type {target_code_type}")
for source_code_type in source_df.columns:
# if the target code type is the same as the source code type, no translation is needed, just append the source as the target
if source_code_type == target_code_type:
copy_df = pd.DataFrame(
@@ -675,7 +673,7 @@ def translate_codes(
)
# normalise the output
translated_df.columns = ["SOURCE_CONCEPT", "CONCEPT"]
translated_df.columns = pd.Index(["SOURCE_CONCEPT", "CONCEPT"])
translated_df["SOURCE_CONCEPT_TYPE"] = source_code_type
# add to list of codes
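The pd.Index wrapper is purely a typing aid: assigning a plain list to DataFrame.columns works at runtime, but wrapping it in pd.Index matches the stubs mypy checks against, which appears to be the point of this commit. A tiny sketch with hypothetical data:

```python
# Tiny sketch of the renaming step with hypothetical data; runtime behaviour is
# the same as assigning a plain list of column names.
import pandas as pd

translated_df = pd.DataFrame({"from": ["A010"], "to": ["12345"]})  # hypothetical codes
translated_df.columns = pd.Index(["SOURCE_CONCEPT", "CONCEPT"])
translated_df["SOURCE_CONCEPT_TYPE"] = "icd10"  # hypothetical source code type
print(translated_df)
```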
@@ -891,7 +889,6 @@ def map_target_code_type(phen_path: Path, phenotype: dict, target_code_type: str
result_list = []
source_column_names = list(concept_set["file"]["columns"].keys())
for source_concept_type in source_column_names:
# Filter output based on the current source_concept_type
out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type]
filtered_count = len(out_filtered_df.index)
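The filter itself is a plain boolean mask over the output frame; a compact sketch with made-up values:

```python
# Compact sketch of the SOURCE_CONCEPT_TYPE filter with made-up values.
import pandas as pd

out = pd.DataFrame({
    "CONCEPT": ["111", "222", "333"],
    "SOURCE_CONCEPT_TYPE": ["read2", "icd10", "read2"],  # hypothetical types
})
source_concept_type = "read2"
out_filtered_df = out[out["SOURCE_CONCEPT_TYPE"] == source_concept_type]
filtered_count = len(out_filtered_df.index)  # -> 2
```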
......
@@ -47,11 +47,7 @@ def get_releases(item_id: str, API_KEY: str, latest=False) -> list:
def download_release_file(
item_id: str,
release_ordinal: str,
release: dict,
file_json_prefix: str,
file_type=None,
item_id: str, release_ordinal: str, release: dict, file_json_prefix: str
) -> Path:
"""Download specified file type for a given release of an item."""
@@ -61,9 +57,15 @@ def download_release_file(
f"Error: '{DOWNLOADS_PATH}' for TRUD resources is not a directory"
)
file_type = file_type or file_json_prefix
file_type = file_json_prefix
file_url = release.get(f"{file_json_prefix}FileUrl")
if file_url == None:
raise ValueError(f"File url not in json data {file_json_prefix}FileUrl")
file_name = release.get(f"{file_json_prefix}FileName")
if file_name == None:
raise ValueError(f"File name not in json data {file_json_prefix}FileName")
file_destination = DOWNLOADS_PATH / file_name
if not file_url or not file_name:
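Pulled out of context, the fail-fast metadata lookup plus download reads roughly as below; DOWNLOADS_PATH and the use of requests here are assumptions for illustration, not lifted from the hunk.

```python
# Rough sketch of the defensive metadata lookup and download; DOWNLOADS_PATH
# and the requests call are assumptions for illustration.
from pathlib import Path
import requests

DOWNLOADS_PATH = Path("downloads")  # assumed download directory

def fetch_release_file(release: dict, file_json_prefix: str) -> Path:
    file_url = release.get(f"{file_json_prefix}FileUrl")
    if file_url is None:
        raise ValueError(f"File url not in json data {file_json_prefix}FileUrl")
    file_name = release.get(f"{file_json_prefix}FileName")
    if file_name is None:
        raise ValueError(f"File name not in json data {file_json_prefix}FileName")
    file_destination = DOWNLOADS_PATH / file_name
    response = requests.get(file_url, timeout=60)
    response.raise_for_status()
    file_destination.write_bytes(response.content)
    return file_destination
```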
@@ -101,7 +103,6 @@ def validate_download_hash(file_destination: str, item_hash: str):
def unzip_download(file_destination: str):
# check folder is a directory
if not DOWNLOADS_PATH.is_dir():
raise NotADirectoryError(
......
@@ -6,6 +6,10 @@
# \_\_main\_\_
<a id="util"></a>
# util
<a id="logging_config"></a>
# logging\_config
@@ -30,135 +34,129 @@ def set_log_level(log_level: int)
Sets the log level for the acmc logger
<a id="parse"></a>
<a id="main"></a>
# parse
# main
<a id="parse.CodesError"></a>
<a id="main.trud_install"></a>
## CodesError Objects
#### trud\_install
```python
class CodesError()
def trud_install(args: argparse.Namespace)
```
A class used in InvalidCodesException to report an error if a code parser check fails
Handle the `trud install` command.
<a id="parse.InvalidCodesException"></a>
<a id="main.omop_install"></a>
## InvalidCodesException Objects
#### omop\_install
```python
class InvalidCodesException(Exception)
def omop_install(args: argparse.Namespace)
```
Custom exception class raised when invalid codes are found that cannot be resolved by processing
Handle the `omop install` command.
<a id="parse.Proto"></a>
<a id="main.omop_clear"></a>
## Proto Objects
#### omop\_clear
```python
class Proto()
def omop_clear(args: argparse.Namespace)
```
Define checks as a list of 3-tuples: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Handle the `omop clear` command.
<a id="parse.Proto.raise_exception"></a>
<a id="main.omop_delete"></a>
#### raise\_exception
#### omop\_delete
```python
def raise_exception(ex: Exception)
def omop_delete(args: argparse.Namespace)
```
Raises an exception inside a lambda function. Python does not allow a raise statement inside a lambda because a lambda can only contain expressions, not statements. The helper is named raise_exception rather than raise_ as it is more explicit.
Handle the `omop delete` command.
<a id="parse.Proto.process"></a>
<a id="main.phen_init"></a>
#### process
#### phen\_init
```python
def process(codes: pd.DataFrame,
codes_file: Path) -> Tuple[pd.DataFrame, list]
def phen_init(args: argparse.Namespace)
```
Identify issues that do not pass and fix them with the defined process
Handle the `phen init` command.
<a id="parse.Proto.verify"></a>
<a id="main.phen_fork"></a>
#### verify
#### phen\_fork
```python
def verify(codes: pd.DataFrame, codes_file: Path)
def phen_fork(args: argparse.Namespace)
```
Verify codes in the codes file
Handle the `phen fork` command.
<a id="parse.Read2"></a>
<a id="main.phen_validate"></a>
## Read2 Objects
#### phen\_validate
```python
class Read2(Proto)
def phen_validate(args: argparse.Namespace)
```
This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary.
Handle the `phen validate` command.
<a id="parse.CodeTypeParser"></a>
<a id="main.phen_map"></a>
## CodeTypeParser Objects
#### phen\_map
```python
class CodeTypeParser()
def phen_map(args: argparse.Namespace)
```
A class that initialises the code parsers (e.g. Read2, Icd10, Snomed) from the TRUD processed directory
<a id="trud"></a>
# trud
Handle the `phen map` command.
<a id="trud.get_releases"></a>
<a id="main.phen_export"></a>
#### get\_releases
#### phen\_export
```python
def get_releases(item_id: str, API_KEY: str, latest=False) -> list
def phen_export(args: argparse.Namespace)
```
Retrieve release information for an item from the TRUD API.
Handle the `phen export` command.
<a id="trud.download_release_file"></a>
<a id="main.phen_publish"></a>
#### download\_release\_file
#### phen\_publish
```python
def download_release_file(item_id: str,
release_ordinal: str,
release: dict,
file_json_prefix: str,
file_type=None) -> Path
def phen_publish(args: argparse.Namespace)
```
Download specified file type for a given release of an item.
Handle the `phen publish` command.
<a id="trud.create_map_directories"></a>
<a id="main.phen_copy"></a>
#### create\_map\_directories
#### phen\_copy
```python
def create_map_directories()
def phen_copy(args: argparse.Namespace)
```
Create map directories.
Handle the `phen copy` command.
<a id="util"></a>
<a id="main.phen_diff"></a>
# util
#### phen\_diff
```python
def phen_diff(args: argparse.Namespace)
```
Handle the `phen diff` command.
<a id="omop"></a>
@@ -208,129 +206,93 @@ def delete(db_path: Path)
Deletes the OMOP sql database
<a id="main"></a>
# main
<a id="main.trud_install"></a>
#### trud\_install
```python
def trud_install(args: argparse.Namespace)
```
Handle the `trud install` command.
<a id="main.omop_install"></a>
#### omop\_install
```python
def omop_install(args: argparse.Namespace)
```
Handle the `omop install` command.
<a id="main.omop_clear"></a>
#### omop\_clear
```python
def omop_clear(args: argparse.Namespace)
```
Handle the `omop clear` command.
<a id="main.omop_delete"></a>
#### omop\_delete
```python
def omop_delete(args: argparse.Namespace)
```
<a id="parse"></a>
Handle the `omop delete` command.
# parse
<a id="main.phen_init"></a>
<a id="parse.CodesError"></a>
#### phen\_init
## CodesError Objects
```python
def phen_init(args: argparse.Namespace)
class CodesError()
```
Handle the `phen init` command.
A class used in InvalidCodesException to report an error if a code parser check fails
<a id="main.phen_fork"></a>
<a id="parse.InvalidCodesException"></a>
#### phen\_fork
## InvalidCodesException Objects
```python
def phen_fork(args: argparse.Namespace)
class InvalidCodesException(Exception)
```
Handle the `phen fork` command.
Custom exception class raised when invalid codes are found that cannot be resolved by processing
<a id="main.phen_validate"></a>
<a id="parse.Proto"></a>
#### phen\_validate
## Proto Objects
```python
def phen_validate(args: argparse.Namespace)
class Proto()
```
Handle the `phen validate` command.
Define checks as a list of 3-tuples: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
<a id="main.phen_map"></a>
<a id="parse.Proto.raise_exception"></a>
#### phen\_map
#### raise\_exception
```python
def phen_map(args: argparse.Namespace)
def raise_exception(ex: Exception)
```
Handle the `phen map` command.
Raises an exception inside a lambda function. Python does not allow a raise statement inside a lambda because a lambda can only contain expressions, not statements. The helper is named raise_exception rather than raise_ as it is more explicit.
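A two-line illustration of the pattern described here; the check lambda and its values are hypothetical.

```python
# The workaround in miniature: a lambda cannot contain a raise statement, so it
# calls a small function that raises on its behalf.
def raise_exception(ex: Exception):
    raise ex

# hypothetical check: passes through truthy values, raises otherwise
check = lambda value: value if value else raise_exception(ValueError("check failed"))

check("R69")   # returns "R69"
# check("")    # would raise ValueError: check failed
```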
<a id="main.phen_export"></a>
<a id="parse.Proto.process"></a>
#### phen\_export
#### process
```python
def phen_export(args: argparse.Namespace)
def process(codes: pd.DataFrame,
codes_file: Path) -> Tuple[pd.DataFrame, list]
```
Handle the `phen export` command.
Identify issues that do not pass and fix them with the defined process
<a id="main.phen_publish"></a>
<a id="parse.Proto.verify"></a>
#### phen\_publish
#### verify
```python
def phen_publish(args: argparse.Namespace)
def verify(codes: pd.DataFrame, codes_file: Path)
```
Handle the `phen publish` command.
Verify codes in the codes file
<a id="main.phen_copy"></a>
<a id="parse.Read2"></a>
#### phen\_copy
## Read2 Objects
```python
def phen_copy(args: argparse.Namespace)
class Read2(Proto)
```
Handle the `phen copy` command.
This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary.
<a id="main.phen_diff"></a>
<a id="parse.CodeTypeParser"></a>
#### phen\_diff
## CodeTypeParser Objects
```python
def phen_diff(args: argparse.Namespace)
class CodeTypeParser()
```
Handle the `phen diff` command.
A class that initialises the code parsers (e.g. Read2, Icd10, Snomed) from the TRUD processed directory
<a id="phen"></a>
@@ -395,10 +357,8 @@ Checks on the command line if a user wants to delete a directory
#### fork
```python
def fork(phen_dir: str,
upstream_url: str,
upstream_version: str,
new_origin_url: str = None)
def fork(phen_dir: str, upstream_url: str, upstream_version: str,
new_origin_url: str)
```
Forks an upstream phenotype in a remote repo at a specific version to a local directory, and optionally sets a new remote origin.
@@ -543,3 +503,38 @@ def diff_phen(new_phen_path: Path, new_version: str, old_phen_path: Path,
Compare the differences between two versions of a phenotype
<a id="trud"></a>
# trud
<a id="trud.get_releases"></a>
#### get\_releases
```python
def get_releases(item_id: str, API_KEY: str, latest=False) -> list
```
Retrieve release information for an item from the TRUD API.
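A hedged sketch of what such a lookup might look like with requests; the endpoint shape and the "releases" key in the response are assumptions based on the public TRUD API, not taken from this commit.

```python
# Hedged sketch of a TRUD release lookup; the endpoint and the "releases" key
# are assumptions, not taken from this repository.
import requests

def get_releases(item_id: str, api_key: str, latest: bool = False) -> list:
    url = f"https://isd.digital.nhs.uk/trud/api/v1/keys/{api_key}/items/{item_id}/releases"
    if latest:
        url += "?latest"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json().get("releases", [])
```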
<a id="trud.download_release_file"></a>
#### download\_release\_file
```python
def download_release_file(item_id: str, release_ordinal: str, release: dict,
file_json_prefix: str) -> Path
```
Download specified file type for a given release of an item.
<a id="trud.create_map_directories"></a>
#### create\_map\_directories
```python
def create_map_directories()
```
Create map directories.
@@ -71,10 +71,6 @@ dependencies = [
"mypy",
"pdoc",
"pydoc-markdown",
"sphinx",
"sphinx-rtd-theme",
"myst-parser",
"sphinx-markdown-builder",
]
[tool.hatch.envs.dev.scripts]
@@ -82,7 +78,7 @@ format = "black ."
type = "mypy ."
docs = "pydoc-markdown > docs/api.md"
htmldocs = "pdoc -o docs/api -d markdown --docformat markdown --include-undocumented acmc"
precommit = "hatch format && type mypy && hatch docs"
precommit = "hatch run format && hatch run type && hatch run docs"
[tool.hatch.build]
include = ["acmc/**"] # Ensure only the acmc package is included
......