Skip to content
Snippets Groups Projects
Commit f51e056a authored by mjbonifa's avatar mjbonifa
Browse files

added initial pyproject.toml and testing creating package without conda.yaml,...

added initial pyproject.toml and testing creating package without conda.yaml, only works with python >=3.9
parent 09d928c9
No related branches found
No related tags found
No related merge requests found
# Python
dist/*
*.py[co]
__pycache__
......@@ -11,8 +11,8 @@ __pycache__
*.bak
*.save*
~$*
*.log
# Build and output files
# ACMC phenotype build files
build/*
*output*
*.log
......@@ -2,7 +2,6 @@ import argparse
import logging
from pathlib import Path
# acmc imports
from acmc import trud, omop, phen, logging_config as lc
# setup logging
......
......@@ -7,6 +7,8 @@ import logging
from pathlib import Path
from acmc import logging_config
# setup logging
logger = logging_config.setup_logger()
OMOP_DB_DIR = Path('./build/omop')
......@@ -56,7 +58,7 @@ def install (omop_install_folder, version, db_path=OMOP_DB_PATH):
if filename.endswith(".csv"): # Check if the file is a CSV
file_path = os.path.join(omop_install_folder, filename)
try:
logger.debug(f"Reading file: {file_path}")
logger.info(f"Reading table: {file_path}")
# read the CSV file with the specified delimiter
df = pd.read_csv(file_path, delimiter="\t", low_memory=False)
table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file
......
......@@ -2,15 +2,13 @@ import pandas as pd
import numpy as np
import os
# acmc imports
from acmc import trud, logging_config as lc
# setup logging
logger = lc.setup_logger()
PHEN_CODE_ERROR_FILE = "code_errors.csv"
class CodesError():
"""A class used in InvalidCodesException to report an error if a code parser check fails"""
def __init__(self, message, codes=None, codes_file=None, mask=None, code_type=None):
# initialise class variables with provided parameters
for key, value in locals().items():
......@@ -85,7 +83,7 @@ class Proto():
out = cond(codes)
conds = np.append(conds, out.all())
return codes
return conds
class Read2(Proto):
""" This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary."""
......@@ -396,6 +394,7 @@ class Cprd(Proto):
)
]
# The medical code type parsers
code_types = {
"read2": Read2(),
"read3": Read3(),
......@@ -417,33 +416,3 @@ vocab_types = {
"med": None,
"cprd": None,
}
\ No newline at end of file
# def log_invalid_code(self, codes=None, codes_file=None, mask=None, code_type=None, error_file=None, cause=None):
# logger = logging.getLogger('acmc_logger')
# logger.error(f"Invalid codes {codes_file}, {codes[~mask]}")
# errors = pd.DataFrame([])
# errors["CONCEPT"] = codes[~mask].astype(str)
# errors["VOCABULARY"] = code_type
# errors["SOURCE"] = file_path
# errors["CAUSE"] = cause
#
# #append to error log csv
# if os.path.exists(error_file):
# df_error = pd.read_csv(error_file)
# df_error = pd.concat([df_error, errors])
# df_error.to_csv(error_file, index=False)
# else:
# df_error = errors
# df_error.to_csv(error_file, index=False)
# return codes[mask]
# write errors to a file
# error_path = phen_path / ERROR_FILE
# if error_path.exists():
# error_df = pd.read_csv(error_path)
# error_df = error_df.drop_duplicates() # Remove Duplicates from Error file
# error_df = error_df.sort_values(by=["SOURCE", "VOCABULARY", "CONCEPT"])
# error_df.to_csv(error_path, index=False)
\ No newline at end of file
......@@ -13,9 +13,7 @@ import requests
from pathlib import Path
from urllib.parse import urlparse, urlunparse
# acmc imports
from acmc import trud, omop, parse
from acmc.omop import publish_concept_sets, setup
# setup logging
import acmc.logging_config as lc
......@@ -456,7 +454,7 @@ def map(phen_dir, target_code_type):
# Process each folder in codes section
for folder in codes:
for file in folder["files"]:
logger.debug(f"--- {file["file"]} ---")
logger.debug(f"--- {file['file']} ---")
codes_file_path = codes_path / folder["folder"] / file["file"]
# Load Code File
......@@ -710,7 +708,7 @@ def diff(phen_dir, phen_old_dir):
# Write outputs report
new_config_path = new_phen_path / CONFIG_FILE
new_config = json.load(open(new_config_path, "rb"))
report.write(f"\n\n# Report for version {new_config['concept_sets']["version"]}\n\n")
report.write(f"\n\n# Report for version {new_config['concept_sets']['version']}\n\n")
report.write(f"- Removed outputs: {list(removed_outputs)}\n")
report.write(f"- Added outputs: {list(added_outputs)}\n")
report.write(f"- Common outputs: {list(common_outputs)}\n")
......
......@@ -319,7 +319,7 @@ def install():
# Validate and process each item ID
for item in items:
item_id = item["id"]
logger.info(f"---{item["name"]}---")
logger.info(f"--- {item['name']} ---")
releases = get_releases(item_id, API_KEY=api_key, latest=items_latest)
if not releases:
......
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "acmc"
version = "0.0.1"
authors = [
{ name = "Jakub Dylag", email = "j.j.dylag@soton.ac.uk" },
{ name = "Michael Boniface", email = "m.j.boniface@soton.ac.uk" }
]
description = "A Tool for Automating the Curation of Medical Concepts derived from Coding Lists"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.9"
# Runtime dependencies, written as PEP 508 requirement strings.
# Exact pins must use "=="; a single "=" is not a valid version operator
# and causes the requirement to be rejected when the package is built or installed.
dependencies = [
    "aiosqlite==0.21.0",
    "click==8.1.8",
    "cramjam==2.9.1",
    "et-xmlfile==2.0.0",
    "fastparquet==2024.11.0",
    "fsspec==2025.2.0",
    "gitdb==4.0.12",
    "gitpython==3.1.44",
    "greenlet==3.1.1",
    "iniconfig==2.0.0",
    "lxml==5.3.1",
    "numpy<2",
    "openpyxl==3.1.5",
    "pluggy==1.5.0",
    "pyarrow==19.0.0",
    "pyomop==4.3.0",
    "tables==3.9.2",
    "pytest==8.3.4",
    "requests==2.32.3",
    "simpledbf==0.2.6",
    "smmap==5.0.2",
    "sqlalchemy==2.0.38"
]
[project.scripts]
acmc = "acmc.main:main" # Entry point for CLI command
[project.urls]
Repository = "https://git.soton.ac.uk/meldb/concepts-processing"
Documentation = "https://git.soton.ac.uk/meldb/concepts-processing/docs"
Issues = "https://git.soton.ac.uk/meldb/concepts-processing/-/issues"
[tool.hatch.build]
include = ["acmc/**"] # Ensure only the acmc package is included
[tool.hatch.build.targets.sdist]
include = [
"acmc/**",
]
[tool.hatch.build.targets.wheel]
packages = ["acmc"]
[tool.hatch.envs.default]
dependencies = [
"hatch",
"pytest"
]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment