Skip to content
Snippets Groups Projects
Commit e50447ea authored by mjbonifa's avatar mjbonifa
Browse files

added version control through a git repository; changed init to create the git...

Added version control through a git repository; changed init to create the git repo; added publish to commit to the git repo with tagged versions; added diff using some of Jakub's report code.
parent 8c0be40f
No related branches found
No related tags found
No related merge requests found
......@@ -12,11 +12,6 @@ __pycache__
*.save*
~$*
# Build
# Build and output files
build/*
*output*
# temporary
script
......@@ -6,6 +6,8 @@ import phen
from pathlib import Path
BUILD_PATH = Path('build')
def trud_install(args):
    """Handle the `trud install` command by forwarding the API key."""
    api_key = args.api_key
    trud.install(api_key)
......@@ -37,6 +39,27 @@ def phen_map(args):
args.translate,
args.verify)
def phen_publish(args):
    """Handle the `phen publish` command."""
    phen_dir = args.phen_dir
    phen.publish(phen_dir)
def phen_copy(args):
    """Handle the `phen copy` command.

    Copies the phenotype directory (optionally a specific version) into
    the target directory.
    """
    # NOTE(review): this function was previously defined twice with an
    # identical body (a merge/paste artifact); the redundant duplicate
    # definition, which silently shadowed the first, has been removed.
    phen.copy(args.phen_dir,
              args.target_dir,
              args.version)
def phen_diff(args):
    """Handle the `phen diff` command, comparing two phenotype versions."""
    new_dir = args.phen_dir
    old_dir = args.phen_dir_old
    phen.diff(new_dir, old_dir)
def main():
parser = argparse.ArgumentParser(description="ACMC command-line tool")
......@@ -74,27 +97,42 @@ def main():
phen_subparsers = phen_parser.add_subparsers(dest="subcommand", required=True, help="Phen subcommands")
# phen init
phen_init_parser = phen_subparsers.add_parser("init", help="Initiatise phenotype configuration")
phen_init_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
phen_init_parser = phen_subparsers.add_parser("init", help="Initiatise phenotype directory")
phen_init_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_init_parser.set_defaults(func=phen_init)
# phen validate
phen_validate_parser = phen_subparsers.add_parser("validate", help="Validate phenotype configuration")
phen_validate_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
phen_validate_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_validate_parser.set_defaults(func=phen_validate)
# phen map
phen_map_parser = phen_subparsers.add_parser("map", help="Process phen configuration file")
phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
phen_map_parser = phen_subparsers.add_parser("map", help="Process phen mapping")
phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_map_parser.add_argument("-t", "--target-coding", required=True, choices=['read2_code', 'read3_code', 'icd10_code', 'snomed_code', 'opcs4_code'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)")
# Flags
phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Do not translate code types")
phen_map_parser.add_argument("-v", "--verify", action="store_true", default=False, help="Do not verify codes")
# Set the function to call when 'process' subcommand is used
# phen map flags
phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Translate code types")
phen_map_parser.add_argument("-v", "--verify", action="store_true", default=False, help="Verify codes")
phen_map_parser.set_defaults(func=phen_map)
# phen publish
phen_publish_parser = phen_subparsers.add_parser("publish", help="Publish phenotype configuration")
phen_publish_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_publish_parser.set_defaults(func=phen_publish)
# phen copy
phen_copy_parser = phen_subparsers.add_parser("copy", help="Publish phenotype configuration")
phen_copy_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_copy_parser.add_argument("-td", "--target-dir", type=str, default=str(BUILD_PATH.resolve()), help="Target directory for the copy")
phen_copy_parser.add_argument("-ve", "--version", type=str, default=None, help="Version to copy")
phen_copy_parser.set_defaults(func=phen_copy)
# phen diff
phen_diff_parser = phen_subparsers.add_parser("diff", help="Publish phenotype configuration")
phen_diff_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="The directory for the new phenotype version")
phen_diff_parser.add_argument("-old", "--phen-dir-old", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="The directory of the old phenotype version that is compared to the new one")
phen_diff_parser.set_defaults(func=phen_diff)
# Parse arguments
args = parser.parse_args()
......
import pandas as pd
import os
#TODO: set error file location from cmd
log_errors_path = "MELD_errors.csv"
class bcolors: #for printing coloured text
HEADER = '\033[95m'
OKBLUE = '\033[94m'
......@@ -18,9 +15,7 @@ class bcolors: #for printing coloured text
def raise_(ex):
    """Raise *ex*.

    Tiny helper — presumably used so an exception can be triggered from an
    expression context (e.g. inside a lambda), where the `raise` statement
    itself is not allowed.
    """
    raise ex
def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None):
def log_invalid_code(codes, mask, code_type=None, error_file='build/error.csv', file_path=None, cause=None):
# print("ERROR WITH CODES", file_path, codes[~mask])
errors = pd.DataFrame([])
......@@ -30,14 +25,12 @@ def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None):
errors["CAUSE"] = cause
#append to error log csv
if os.path.exists(log_errors_path):
print("FILE EXISTS")
df_error = pd.read_csv(log_errors_path)
if os.path.exists(error_file):
df_error = pd.read_csv(error_file)
df_error = pd.concat([df_error, errors])
df_error.to_csv(log_errors_path, index=False)
df_error.to_csv(error_file, index=False)
else:
print("FILE NOT EXIST")
df_error = errors
df_error.to_csv(log_errors_path, index=False)
df_error.to_csv(error_file, index=False)
return codes[mask]
\ No newline at end of file
......@@ -3,195 +3,127 @@ channels:
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_gnu
- anyio=3.3.4=py39hf3d152e_1
- argon2-cffi=21.1.0=py39h3811e60_2
- async_generator=1.10=py_0
- attrs=21.2.0=pyhd8ed1ab_0
- babel=2.9.1=pyh44b312d_0
- backcall=0.2.0=pyh9f0ad1d_0
- backports=1.0=py_2
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
- bleach=4.1.0=pyhd8ed1ab_0
- blinker=1.4=py_1
- brotlipy=0.7.0=py39h3811e60_1003
- bzip2=1.0.8=h7f98852_4
- c-ares=1.18.1=h7f98852_0
- ca-certificates=2021.10.8=ha878542_0
- certifi=2021.10.8=py39hf3d152e_1
- certipy=0.1.3=py_0
- cffi=1.15.0=py39h4bc2ebd_0
- chardet=4.0.0=py39hf3d152e_2
- charset-normalizer=2.0.0=pyhd8ed1ab_0
- colorama=0.4.4=pyh9f0ad1d_0
- conda=4.10.3=py39hf3d152e_4
- conda-package-handling=1.7.3=py39h3811e60_1
- configurable-http-proxy=4.2.3=node12_hac6e442_0
- cryptography=35.0.0=py39h95dcef6_2
- debugpy=1.5.1=py39he80948d_0
- decorator=5.1.0=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- entrypoints=0.3=py39hde42818_1002
- greenlet=1.1.2=py39he80948d_1
- icu=69.1=h9c3ff4c_0
- idna=3.1=pyhd3deb0d_0
- importlib-metadata=4.8.2=py39hf3d152e_0
- importlib_resources=5.4.0=pyhd8ed1ab_0
- ipykernel=6.5.0=py39hef51801_1
- ipython=7.29.0=py39hef51801_2
- ipython_genutils=0.2.0=py_1
- jedi=0.18.1=py39hf3d152e_0
- jinja2=3.0.3=pyhd8ed1ab_0
- json5=0.9.5=pyh9f0ad1d_0
- jsonschema=4.2.1=pyhd8ed1ab_0
- jupyter_client=7.0.6=pyhd8ed1ab_0
- jupyter_core=4.9.1=py39hf3d152e_1
- jupyter_server=1.11.2=pyhd8ed1ab_0
- jupyter_telemetry=0.1.0=pyhd8ed1ab_1
- jupyterhub=1.5.0=py39hf3d152e_1
- jupyterhub-base=1.5.0=py39hf3d152e_1
- jupyterlab=3.2.4=pyhd8ed1ab_0
- jupyterlab_pygments=0.1.2=pyh9f0ad1d_0
- jupyterlab_server=2.8.2=pyhd8ed1ab_0
- krb5=1.19.2=hcc1bbae_3
- ld_impl_linux-64=2.36.1=hea4e1c9_2
- libarchive=3.5.2=hccf745f_1
- libcurl=7.80.0=h2574ce0_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=h516909a_1
- libffi=3.4.2=h7f98852_5
- libgcc-ng=11.2.0=h1d223b6_11
- libgomp=11.2.0=h1d223b6_11
- libiconv=1.16=h516909a_0
- libmamba=0.18.1=h3985d26_1
- libmambapy=0.18.1=py39h8bfa403_1
- libnghttp2=1.43.0=h812cca2_1
- libsodium=1.0.18=h36c2ea0_1
- libsolv=0.7.19=h780b84a_5
- libssh2=1.10.0=ha56f1ee_2
- libstdcxx-ng=11.2.0=he4da1e4_11
- libxml2=2.9.12=h885dcf4_1
- libzlib=1.2.11=h36c2ea0_1013
- lz4-c=1.9.3=h9c3ff4c_1
- lzo=2.10=h516909a_1000
- mako=1.1.6=pyhd8ed1ab_0
- mamba=0.18.1=py39hfa8f2c8_1
- markupsafe=2.0.1=py39h3811e60_1
- matplotlib-inline=0.1.3=pyhd8ed1ab_0
- mistune=0.8.4=py39h3811e60_1005
- nbclassic=0.3.4=pyhd8ed1ab_0
- nbclient=0.5.9=pyhd8ed1ab_0
- nbconvert=6.3.0=py39hf3d152e_1
- nbformat=5.1.3=pyhd8ed1ab_0
- ncurses=6.2=h58526e2_4
- nest-asyncio=1.5.1=pyhd8ed1ab_0
- nodejs=12.4.0=he1b5a44_0
- notebook=6.4.6=pyha770c72_0
- oauthlib=3.1.1=pyhd8ed1ab_0
- openssl=1.1.1l=h7f98852_0
- packaging=21.3=pyhd8ed1ab_0
- pamela=1.0.0=py_0
- pandoc=2.16.1=h7f98852_0
- pandocfilters=1.5.0=pyhd8ed1ab_0
- parso=0.8.2=pyhd8ed1ab_0
- pexpect=4.8.0=pyh9f0ad1d_2
- pickleshare=0.7.5=py39hde42818_1002
- pip=21.3.1=pyhd8ed1ab_0
- prometheus_client=0.12.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.22=pyha770c72_0
- ptyprocess=0.7.0=pyhd3deb0d_0
- pybind11-abi=4=hd8ed1ab_3
- pycosat=0.6.3=py39h3811e60_1009
- pycparser=2.21=pyhd8ed1ab_0
- pycurl=7.44.1=py39h72e3413_1
- pygments=2.10.0=pyhd8ed1ab_0
- pyjwt=2.3.0=pyhd8ed1ab_0
- pyopenssl=21.0.0=pyhd8ed1ab_0
- pyparsing=3.0.6=pyhd8ed1ab_0
- pyrsistent=0.18.0=py39h3811e60_0
- pysocks=1.7.1=py39hf3d152e_4
- python=3.9.7=hb7a2778_3_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-json-logger=2.0.1=pyh9f0ad1d_0
- python_abi=3.9=2_cp39
- pytz=2021.3=pyhd8ed1ab_0
- pyzmq=22.3.0=py39h37b5a0c_1
- readline=8.1=h46c0cb4_0
- reproc=14.2.3=h7f98852_0
- reproc-cpp=14.2.3=h9c3ff4c_0
- requests=2.26.0=pyhd8ed1ab_0
- ruamel.yaml=0.17.17=py39h3811e60_1
- ruamel.yaml.clib=0.2.6=py39h3811e60_0
- ruamel_yaml=0.15.80=py39h3811e60_1006
- send2trash=1.8.0=pyhd8ed1ab_0
- setuptools=59.2.0=py39hf3d152e_0
- six=1.16.0=pyh6c4a22f_0
- sniffio=1.2.0=py39hf3d152e_2
- sqlalchemy=1.4.27=py39h3811e60_0
- sqlite=3.36.0=h9cd32fc_2
- terminado=0.12.1=py39hf3d152e_1
- testpath=0.5.0=pyhd8ed1ab_0
- tk=8.6.11=h27826a3_1
- tornado=6.1=py39h3811e60_2
- tqdm=4.62.3=pyhd8ed1ab_0
- traitlets=5.1.1=pyhd8ed1ab_0
- urllib3=1.26.7=pyhd8ed1ab_0
- wcwidth=0.2.5=pyh9f0ad1d_2
- webencodings=0.5.1=py_1
- websocket-client=1.2.1=py39hf3d152e_0
- wheel=0.37.0=pyhd8ed1ab_1
- xz=5.2.5=h516909a_1
- yaml=0.2.5=h516909a_0
- yaml-cpp=0.6.3=he1b5a44_4
- zeromq=4.3.4=h9c3ff4c_1
- zipp=3.6.0=pyhd8ed1ab_0
- zlib=1.2.11=h36c2ea0_1013
- zstd=1.5.0=ha95c52a_0
- _openmp_mutex=4.5=2_gnu
- asttokens=3.0.0=pyhd8ed1ab_1
- blosc=1.21.6=he440d0b_1
- brotli-python=1.1.0=py313h46c70d0_2
- bzip2=1.0.8=h4bc722e_7
- c-ares=1.34.4=hb9d3cd8_0
- c-blosc2=2.16.0=h3122c55_0
- ca-certificates=2025.1.31=hbcca054_0
- certifi=2025.1.31=pyhd8ed1ab_0
- cffi=1.17.1=py313hfab6e84_0
- charset-normalizer=3.4.1=pyhd8ed1ab_0
- comm=0.2.2=pyhd8ed1ab_1
- debugpy=1.8.12=py313h46c70d0_0
- decorator=5.1.1=pyhd8ed1ab_1
- exceptiongroup=1.2.2=pyhd8ed1ab_1
- executing=2.1.0=pyhd8ed1ab_1
- h2=4.2.0=pyhd8ed1ab_0
- hdf5=1.14.3=nompi_h2d575fe_109
- hpack=4.1.0=pyhd8ed1ab_0
- hyperframe=6.1.0=pyhd8ed1ab_0
- idna=3.10=pyhd8ed1ab_1
- importlib-metadata=8.6.1=pyha770c72_0
- ipykernel=6.29.5=pyh3099207_0
- ipython=8.32.0=pyh907856f_0
- jedi=0.19.2=pyhd8ed1ab_1
- jupyter_client=8.6.3=pyhd8ed1ab_1
- jupyter_core=5.7.2=pyh31011fe_1
- keyutils=1.6.1=h166bdaf_0
- krb5=1.21.3=h659f571_0
- ld_impl_linux-64=2.43=h712a8e2_2
- libaec=1.1.3=h59595ed_0
- libblas=3.9.0=28_h59b9bed_openblas
- libcblas=3.9.0=28_he106b2a_openblas
- libcurl=8.11.1=h332b0f4_0
- libedit=3.1.20250104=pl5321h7949ede_0
- libev=4.33=hd590300_2
- libexpat=2.6.4=h5888daf_0
- libffi=3.4.6=h2dba641_0
- libgcc=14.2.0=h77fa898_1
- libgcc-ng=14.2.0=h69a702a_1
- libgfortran=14.2.0=h69a702a_1
- libgfortran5=14.2.0=hd5240d6_1
- libgomp=14.2.0=h77fa898_1
- liblapack=3.9.0=28_h7ac8fdf_openblas
- liblzma=5.6.4=hb9d3cd8_0
- libmpdec=4.0.0=h4bc722e_0
- libnghttp2=1.64.0=h161d5f1_0
- libopenblas=0.3.28=pthreads_h94d23a6_1
- libsodium=1.0.20=h4ab18f5_0
- libsqlite=3.48.0=hee588c1_1
- libssh2=1.11.1=hf672d98_0
- libstdcxx=14.2.0=hc0a3c3a_1
- libstdcxx-ng=14.2.0=h4852527_1
- libuuid=2.38.1=h0b41bf4_0
- libzlib=1.3.1=hb9d3cd8_2
- lz4-c=1.10.0=h5888daf_1
- matplotlib-inline=0.1.7=pyhd8ed1ab_1
- ncurses=6.5=h2d0b736_3
- nest-asyncio=1.6.0=pyhd8ed1ab_1
- nomkl=1.0=h5ca1d4c_0
- numexpr=2.10.2=py313h5f97788_100
- numpy=2.2.3=py313h17eae1a_0
- openssl=3.4.1=h7b32b05_0
- packaging=24.2=pyhd8ed1ab_2
- pandas=2.2.3=py313ha87cce1_1
- parso=0.8.4=pyhd8ed1ab_1
- pexpect=4.9.0=pyhd8ed1ab_1
- pickleshare=0.7.5=pyhd8ed1ab_1004
- pip=25.0.1=pyh145f28c_0
- platformdirs=4.3.6=pyhd8ed1ab_1
- prompt-toolkit=3.0.50=pyha770c72_0
- psutil=6.1.1=py313h536fd9c_0
- ptyprocess=0.7.0=pyhd8ed1ab_1
- pure_eval=0.2.3=pyhd8ed1ab_1
- py-cpuinfo=9.0.0=pyhd8ed1ab_1
- pycparser=2.22=pyh29332c3_1
- pygments=2.19.1=pyhd8ed1ab_0
- pysocks=1.7.1=pyha55dd90_7
- pytables=3.10.2=py313hd261420_1
- python=3.13.1=ha99a958_105_cp313
- python-dateutil=2.9.0.post0=pyhff2d567_1
- python-tzdata=2025.1=pyhd8ed1ab_0
- python_abi=3.13=5_cp313
- pytz=2024.1=pyhd8ed1ab_0
- pyzmq=26.2.1=py313h8e95178_0
- readline=8.2=h8228510_1
- requests=2.32.3=pyhd8ed1ab_1
- six=1.17.0=pyhd8ed1ab_0
- snappy=1.2.1=h8bd8927_1
- stack_data=0.6.3=pyhd8ed1ab_1
- tk=8.6.13=noxft_h4845f30_101
- tornado=6.4.2=py313h536fd9c_0
- traitlets=5.14.3=pyhd8ed1ab_1
- typing-extensions=4.12.2=hd8ed1ab_1
- typing_extensions=4.12.2=pyha770c72_1
- tzdata=2025a=h78e105d_0
- urllib3=2.3.0=pyhd8ed1ab_0
- wcwidth=0.2.13=pyhd8ed1ab_1
- zeromq=4.3.5=h3b0a872_7
- zipp=3.21.0=pyhd8ed1ab_1
- zlib-ng=2.2.4=h7955e40_0
- zstandard=0.23.0=py313h80202fe_1
- zstd=1.5.6=ha6fb4c9_0
- pip:
- alembic==1.4.1
- boto3==1.20.30
- botocore==1.23.30
- cfgv==3.3.1
- click==8.0.3
- cloudpickle==2.0.0
- cramjam==2.8.0
- databricks-cli==0.16.2
- distlib==0.3.4
- docker==5.0.3
- et-xmlfile==1.1.0
- fastparquet==2023.10.1
- filelock==3.4.2
- flask==2.0.2
- fsspec==2023.12.2
- gitdb==4.0.9
- gitpython==3.1.24
- gunicorn==20.1.0
- identify==2.4.2
- itsdangerous==2.0.1
- jmespath==0.10.0
- lxml==5.3.0
- minio==7.1.2
- mlflow==1.22.0
- nodeenv==1.6.0
- numpy==1.26.3
- openpyxl==3.1.2
- pandas==2.1.4
- platformdirs==2.4.1
- pre-commit==2.16.0
- prometheus-flask-exporter==0.18.7
- protobuf==3.19.1
- python-dotenv==0.19.2
- python-editor==1.0.4
- pyyaml==6.0
- querystring-parser==1.2.4
- s3transfer==0.5.0
- aiosqlite==0.21.0
- click==8.1.8
- cramjam==2.9.1
- et-xmlfile==2.0.0
- fastparquet==2024.11.0
- fsspec==2025.2.0
- gitdb==4.0.12
- gitpython==3.1.44
- greenlet==3.1.1
- iniconfig==2.0.0
- lxml==5.3.1
- openpyxl==3.1.5
- pluggy==1.5.0
- pyarrow==19.0.0
- pyomop==4.3.0
- pytest==8.3.4
- simpledbf==0.2.6
- smmap==5.0.0
- sqlparse==0.4.2
- tabulate==0.8.9
- toml==0.10.2
- typing-extensions==4.0.1
- tzdata==2023.4
- virtualenv==20.13.0
- werkzeug==2.0.2
prefix: /opt/conda
- smmap==5.0.2
- sqlalchemy==2.0.38
prefix: /opt/conda/envs/acmc
{
"concept_sets": {
"version": "1.0.0",
"version": "v1.0.1",
"omop": {
"vocabulary_id": "ACMC_Example",
"vocabulary_name": "ACMC example phenotype",
......@@ -11,16 +11,6 @@
"concept_set_name": "ABDO_PAIN",
"concept_set_status": "AGREED",
"metadata": {
"#": "18",
"CONCEPT DESCRIPTION": "Abdominal pain",
"CONCEPT TYPE": "Workload indicator (symptom)",
"DATE ADDED ": "2023-08-25",
"REQUEST REASON ": "Clinician SF - requested by email - symptom example from Qualitative Evidence Synthesis",
"SOURCE INFO": "YES",
"FUNCTION": "QUERY BY CODING LIST",
"FUNCTION.1": "https://clinicalcodes.rss.mhs.man.ac.uk/",
"CODING LIST": "https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/codes/ClinicalCodes.org%20from%20the%20University%20of%20Manchester/Symptom%20code%20lists/Abdominal%20pain/res176-abdominal-pain.csv ",
"NOTES": "2023-09-08: Clinical SF confirmed that the clinical view would be that this would need to be recurrent or persistent."
}
}
]
......@@ -28,7 +18,7 @@
"codes": [
{
"folder": "clinical-codes-org",
"description": "SF's clinical codes - downloaded 16/11/23",
"description": "Downloaded 16/11/23",
"files": [
{
"file": "Symptom code lists/Abdominal pain/res176-abdominal-pain.csv",
......
{
"concept_sets": {
"version": "v1.0.4",
"omop": {
"vocabulary_id": "ACMC_Example",
"vocabulary_name": "ACMC example phenotype",
"vocabulary_reference": "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
},
"concept_set": [
{
"concept_set_name": "CVD_EVENTS",
"concept_set_status": "AGREED",
"metadata": {}
},
{
"concept_set_name": "DID_NOT_ATTEND",
"concept_set_status": "AGREED",
"metadata": {}
}
]
},
"codes": [
{
"folder": "clinical-codes-org",
"description": "Downloaded 16/11/23",
"files": [
{
"file": "Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv",
"columns": {
"icd10_code": "code",
"metadata": []
},
"concept_set": [
"CVD_EVENTS"
]
},
{
"file": "Non-attendance codes/res201-did-not-attend-appointment.csv",
"columns": {
"read2_code": "code",
"metadata": []
},
"concept_set": [
"DID_NOT_ATTEND"
]
}
]
}
]
}
\ No newline at end of file
name: acmc
channels:
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- blosc=1.21.6=he440d0b_1
- brotli-python=1.1.0=py313h46c70d0_2
- bzip2=1.0.8=h4bc722e_7
- c-ares=1.34.4=hb9d3cd8_0
- c-blosc2=2.16.0=h3122c55_0
- ca-certificates=2025.1.31=hbcca054_0
- certifi=2024.12.14=pyhd8ed1ab_0
- cffi=1.17.1=py313hfab6e84_0
- charset-normalizer=3.4.1=pyhd8ed1ab_0
- h2=4.2.0=pyhd8ed1ab_0
- hdf5=1.14.3=nompi_h2d575fe_109
- hpack=4.1.0=pyhd8ed1ab_0
- hyperframe=6.1.0=pyhd8ed1ab_0
- idna=3.10=pyhd8ed1ab_1
- keyutils=1.6.1=h166bdaf_0
- krb5=1.21.3=h659f571_0
- ld_impl_linux-64=2.43=h712a8e2_2
- libaec=1.1.3=h59595ed_0
- libblas=3.9.0=28_h59b9bed_openblas
- libcblas=3.9.0=28_he106b2a_openblas
- libcurl=8.11.1=h332b0f4_0
- libedit=3.1.20250104=pl5321h7949ede_0
- libev=4.33=hd590300_2
- libexpat=2.6.4=h5888daf_0
- libffi=3.4.6=h2dba641_0
- libgcc=14.2.0=h77fa898_1
- libgcc-ng=14.2.0=h69a702a_1
- libgfortran=14.2.0=h69a702a_1
- libgfortran5=14.2.0=hd5240d6_1
- libgomp=14.2.0=h77fa898_1
- liblapack=3.9.0=28_h7ac8fdf_openblas
- liblzma=5.6.4=hb9d3cd8_0
- libmpdec=4.0.0=h4bc722e_0
- libnghttp2=1.64.0=h161d5f1_0
- libopenblas=0.3.28=pthreads_h94d23a6_1
- libsqlite=3.48.0=hee588c1_1
- libssh2=1.11.1=hf672d98_0
- libstdcxx=14.2.0=hc0a3c3a_1
- libstdcxx-ng=14.2.0=h4852527_1
- libuuid=2.38.1=h0b41bf4_0
- libzlib=1.3.1=hb9d3cd8_2
- lz4-c=1.10.0=h5888daf_1
- ncurses=6.5=h2d0b736_3
- nomkl=1.0=h5ca1d4c_0
- numexpr=2.10.2=py313h5f97788_100
- numpy=2.2.3=py313h17eae1a_0
- openssl=3.4.1=h7b32b05_0
- packaging=24.2=pyhd8ed1ab_2
- pandas=2.2.3=py313ha87cce1_1
- pip=25.0.1=pyh145f28c_0
- py-cpuinfo=9.0.0=pyhd8ed1ab_1
- pycparser=2.22=pyh29332c3_1
- pysocks=1.7.1=pyha55dd90_7
- pytables=3.10.2=py313hd261420_1
- python=3.13.1=ha99a958_105_cp313
- python-dateutil=2.9.0.post0=pyhff2d567_1
- python-tzdata=2025.1=pyhd8ed1ab_0
- python_abi=3.13=5_cp313
- pytz=2024.1=pyhd8ed1ab_0
- readline=8.2=h8228510_1
- requests=2.32.3=pyhd8ed1ab_1
- six=1.17.0=pyhd8ed1ab_0
- snappy=1.2.1=h8bd8927_1
- tk=8.6.13=noxft_h4845f30_101
- typing-extensions=4.12.2=hd8ed1ab_1
- typing_extensions=4.12.2=pyha770c72_1
- tzdata=2025a=h78e105d_0
- urllib3=2.3.0=pyhd8ed1ab_0
- zlib-ng=2.2.4=h7955e40_0
- zstandard=0.23.0=py313h80202fe_1
- zstd=1.5.6=ha6fb4c9_0
- pip:
- aiosqlite==0.21.0
- click==8.1.8
- cramjam==2.9.1
- et-xmlfile==2.0.0
- fastparquet==2024.11.0
- fsspec==2025.2.0
- greenlet==3.1.1
- lxml==5.3.1
- openpyxl==3.1.5
- pyarrow==19.0.0
- pyomop==4.3.0
- simpledbf==0.2.6
- sqlalchemy==2.0.38
prefix: /opt/conda/envs/acmc
File moved
This diff is collapsed.
import json
import os
import pandas as pd
import numpy as np
import argparse
#Get all Files in JSON
def get_json_files(folders):
    """Collect one row per (concept, file) pair declared in the JSON mapping.

    Returns a DataFrame with columns ``json_concept``, ``filepath`` and
    ``json_code_types`` (the latter is absent/NaN for files that declare no
    concept sets).
    """
    rows = []
    for folder in folders:
        if "files" not in folder:
            continue
        for entry in folder["files"]:
            path = folder["folder"] + "/" + entry["file"]
            if "concept_set" in entry:
                for name in entry["concept_set"]:
                    rows.append({"json_concept": name,
                                 "filepath": path,
                                 "json_code_types": list(entry["columns"].keys())})
            elif "concept_set_categories" in entry:
                # categories map a code to a list; only the first concept is kept
                for mapped in entry["concept_set_categories"].values():
                    rows.append({"json_concept": mapped[0],
                                 "filepath": path,
                                 "json_code_types": list(entry["columns"].keys())})
            else:
                rows.append({"json_concept": None, "filepath": path})
    frame = pd.DataFrame(rows)
    frame["filepath"] = frame["filepath"].astype(str)
    return frame
#Get all Files Excel Summary
def get_excel_files(out2):
    """Extract (concept, filepath) rows from the Excel concept-tracking sheet.

    Keeps only rows with AGREED == "USE" and FUNCTION == "QUERY BY CODING
    LIST", splits comma-separated coding-list URLs into one row per file,
    and rewrites repository URLs into repo-relative paths.
    """
    cols = ["CONCEPT NAME ", "CODING LIST", "AGREED", "FUNCTION"]
    df = out2[cols].loc[1:]  # row 0 is a non-data header row
    # keep only agreed, coding-list-driven concepts
    keep = (df["AGREED"] == "USE") & (df["FUNCTION"] == "QUERY BY CODING LIST")
    df = df[keep].drop(["AGREED", "FUNCTION"], axis=1)
    # one row per linked file
    df["CODING LIST"] = df["CODING LIST"].str.split(",")
    df = df.explode("CODING LIST")
    cleaned = (df["CODING LIST"].str.strip()
               .str.replace("https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/", "")
               .str.replace("%20", " "))
    df["CODING LIST"] = cleaned
    return df.rename(columns={"CONCEPT NAME ": "excel_concept", "CODING LIST": "filepath"})
#Get all Files in /codes
def get_code_files(path_codes):
    """Recursively list data files (.csv/.xlsx/.dta) under *path_codes*.

    Skips notebook-checkpoint directories; returns a one-column DataFrame
    named "filepath".
    """
    found = []
    for root, _dirs, names in os.walk(path_codes, topdown=False):
        if ".ipynb_checkpoint" in root:  # exclude notebook checkpoints
            continue
        found.extend(os.path.join(root, name) for name in names
                     if name.endswith((".csv", ".xlsx", ".dta")))
    frame = pd.DataFrame(found).rename(columns={0: "filepath"})
    frame["filepath"] = frame["filepath"].astype(str)
    return frame
def test_concept_def(config, report, folders, summary):
    """Cross-check concept definitions between the JSON mapping, the Excel
    summary and the files present on disk, appending a Markdown section
    to *report*."""
    report.write("## Check Concept Definitions")
    json_rows = get_json_files(folders)
    excel_rows = get_excel_files(summary)
    disk_files = get_code_files(config["codes"])
    # full outer joins so files missing from any one source still appear
    merged = pd.merge(disk_files, json_rows, how="outer", on="filepath")
    merged = pd.merge(merged, excel_rows, how="outer", on="filepath")
    report.write("\n\nCode source files:\n")
    report.write(f"- {len(disk_files)} total files\n\n")
    report.write("\n\nJSON concepts:\n")
    report.write(f"- {len(json_rows['filepath'].unique())} unique concepts\n")
    absent = merged[merged["json_concept"].isna() & merged["excel_concept"].notna()]
    if not absent.empty:
        report.write("- Missing from JSON\n")
        for _, row in absent.iterrows():
            report.write("\t - ❌ {} {}\n\n".format(row["filepath"], row["excel_concept"]))
    report.write("\n\nEXCEL concepts:\n")
    report.write(f"- {len(excel_rows['filepath'].unique())} unique concepts\n")
    absent = merged[merged["json_concept"].notna() & merged["excel_concept"].isna()]
    if not absent.empty:
        report.write("- Missing from EXCEL\n")
        for _, row in absent.iterrows():
            report.write("\t - ❌ {} {} {}\n\n".format(row["filepath"], row["json_concept"], row["json_code_types"]))
def get_output_files(version):
    """Return ``(output_csv_paths, error_csv_path)`` for a given *version*."""
    prefix = f"output/{version}_MELD"
    output_files = [
        f"{prefix}_concepts_readv2.csv",
        f"{prefix}_snomed_no_translate.csv",
        f"{prefix}_icd10_no_translate.csv",
        # the med output is currently disabled
        f"{prefix}_atc_no_translate.csv",
    ]
    error_file = f"{prefix}_errors.csv"
    return output_files, error_file
def test_concept_changes(config, report):
    """Compare per-concept-set code counts between the previous and the
    current version's output files, appending a Markdown section to
    *report*.

    Reads the CSVs returned by get_output_files for both versions, so
    those files must already exist on disk.
    """
    version_1 = config["previous"]
    version_2 = config["version"]
    output1, err1 = get_output_files(version_1)
    output2, err2 = get_output_files(version_2)
    report.write(f"\n\n## Compare Concepts {version_1} to {version_2}\n\n")
    # compare each pair of corresponding output files
    for out1, out2 in zip(output1, output2):
        report.write(f"`{out1}` to `{out2}`\n")
        # number of codes per concept set in each version
        df1 = pd.read_csv(out1)
        df1 = df1[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
        df2 = pd.read_csv(out2)
        df2 = df2[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
        #Added/Removed Concepts
        report.write("- Removed Concepts {}\n".format(list(set(df1.index) - set(df2.index))))
        report.write("- Added Concepts {}\n".format(list(set(df2.index) - set(df1.index))))
        #Changed Concepts
        diff = df2 - df1 #diff in counts
        # NaN rows are concepts present in only one version (already listed above)
        diff = diff[(~(diff["CONCEPT"] == 0.0)) & diff["CONCEPT"].notna()] #get non-zero counts
        s = "\n"
        for concept, row in diff.iterrows():
            s += "\t - {} {}\n".format(concept, row["CONCEPT"])
        report.write("- Changed Concepts {}\n\n".format(s))
# ✅ ❌
def main(config):
    """Generate the concept report.

    Validates the configured file extensions, loads the mapping JSON and
    the Excel summary, then appends the per-version report sections to the
    Markdown report file.

    config keys used: "report", "map", "summary", "version", "previous",
    and (via test_concept_def) "codes".

    Raises:
        Exception: if any of report/map/summary has an unsupported extension.
    """
    # Validate all inputs up front so we fail before creating/appending to
    # the report file (previously the report was opened first).
    if not config["report"].endswith(".md"):
        raise Exception("Unsupported filetype provided for source file")
    if not config["map"].endswith(".json"):
        raise Exception("Unsupported filetype provided for source file")
    if not config["summary"].endswith(".xlsx"):
        raise Exception("Unsupported filetype provided for summary file")

    # Load mapping file (fix: the handle was previously never closed)
    with open(config["map"], 'rb') as map_file:
        folders = json.load(map_file)

    # Load Excel summary file
    summary = pd.read_excel(config["summary"], sheet_name="CONCEPT_TRACKING", dtype=str)

    # Append report sections (fix: report handle is now closed on exit)
    with open(config["report"], 'a') as report:
        # Title with version
        report.write("\n\n# Report {} \n\n".format(config["version"]))
        # Compare JSON mapping with Excel summary
        test_concept_def(config, report, folders, summary)
        # Changes in concept codes between versions
        test_concept_changes(config, report)
if __name__ == '__main__':
    # CLI entry point: collect the six positional arguments into a plain
    # dict and run the report generation.
    parser = argparse.ArgumentParser(description="Script performs testing and generates report for output files",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("map", help="Concept/Phenotype Assignment File (json)")
    parser.add_argument("summary", help="Summary working excel document")
    parser.add_argument("codes", help="Folder containing all code source files")
    parser.add_argument("report", help="Output Markdown file containing report")
    parser.add_argument("version", help="Version of output")
    parser.add_argument("previous", help="Previous version of output")
    args = parser.parse_args()
    config = vars(args)  # argparse.Namespace -> dict expected by main()
    main(config)
\ No newline at end of file
File moved
File moved
import pytest
import argparse
from unittest.mock import patch, MagicMock
import trud
import omop
import phen
from pathlib import Path
from acmc import trud_install, omop_install, omop_clear, omop_delete, phen_init, phen_validate, phen_map, phen_publish, phen_copy, phen_diff
def test_trud_install():
    """`trud install` forwards the API key to trud.install."""
    ns = argparse.Namespace(api_key="test_key")
    with patch("trud.install") as install_mock:
        trud_install(ns)
    install_mock.assert_called_once_with("test_key")
def test_omop_install():
    """`omop install` forwards the DB path and the OMOP folder."""
    ns = argparse.Namespace(omop_folder="/path/to/omop")
    with patch("omop.install") as install_mock:
        omop_install(ns)
    install_mock.assert_called_once_with(omop.OMOP_DB_PATH, "/path/to/omop")
def test_omop_clear():
    """`omop clear` clears the configured OMOP database."""
    ns = argparse.Namespace()
    with patch("omop.clear") as clear_mock:
        omop_clear(ns)
    clear_mock.assert_called_once_with(omop.OMOP_DB_PATH)
def test_omop_delete():
    """`omop delete` deletes the configured OMOP database."""
    ns = argparse.Namespace()
    with patch("omop.delete") as delete_mock:
        omop_delete(ns)
    delete_mock.assert_called_once_with(omop.OMOP_DB_PATH)
def test_phen_init():
    """`phen init` forwards the phenotype directory to phen.init."""
    ns = argparse.Namespace(phen_dir="/path/to/phen")
    with patch("phen.init") as init_mock:
        phen_init(ns)
    init_mock.assert_called_once_with("/path/to/phen")
def test_phen_validate():
    """`phen validate` forwards the phenotype directory to phen.validate."""
    ns = argparse.Namespace(phen_dir="/path/to/phen")
    with patch("phen.validate") as validate_mock:
        phen_validate(ns)
    validate_mock.assert_called_once_with("/path/to/phen")
def test_phen_map():
    """`phen map` forwards directory, coding and both flags to phen.map."""
    ns = argparse.Namespace(phen_dir="/path/to/phen",
                            target_coding="icd10_code",
                            translate=True,
                            verify=True)
    with patch("phen.map") as map_mock:
        phen_map(ns)
    map_mock.assert_called_once_with("/path/to/phen", "icd10_code", True, True)
def test_phen_publish():
    """`phen publish` forwards the phenotype directory to phen.publish."""
    ns = argparse.Namespace(phen_dir="/path/to/phen")
    with patch("phen.publish") as publish_mock:
        phen_publish(ns)
    publish_mock.assert_called_once_with("/path/to/phen")
def test_phen_copy():
    """`phen copy` forwards source dir, target dir and version to phen.copy."""
    ns = argparse.Namespace(phen_dir="/path/to/phen",
                            target_dir="/path/to/target",
                            version="1.0")
    with patch("phen.copy") as copy_mock:
        phen_copy(ns)
    copy_mock.assert_called_once_with("/path/to/phen", "/path/to/target", "1.0")
def test_phen_diff():
    """`phen diff` forwards the new and old phenotype dirs to phen.diff."""
    ns = argparse.Namespace(phen_dir="/path/to/phen",
                            phen_dir_old="/path/to/old_phen")
    with patch("phen.diff") as diff_mock:
        phen_diff(ns)
    diff_mock.assert_called_once_with("/path/to/phen", "/path/to/old_phen")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment