Skip to content
Snippets Groups Projects
Commit e50447ea authored by mjbonifa's avatar mjbonifa
Browse files

added version control through a git repository; changed init to create the git...

Added version control through a git repository; changed init to create the git repo; added publish to commit to the git repo with tagged versions; added diff using some of Jakub's report code.
parent 8c0be40f
No related branches found
No related tags found
No related merge requests found
......@@ -12,11 +12,6 @@ __pycache__
*.save*
~$*
# Build
# Build and output files
build/*
*output*
# temporary
script
......@@ -6,6 +6,8 @@ import phen
from pathlib import Path
BUILD_PATH = Path('build')
def trud_install(args):
    """Handle the `trud install` command by forwarding the API key."""
    api_key = args.api_key
    trud.install(api_key)
......@@ -37,6 +39,27 @@ def phen_map(args):
args.translate,
args.verify)
def phen_publish(args):
    """Handle the `phen publish` command."""
    phen_dir = args.phen_dir
    phen.publish(phen_dir)
def phen_copy(args):
    """Handle the `phen copy` command.

    Copies the phenotype directory (optionally a specific version) into
    the target directory.
    """
    # NOTE(review): this function was previously defined twice with an
    # identical body (a merge/paste artifact); the redundant duplicate
    # definition, which silently shadowed the first, has been removed.
    phen.copy(args.phen_dir,
              args.target_dir,
              args.version)
def phen_diff(args):
    """Handle the `phen diff` command, comparing two phenotype versions."""
    new_dir = args.phen_dir
    old_dir = args.phen_dir_old
    phen.diff(new_dir, old_dir)
def main():
parser = argparse.ArgumentParser(description="ACMC command-line tool")
......@@ -74,27 +97,42 @@ def main():
phen_subparsers = phen_parser.add_subparsers(dest="subcommand", required=True, help="Phen subcommands")
# phen init
phen_init_parser = phen_subparsers.add_parser("init", help="Initiatise phenotype configuration")
phen_init_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
phen_init_parser = phen_subparsers.add_parser("init", help="Initiatise phenotype directory")
phen_init_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_init_parser.set_defaults(func=phen_init)
# phen validate
phen_validate_parser = phen_subparsers.add_parser("validate", help="Validate phenotype configuration")
phen_validate_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
phen_validate_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_validate_parser.set_defaults(func=phen_validate)
# phen map
phen_map_parser = phen_subparsers.add_parser("map", help="Process phen configuration file")
phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=phen.DEFAULT_PHEN_PATH.resolve, help="Phenotype directory")
phen_map_parser = phen_subparsers.add_parser("map", help="Process phen mapping")
phen_map_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_map_parser.add_argument("-t", "--target-coding", required=True, choices=['read2_code', 'read3_code', 'icd10_code', 'snomed_code', 'opcs4_code'], help="Specify the target coding (read2, read3, icd10, snomed, opcs4)")
# Flags
phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Do not translate code types")
phen_map_parser.add_argument("-v", "--verify", action="store_true", default=False, help="Do not verify codes")
# Set the function to call when 'process' subcommand is used
# phen map flags
phen_map_parser.add_argument("-tr", "--translate", action="store_true", default=False, help="Translate code types")
phen_map_parser.add_argument("-v", "--verify", action="store_true", default=False, help="Verify codes")
phen_map_parser.set_defaults(func=phen_map)
# phen publish
phen_publish_parser = phen_subparsers.add_parser("publish", help="Publish phenotype configuration")
phen_publish_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_publish_parser.set_defaults(func=phen_publish)
# phen copy
phen_copy_parser = phen_subparsers.add_parser("copy", help="Publish phenotype configuration")
phen_copy_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="Phenotype directory")
phen_copy_parser.add_argument("-td", "--target-dir", type=str, default=str(BUILD_PATH.resolve()), help="Target directory for the copy")
phen_copy_parser.add_argument("-ve", "--version", type=str, default=None, help="Version to copy")
phen_copy_parser.set_defaults(func=phen_copy)
# phen diff
phen_diff_parser = phen_subparsers.add_parser("diff", help="Publish phenotype configuration")
phen_diff_parser.add_argument("-d", "--phen-dir", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="The directory for the new phenotype version")
phen_diff_parser.add_argument("-old", "--phen-dir-old", type=str, default=str(phen.DEFAULT_PHEN_PATH.resolve()), help="The directory of the old phenotype version that is compared to the new one")
phen_diff_parser.set_defaults(func=phen_diff)
# Parse arguments
args = parser.parse_args()
......
import pandas as pd
import os
#TODO: set error file location from cmd
log_errors_path = "MELD_errors.csv"
class bcolors: #for printing coloured text
HEADER = '\033[95m'
OKBLUE = '\033[94m'
......@@ -18,9 +15,7 @@ class bcolors: #for printing coloured text
def raise_(ex):
    """Raise *ex*.

    Tiny helper — presumably used so an exception can be triggered from an
    expression context (e.g. inside a lambda), where the `raise` statement
    itself is not allowed.
    """
    raise ex
def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None):
def log_invalid_code(codes, mask, code_type=None, error_file='build/error.csv', file_path=None, cause=None):
# print("ERROR WITH CODES", file_path, codes[~mask])
errors = pd.DataFrame([])
......@@ -30,14 +25,12 @@ def log_invalid_code(codes, mask, code_type=None, file_path=None, cause=None):
errors["CAUSE"] = cause
#append to error log csv
if os.path.exists(log_errors_path):
print("FILE EXISTS")
df_error = pd.read_csv(log_errors_path)
if os.path.exists(error_file):
df_error = pd.read_csv(error_file)
df_error = pd.concat([df_error, errors])
df_error.to_csv(log_errors_path, index=False)
df_error.to_csv(error_file, index=False)
else:
print("FILE NOT EXIST")
df_error = errors
df_error.to_csv(log_errors_path, index=False)
df_error.to_csv(error_file, index=False)
return codes[mask]
\ No newline at end of file
......@@ -3,195 +3,127 @@ channels:
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_gnu
- anyio=3.3.4=py39hf3d152e_1
- argon2-cffi=21.1.0=py39h3811e60_2
- async_generator=1.10=py_0
- attrs=21.2.0=pyhd8ed1ab_0
- babel=2.9.1=pyh44b312d_0
- backcall=0.2.0=pyh9f0ad1d_0
- backports=1.0=py_2
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
- bleach=4.1.0=pyhd8ed1ab_0
- blinker=1.4=py_1
- brotlipy=0.7.0=py39h3811e60_1003
- bzip2=1.0.8=h7f98852_4
- c-ares=1.18.1=h7f98852_0
- ca-certificates=2021.10.8=ha878542_0
- certifi=2021.10.8=py39hf3d152e_1
- certipy=0.1.3=py_0
- cffi=1.15.0=py39h4bc2ebd_0
- chardet=4.0.0=py39hf3d152e_2
- charset-normalizer=2.0.0=pyhd8ed1ab_0
- colorama=0.4.4=pyh9f0ad1d_0
- conda=4.10.3=py39hf3d152e_4
- conda-package-handling=1.7.3=py39h3811e60_1
- configurable-http-proxy=4.2.3=node12_hac6e442_0
- cryptography=35.0.0=py39h95dcef6_2
- debugpy=1.5.1=py39he80948d_0
- decorator=5.1.0=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- entrypoints=0.3=py39hde42818_1002
- greenlet=1.1.2=py39he80948d_1
- icu=69.1=h9c3ff4c_0
- idna=3.1=pyhd3deb0d_0
- importlib-metadata=4.8.2=py39hf3d152e_0
- importlib_resources=5.4.0=pyhd8ed1ab_0
- ipykernel=6.5.0=py39hef51801_1
- ipython=7.29.0=py39hef51801_2
- ipython_genutils=0.2.0=py_1
- jedi=0.18.1=py39hf3d152e_0
- jinja2=3.0.3=pyhd8ed1ab_0
- json5=0.9.5=pyh9f0ad1d_0
- jsonschema=4.2.1=pyhd8ed1ab_0
- jupyter_client=7.0.6=pyhd8ed1ab_0
- jupyter_core=4.9.1=py39hf3d152e_1
- jupyter_server=1.11.2=pyhd8ed1ab_0
- jupyter_telemetry=0.1.0=pyhd8ed1ab_1
- jupyterhub=1.5.0=py39hf3d152e_1
- jupyterhub-base=1.5.0=py39hf3d152e_1
- jupyterlab=3.2.4=pyhd8ed1ab_0
- jupyterlab_pygments=0.1.2=pyh9f0ad1d_0
- jupyterlab_server=2.8.2=pyhd8ed1ab_0
- krb5=1.19.2=hcc1bbae_3
- ld_impl_linux-64=2.36.1=hea4e1c9_2
- libarchive=3.5.2=hccf745f_1
- libcurl=7.80.0=h2574ce0_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=h516909a_1
- libffi=3.4.2=h7f98852_5
- libgcc-ng=11.2.0=h1d223b6_11
- libgomp=11.2.0=h1d223b6_11
- libiconv=1.16=h516909a_0
- libmamba=0.18.1=h3985d26_1
- libmambapy=0.18.1=py39h8bfa403_1
- libnghttp2=1.43.0=h812cca2_1
- libsodium=1.0.18=h36c2ea0_1
- libsolv=0.7.19=h780b84a_5
- libssh2=1.10.0=ha56f1ee_2
- libstdcxx-ng=11.2.0=he4da1e4_11
- libxml2=2.9.12=h885dcf4_1
- libzlib=1.2.11=h36c2ea0_1013
- lz4-c=1.9.3=h9c3ff4c_1
- lzo=2.10=h516909a_1000
- mako=1.1.6=pyhd8ed1ab_0
- mamba=0.18.1=py39hfa8f2c8_1
- markupsafe=2.0.1=py39h3811e60_1
- matplotlib-inline=0.1.3=pyhd8ed1ab_0
- mistune=0.8.4=py39h3811e60_1005
- nbclassic=0.3.4=pyhd8ed1ab_0
- nbclient=0.5.9=pyhd8ed1ab_0
- nbconvert=6.3.0=py39hf3d152e_1
- nbformat=5.1.3=pyhd8ed1ab_0
- ncurses=6.2=h58526e2_4
- nest-asyncio=1.5.1=pyhd8ed1ab_0
- nodejs=12.4.0=he1b5a44_0
- notebook=6.4.6=pyha770c72_0
- oauthlib=3.1.1=pyhd8ed1ab_0
- openssl=1.1.1l=h7f98852_0
- packaging=21.3=pyhd8ed1ab_0
- pamela=1.0.0=py_0
- pandoc=2.16.1=h7f98852_0
- pandocfilters=1.5.0=pyhd8ed1ab_0
- parso=0.8.2=pyhd8ed1ab_0
- pexpect=4.8.0=pyh9f0ad1d_2
- pickleshare=0.7.5=py39hde42818_1002
- pip=21.3.1=pyhd8ed1ab_0
- prometheus_client=0.12.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.22=pyha770c72_0
- ptyprocess=0.7.0=pyhd3deb0d_0
- pybind11-abi=4=hd8ed1ab_3
- pycosat=0.6.3=py39h3811e60_1009
- pycparser=2.21=pyhd8ed1ab_0
- pycurl=7.44.1=py39h72e3413_1
- pygments=2.10.0=pyhd8ed1ab_0
- pyjwt=2.3.0=pyhd8ed1ab_0
- pyopenssl=21.0.0=pyhd8ed1ab_0
- pyparsing=3.0.6=pyhd8ed1ab_0
- pyrsistent=0.18.0=py39h3811e60_0
- pysocks=1.7.1=py39hf3d152e_4
- python=3.9.7=hb7a2778_3_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-json-logger=2.0.1=pyh9f0ad1d_0
- python_abi=3.9=2_cp39
- pytz=2021.3=pyhd8ed1ab_0
- pyzmq=22.3.0=py39h37b5a0c_1
- readline=8.1=h46c0cb4_0
- reproc=14.2.3=h7f98852_0
- reproc-cpp=14.2.3=h9c3ff4c_0
- requests=2.26.0=pyhd8ed1ab_0
- ruamel.yaml=0.17.17=py39h3811e60_1
- ruamel.yaml.clib=0.2.6=py39h3811e60_0
- ruamel_yaml=0.15.80=py39h3811e60_1006
- send2trash=1.8.0=pyhd8ed1ab_0
- setuptools=59.2.0=py39hf3d152e_0
- six=1.16.0=pyh6c4a22f_0
- sniffio=1.2.0=py39hf3d152e_2
- sqlalchemy=1.4.27=py39h3811e60_0
- sqlite=3.36.0=h9cd32fc_2
- terminado=0.12.1=py39hf3d152e_1
- testpath=0.5.0=pyhd8ed1ab_0
- tk=8.6.11=h27826a3_1
- tornado=6.1=py39h3811e60_2
- tqdm=4.62.3=pyhd8ed1ab_0
- traitlets=5.1.1=pyhd8ed1ab_0
- urllib3=1.26.7=pyhd8ed1ab_0
- wcwidth=0.2.5=pyh9f0ad1d_2
- webencodings=0.5.1=py_1
- websocket-client=1.2.1=py39hf3d152e_0
- wheel=0.37.0=pyhd8ed1ab_1
- xz=5.2.5=h516909a_1
- yaml=0.2.5=h516909a_0
- yaml-cpp=0.6.3=he1b5a44_4
- zeromq=4.3.4=h9c3ff4c_1
- zipp=3.6.0=pyhd8ed1ab_0
- zlib=1.2.11=h36c2ea0_1013
- zstd=1.5.0=ha95c52a_0
- _openmp_mutex=4.5=2_gnu
- asttokens=3.0.0=pyhd8ed1ab_1
- blosc=1.21.6=he440d0b_1
- brotli-python=1.1.0=py313h46c70d0_2
- bzip2=1.0.8=h4bc722e_7
- c-ares=1.34.4=hb9d3cd8_0
- c-blosc2=2.16.0=h3122c55_0
- ca-certificates=2025.1.31=hbcca054_0
- certifi=2025.1.31=pyhd8ed1ab_0
- cffi=1.17.1=py313hfab6e84_0
- charset-normalizer=3.4.1=pyhd8ed1ab_0
- comm=0.2.2=pyhd8ed1ab_1
- debugpy=1.8.12=py313h46c70d0_0
- decorator=5.1.1=pyhd8ed1ab_1
- exceptiongroup=1.2.2=pyhd8ed1ab_1
- executing=2.1.0=pyhd8ed1ab_1
- h2=4.2.0=pyhd8ed1ab_0
- hdf5=1.14.3=nompi_h2d575fe_109
- hpack=4.1.0=pyhd8ed1ab_0
- hyperframe=6.1.0=pyhd8ed1ab_0
- idna=3.10=pyhd8ed1ab_1
- importlib-metadata=8.6.1=pyha770c72_0
- ipykernel=6.29.5=pyh3099207_0
- ipython=8.32.0=pyh907856f_0
- jedi=0.19.2=pyhd8ed1ab_1
- jupyter_client=8.6.3=pyhd8ed1ab_1
- jupyter_core=5.7.2=pyh31011fe_1
- keyutils=1.6.1=h166bdaf_0
- krb5=1.21.3=h659f571_0
- ld_impl_linux-64=2.43=h712a8e2_2
- libaec=1.1.3=h59595ed_0
- libblas=3.9.0=28_h59b9bed_openblas
- libcblas=3.9.0=28_he106b2a_openblas
- libcurl=8.11.1=h332b0f4_0
- libedit=3.1.20250104=pl5321h7949ede_0
- libev=4.33=hd590300_2
- libexpat=2.6.4=h5888daf_0
- libffi=3.4.6=h2dba641_0
- libgcc=14.2.0=h77fa898_1
- libgcc-ng=14.2.0=h69a702a_1
- libgfortran=14.2.0=h69a702a_1
- libgfortran5=14.2.0=hd5240d6_1
- libgomp=14.2.0=h77fa898_1
- liblapack=3.9.0=28_h7ac8fdf_openblas
- liblzma=5.6.4=hb9d3cd8_0
- libmpdec=4.0.0=h4bc722e_0
- libnghttp2=1.64.0=h161d5f1_0
- libopenblas=0.3.28=pthreads_h94d23a6_1
- libsodium=1.0.20=h4ab18f5_0
- libsqlite=3.48.0=hee588c1_1
- libssh2=1.11.1=hf672d98_0
- libstdcxx=14.2.0=hc0a3c3a_1
- libstdcxx-ng=14.2.0=h4852527_1
- libuuid=2.38.1=h0b41bf4_0
- libzlib=1.3.1=hb9d3cd8_2
- lz4-c=1.10.0=h5888daf_1
- matplotlib-inline=0.1.7=pyhd8ed1ab_1
- ncurses=6.5=h2d0b736_3
- nest-asyncio=1.6.0=pyhd8ed1ab_1
- nomkl=1.0=h5ca1d4c_0
- numexpr=2.10.2=py313h5f97788_100
- numpy=2.2.3=py313h17eae1a_0
- openssl=3.4.1=h7b32b05_0
- packaging=24.2=pyhd8ed1ab_2
- pandas=2.2.3=py313ha87cce1_1
- parso=0.8.4=pyhd8ed1ab_1
- pexpect=4.9.0=pyhd8ed1ab_1
- pickleshare=0.7.5=pyhd8ed1ab_1004
- pip=25.0.1=pyh145f28c_0
- platformdirs=4.3.6=pyhd8ed1ab_1
- prompt-toolkit=3.0.50=pyha770c72_0
- psutil=6.1.1=py313h536fd9c_0
- ptyprocess=0.7.0=pyhd8ed1ab_1
- pure_eval=0.2.3=pyhd8ed1ab_1
- py-cpuinfo=9.0.0=pyhd8ed1ab_1
- pycparser=2.22=pyh29332c3_1
- pygments=2.19.1=pyhd8ed1ab_0
- pysocks=1.7.1=pyha55dd90_7
- pytables=3.10.2=py313hd261420_1
- python=3.13.1=ha99a958_105_cp313
- python-dateutil=2.9.0.post0=pyhff2d567_1
- python-tzdata=2025.1=pyhd8ed1ab_0
- python_abi=3.13=5_cp313
- pytz=2024.1=pyhd8ed1ab_0
- pyzmq=26.2.1=py313h8e95178_0
- readline=8.2=h8228510_1
- requests=2.32.3=pyhd8ed1ab_1
- six=1.17.0=pyhd8ed1ab_0
- snappy=1.2.1=h8bd8927_1
- stack_data=0.6.3=pyhd8ed1ab_1
- tk=8.6.13=noxft_h4845f30_101
- tornado=6.4.2=py313h536fd9c_0
- traitlets=5.14.3=pyhd8ed1ab_1
- typing-extensions=4.12.2=hd8ed1ab_1
- typing_extensions=4.12.2=pyha770c72_1
- tzdata=2025a=h78e105d_0
- urllib3=2.3.0=pyhd8ed1ab_0
- wcwidth=0.2.13=pyhd8ed1ab_1
- zeromq=4.3.5=h3b0a872_7
- zipp=3.21.0=pyhd8ed1ab_1
- zlib-ng=2.2.4=h7955e40_0
- zstandard=0.23.0=py313h80202fe_1
- zstd=1.5.6=ha6fb4c9_0
- pip:
- alembic==1.4.1
- boto3==1.20.30
- botocore==1.23.30
- cfgv==3.3.1
- click==8.0.3
- cloudpickle==2.0.0
- cramjam==2.8.0
- databricks-cli==0.16.2
- distlib==0.3.4
- docker==5.0.3
- et-xmlfile==1.1.0
- fastparquet==2023.10.1
- filelock==3.4.2
- flask==2.0.2
- fsspec==2023.12.2
- gitdb==4.0.9
- gitpython==3.1.24
- gunicorn==20.1.0
- identify==2.4.2
- itsdangerous==2.0.1
- jmespath==0.10.0
- lxml==5.3.0
- minio==7.1.2
- mlflow==1.22.0
- nodeenv==1.6.0
- numpy==1.26.3
- openpyxl==3.1.2
- pandas==2.1.4
- platformdirs==2.4.1
- pre-commit==2.16.0
- prometheus-flask-exporter==0.18.7
- protobuf==3.19.1
- python-dotenv==0.19.2
- python-editor==1.0.4
- pyyaml==6.0
- querystring-parser==1.2.4
- s3transfer==0.5.0
- aiosqlite==0.21.0
- click==8.1.8
- cramjam==2.9.1
- et-xmlfile==2.0.0
- fastparquet==2024.11.0
- fsspec==2025.2.0
- gitdb==4.0.12
- gitpython==3.1.44
- greenlet==3.1.1
- iniconfig==2.0.0
- lxml==5.3.1
- openpyxl==3.1.5
- pluggy==1.5.0
- pyarrow==19.0.0
- pyomop==4.3.0
- pytest==8.3.4
- simpledbf==0.2.6
- smmap==5.0.0
- sqlparse==0.4.2
- tabulate==0.8.9
- toml==0.10.2
- typing-extensions==4.0.1
- tzdata==2023.4
- virtualenv==20.13.0
- werkzeug==2.0.2
prefix: /opt/conda
- smmap==5.0.2
- sqlalchemy==2.0.38
prefix: /opt/conda/envs/acmc
{
"concept_sets": {
"version": "1.0.0",
"version": "v1.0.1",
"omop": {
"vocabulary_id": "ACMC_Example",
"vocabulary_name": "ACMC example phenotype",
......@@ -11,16 +11,6 @@
"concept_set_name": "ABDO_PAIN",
"concept_set_status": "AGREED",
"metadata": {
"#": "18",
"CONCEPT DESCRIPTION": "Abdominal pain",
"CONCEPT TYPE": "Workload indicator (symptom)",
"DATE ADDED ": "2023-08-25",
"REQUEST REASON ": "Clinician SF - requested by email - symptom example from Qualitative Evidence Synthesis",
"SOURCE INFO": "YES",
"FUNCTION": "QUERY BY CODING LIST",
"FUNCTION.1": "https://clinicalcodes.rss.mhs.man.ac.uk/",
"CODING LIST": "https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/codes/ClinicalCodes.org%20from%20the%20University%20of%20Manchester/Symptom%20code%20lists/Abdominal%20pain/res176-abdominal-pain.csv ",
"NOTES": "2023-09-08: Clinical SF confirmed that the clinical view would be that this would need to be recurrent or persistent."
}
}
]
......@@ -28,7 +18,7 @@
"codes": [
{
"folder": "clinical-codes-org",
"description": "SF's clinical codes - downloaded 16/11/23",
"description": "Downloaded 16/11/23",
"files": [
{
"file": "Symptom code lists/Abdominal pain/res176-abdominal-pain.csv",
......
{
"concept_sets": {
"version": "v1.0.4",
"omop": {
"vocabulary_id": "ACMC_Example",
"vocabulary_name": "ACMC example phenotype",
"vocabulary_reference": "https://www.it-innovation.soton.ac.uk/projects/meldb/concept-processing/example"
},
"concept_set": [
{
"concept_set_name": "CVD_EVENTS",
"concept_set_status": "AGREED",
"metadata": {}
},
{
"concept_set_name": "DID_NOT_ATTEND",
"concept_set_status": "AGREED",
"metadata": {}
}
]
},
"codes": [
{
"folder": "clinical-codes-org",
"description": "Downloaded 16/11/23",
"files": [
{
"file": "Cardiovascular events (ICD10)/res52-cardiovascular-events-icd10.csv",
"columns": {
"icd10_code": "code",
"metadata": []
},
"concept_set": [
"CVD_EVENTS"
]
},
{
"file": "Non-attendance codes/res201-did-not-attend-appointment.csv",
"columns": {
"read2_code": "code",
"metadata": []
},
"concept_set": [
"DID_NOT_ATTEND"
]
}
]
}
]
}
\ No newline at end of file
name: acmc
channels:
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- blosc=1.21.6=he440d0b_1
- brotli-python=1.1.0=py313h46c70d0_2
- bzip2=1.0.8=h4bc722e_7
- c-ares=1.34.4=hb9d3cd8_0
- c-blosc2=2.16.0=h3122c55_0
- ca-certificates=2025.1.31=hbcca054_0
- certifi=2024.12.14=pyhd8ed1ab_0
- cffi=1.17.1=py313hfab6e84_0
- charset-normalizer=3.4.1=pyhd8ed1ab_0
- h2=4.2.0=pyhd8ed1ab_0
- hdf5=1.14.3=nompi_h2d575fe_109
- hpack=4.1.0=pyhd8ed1ab_0
- hyperframe=6.1.0=pyhd8ed1ab_0
- idna=3.10=pyhd8ed1ab_1
- keyutils=1.6.1=h166bdaf_0
- krb5=1.21.3=h659f571_0
- ld_impl_linux-64=2.43=h712a8e2_2
- libaec=1.1.3=h59595ed_0
- libblas=3.9.0=28_h59b9bed_openblas
- libcblas=3.9.0=28_he106b2a_openblas
- libcurl=8.11.1=h332b0f4_0
- libedit=3.1.20250104=pl5321h7949ede_0
- libev=4.33=hd590300_2
- libexpat=2.6.4=h5888daf_0
- libffi=3.4.6=h2dba641_0
- libgcc=14.2.0=h77fa898_1
- libgcc-ng=14.2.0=h69a702a_1
- libgfortran=14.2.0=h69a702a_1
- libgfortran5=14.2.0=hd5240d6_1
- libgomp=14.2.0=h77fa898_1
- liblapack=3.9.0=28_h7ac8fdf_openblas
- liblzma=5.6.4=hb9d3cd8_0
- libmpdec=4.0.0=h4bc722e_0
- libnghttp2=1.64.0=h161d5f1_0
- libopenblas=0.3.28=pthreads_h94d23a6_1
- libsqlite=3.48.0=hee588c1_1
- libssh2=1.11.1=hf672d98_0
- libstdcxx=14.2.0=hc0a3c3a_1
- libstdcxx-ng=14.2.0=h4852527_1
- libuuid=2.38.1=h0b41bf4_0
- libzlib=1.3.1=hb9d3cd8_2
- lz4-c=1.10.0=h5888daf_1
- ncurses=6.5=h2d0b736_3
- nomkl=1.0=h5ca1d4c_0
- numexpr=2.10.2=py313h5f97788_100
- numpy=2.2.3=py313h17eae1a_0
- openssl=3.4.1=h7b32b05_0
- packaging=24.2=pyhd8ed1ab_2
- pandas=2.2.3=py313ha87cce1_1
- pip=25.0.1=pyh145f28c_0
- py-cpuinfo=9.0.0=pyhd8ed1ab_1
- pycparser=2.22=pyh29332c3_1
- pysocks=1.7.1=pyha55dd90_7
- pytables=3.10.2=py313hd261420_1
- python=3.13.1=ha99a958_105_cp313
- python-dateutil=2.9.0.post0=pyhff2d567_1
- python-tzdata=2025.1=pyhd8ed1ab_0
- python_abi=3.13=5_cp313
- pytz=2024.1=pyhd8ed1ab_0
- readline=8.2=h8228510_1
- requests=2.32.3=pyhd8ed1ab_1
- six=1.17.0=pyhd8ed1ab_0
- snappy=1.2.1=h8bd8927_1
- tk=8.6.13=noxft_h4845f30_101
- typing-extensions=4.12.2=hd8ed1ab_1
- typing_extensions=4.12.2=pyha770c72_1
- tzdata=2025a=h78e105d_0
- urllib3=2.3.0=pyhd8ed1ab_0
- zlib-ng=2.2.4=h7955e40_0
- zstandard=0.23.0=py313h80202fe_1
- zstd=1.5.6=ha6fb4c9_0
- pip:
- aiosqlite==0.21.0
- click==8.1.8
- cramjam==2.9.1
- et-xmlfile==2.0.0
- fastparquet==2024.11.0
- fsspec==2025.2.0
- greenlet==3.1.1
- lxml==5.3.1
- openpyxl==3.1.5
- pyarrow==19.0.0
- pyomop==4.3.0
- simpledbf==0.2.6
- sqlalchemy==2.0.38
prefix: /opt/conda/envs/acmc
File moved
This diff is collapsed.
import json
import os
import pandas as pd
import numpy as np
import argparse
#Get all Files in JSON
def get_json_files(folders):
    """Collect one row per (concept, file) pair declared in the JSON mapping.

    Returns a DataFrame with columns ``json_concept``, ``filepath`` and
    ``json_code_types`` (the latter is absent/NaN for files that declare no
    concept sets).
    """
    rows = []
    for folder in folders:
        if "files" not in folder:
            continue
        for entry in folder["files"]:
            path = folder["folder"] + "/" + entry["file"]
            if "concept_set" in entry:
                for name in entry["concept_set"]:
                    rows.append({"json_concept": name,
                                 "filepath": path,
                                 "json_code_types": list(entry["columns"].keys())})
            elif "concept_set_categories" in entry:
                # categories map a code to a list; only the first concept is kept
                for mapped in entry["concept_set_categories"].values():
                    rows.append({"json_concept": mapped[0],
                                 "filepath": path,
                                 "json_code_types": list(entry["columns"].keys())})
            else:
                rows.append({"json_concept": None, "filepath": path})
    frame = pd.DataFrame(rows)
    frame["filepath"] = frame["filepath"].astype(str)
    return frame
#Get all Files Excel Summary
def get_excel_files(out2):
    """Extract (concept, filepath) rows from the Excel concept-tracking sheet.

    Keeps only rows with AGREED == "USE" and FUNCTION == "QUERY BY CODING
    LIST", splits comma-separated coding-list URLs into one row per file,
    and rewrites repository URLs into repo-relative paths.
    """
    cols = ["CONCEPT NAME ", "CODING LIST", "AGREED", "FUNCTION"]
    df = out2[cols].loc[1:]  # row 0 is a non-data header row
    # keep only agreed, coding-list-driven concepts
    keep = (df["AGREED"] == "USE") & (df["FUNCTION"] == "QUERY BY CODING LIST")
    df = df[keep].drop(["AGREED", "FUNCTION"], axis=1)
    # one row per linked file
    df["CODING LIST"] = df["CODING LIST"].str.split(",")
    df = df.explode("CODING LIST")
    cleaned = (df["CODING LIST"].str.strip()
               .str.replace("https://git.soton.ac.uk/meld/meldb-external/phenotype/-/tree/main/", "")
               .str.replace("%20", " "))
    df["CODING LIST"] = cleaned
    return df.rename(columns={"CONCEPT NAME ": "excel_concept", "CODING LIST": "filepath"})
#Get all Files in /codes
def get_code_files(path_codes):
    """Recursively list data files (.csv/.xlsx/.dta) under *path_codes*.

    Skips notebook-checkpoint directories; returns a one-column DataFrame
    named "filepath".
    """
    found = []
    for root, _dirs, names in os.walk(path_codes, topdown=False):
        if ".ipynb_checkpoint" in root:  # exclude notebook checkpoints
            continue
        found.extend(os.path.join(root, name) for name in names
                     if name.endswith((".csv", ".xlsx", ".dta")))
    frame = pd.DataFrame(found).rename(columns={0: "filepath"})
    frame["filepath"] = frame["filepath"].astype(str)
    return frame
def test_concept_def(config, report, folders, summary):
    """Cross-check concept definitions between the JSON mapping, the Excel
    summary and the files present on disk, appending a Markdown section
    to *report*."""
    report.write("## Check Concept Definitions")
    json_rows = get_json_files(folders)
    excel_rows = get_excel_files(summary)
    disk_files = get_code_files(config["codes"])
    # full outer joins so files missing from any one source still appear
    merged = pd.merge(disk_files, json_rows, how="outer", on="filepath")
    merged = pd.merge(merged, excel_rows, how="outer", on="filepath")
    report.write("\n\nCode source files:\n")
    report.write(f"- {len(disk_files)} total files\n\n")
    report.write("\n\nJSON concepts:\n")
    report.write(f"- {len(json_rows['filepath'].unique())} unique concepts\n")
    absent = merged[merged["json_concept"].isna() & merged["excel_concept"].notna()]
    if not absent.empty:
        report.write("- Missing from JSON\n")
        for _, row in absent.iterrows():
            report.write("\t - ❌ {} {}\n\n".format(row["filepath"], row["excel_concept"]))
    report.write("\n\nEXCEL concepts:\n")
    report.write(f"- {len(excel_rows['filepath'].unique())} unique concepts\n")
    absent = merged[merged["json_concept"].notna() & merged["excel_concept"].isna()]
    if not absent.empty:
        report.write("- Missing from EXCEL\n")
        for _, row in absent.iterrows():
            report.write("\t - ❌ {} {} {}\n\n".format(row["filepath"], row["json_concept"], row["json_code_types"]))
def get_output_files(version):
    """Return ``(output_csv_paths, error_csv_path)`` for a given *version*."""
    prefix = f"output/{version}_MELD"
    output_files = [
        f"{prefix}_concepts_readv2.csv",
        f"{prefix}_snomed_no_translate.csv",
        f"{prefix}_icd10_no_translate.csv",
        # the med output is currently disabled
        f"{prefix}_atc_no_translate.csv",
    ]
    error_file = f"{prefix}_errors.csv"
    return output_files, error_file
def test_concept_changes(config, report):
    """Compare per-concept-set code counts between the previous and the
    current version's output files, appending a Markdown section to
    *report*.

    Reads the CSVs returned by get_output_files for both versions, so
    those files must already exist on disk.
    """
    version_1 = config["previous"]
    version_2 = config["version"]
    output1, err1 = get_output_files(version_1)
    output2, err2 = get_output_files(version_2)
    report.write(f"\n\n## Compare Concepts {version_1} to {version_2}\n\n")
    # compare each pair of corresponding output files
    for out1, out2 in zip(output1, output2):
        report.write(f"`{out1}` to `{out2}`\n")
        # number of codes per concept set in each version
        df1 = pd.read_csv(out1)
        df1 = df1[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
        df2 = pd.read_csv(out2)
        df2 = df2[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
        #Added/Removed Concepts
        report.write("- Removed Concepts {}\n".format(list(set(df1.index) - set(df2.index))))
        report.write("- Added Concepts {}\n".format(list(set(df2.index) - set(df1.index))))
        #Changed Concepts
        diff = df2 - df1 #diff in counts
        # NaN rows are concepts present in only one version (already listed above)
        diff = diff[(~(diff["CONCEPT"] == 0.0)) & diff["CONCEPT"].notna()] #get non-zero counts
        s = "\n"
        for concept, row in diff.iterrows():
            s += "\t - {} {}\n".format(concept, row["CONCEPT"])
        report.write("- Changed Concepts {}\n\n".format(s))
# ✅ ❌
def main(config):
    """Generate the concept report.

    Validates the configured file extensions, loads the mapping JSON and
    the Excel summary, then appends the per-version report sections to the
    Markdown report file.

    config keys used: "report", "map", "summary", "version", "previous",
    and (via test_concept_def) "codes".

    Raises:
        Exception: if any of report/map/summary has an unsupported extension.
    """
    # Validate all inputs up front so we fail before creating/appending to
    # the report file (previously the report was opened first).
    if not config["report"].endswith(".md"):
        raise Exception("Unsupported filetype provided for source file")
    if not config["map"].endswith(".json"):
        raise Exception("Unsupported filetype provided for source file")
    if not config["summary"].endswith(".xlsx"):
        raise Exception("Unsupported filetype provided for summary file")

    # Load mapping file (fix: the handle was previously never closed)
    with open(config["map"], 'rb') as map_file:
        folders = json.load(map_file)

    # Load Excel summary file
    summary = pd.read_excel(config["summary"], sheet_name="CONCEPT_TRACKING", dtype=str)

    # Append report sections (fix: report handle is now closed on exit)
    with open(config["report"], 'a') as report:
        # Title with version
        report.write("\n\n# Report {} \n\n".format(config["version"]))
        # Compare JSON mapping with Excel summary
        test_concept_def(config, report, folders, summary)
        # Changes in concept codes between versions
        test_concept_changes(config, report)
if __name__ == '__main__':
    # CLI entry point: collect the six positional arguments into a plain
    # dict and run the report generation.
    parser = argparse.ArgumentParser(description="Script performs testing and generates report for output files",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("map", help="Concept/Phenotype Assignment File (json)")
    parser.add_argument("summary", help="Summary working excel document")
    parser.add_argument("codes", help="Folder containing all code source files")
    parser.add_argument("report", help="Output Markdown file containing report")
    parser.add_argument("version", help="Version of output")
    parser.add_argument("previous", help="Previous version of output")
    args = parser.parse_args()
    config = vars(args)  # argparse.Namespace -> dict expected by main()
    main(config)
\ No newline at end of file
File moved
File moved
import pytest
import argparse
from unittest.mock import patch, MagicMock
import trud
import omop
import phen
from pathlib import Path
from acmc import trud_install, omop_install, omop_clear, omop_delete, phen_init, phen_validate, phen_map, phen_publish, phen_copy, phen_diff
def test_trud_install():
    """`trud install` forwards the API key to trud.install."""
    ns = argparse.Namespace(api_key="test_key")
    with patch("trud.install") as install_mock:
        trud_install(ns)
    install_mock.assert_called_once_with("test_key")
def test_omop_install():
    """`omop install` forwards the DB path and the OMOP folder."""
    ns = argparse.Namespace(omop_folder="/path/to/omop")
    with patch("omop.install") as install_mock:
        omop_install(ns)
    install_mock.assert_called_once_with(omop.OMOP_DB_PATH, "/path/to/omop")
def test_omop_clear():
    """`omop clear` clears the configured OMOP database."""
    ns = argparse.Namespace()
    with patch("omop.clear") as clear_mock:
        omop_clear(ns)
    clear_mock.assert_called_once_with(omop.OMOP_DB_PATH)
def test_omop_delete():
    """`omop delete` deletes the configured OMOP database."""
    ns = argparse.Namespace()
    with patch("omop.delete") as delete_mock:
        omop_delete(ns)
    delete_mock.assert_called_once_with(omop.OMOP_DB_PATH)
def test_phen_init():
    """`phen init` forwards the phenotype directory to phen.init."""
    ns = argparse.Namespace(phen_dir="/path/to/phen")
    with patch("phen.init") as init_mock:
        phen_init(ns)
    init_mock.assert_called_once_with("/path/to/phen")
def test_phen_validate():
    """`phen validate` forwards the phenotype directory to phen.validate."""
    ns = argparse.Namespace(phen_dir="/path/to/phen")
    with patch("phen.validate") as validate_mock:
        phen_validate(ns)
    validate_mock.assert_called_once_with("/path/to/phen")
def test_phen_map():
    """`phen map` forwards directory, coding and both flags to phen.map."""
    ns = argparse.Namespace(phen_dir="/path/to/phen",
                            target_coding="icd10_code",
                            translate=True,
                            verify=True)
    with patch("phen.map") as map_mock:
        phen_map(ns)
    map_mock.assert_called_once_with("/path/to/phen", "icd10_code", True, True)
def test_phen_publish():
    """`phen publish` forwards the phenotype directory to phen.publish."""
    ns = argparse.Namespace(phen_dir="/path/to/phen")
    with patch("phen.publish") as publish_mock:
        phen_publish(ns)
    publish_mock.assert_called_once_with("/path/to/phen")
def test_phen_copy():
    """`phen copy` forwards source dir, target dir and version to phen.copy."""
    ns = argparse.Namespace(phen_dir="/path/to/phen",
                            target_dir="/path/to/target",
                            version="1.0")
    with patch("phen.copy") as copy_mock:
        phen_copy(ns)
    copy_mock.assert_called_once_with("/path/to/phen", "/path/to/target", "1.0")
def test_phen_diff():
    """`phen diff` forwards the new and old phenotype dirs to phen.diff."""
    ns = argparse.Namespace(phen_dir="/path/to/phen",
                            phen_dir_old="/path/to/old_phen")
    with patch("phen.diff") as diff_mock:
        phen_diff(ns)
    diff_mock.assert_called_once_with("/path/to/phen", "/path/to/old_phen")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment