diff --git a/acmc/phen.py b/acmc/phen.py
index 866966863f61854779836ead5335e498cb3f3ba6..1ae243d160ac24a10303a3fab3c24a0d8175da55 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -11,6 +11,7 @@ import re
 import logging
 import requests
 import yaml
+from cerberus import Validator
 from pathlib import Path
 from urllib.parse import urlparse, urlunparse
 
@@ -44,6 +45,59 @@ COL_ACTIONS = [SPLIT_COL_ACTION, CODES_COL_ACTION, DIVIDE_COL_ACTION]
 
 CODE_FILE_TYPES = [".xlsx", ".xls", ".csv"]
 
+# config.yaml schema
+CONFIG_SCHEMA = {
+    "phenotype": {
+        "type": "dict",
+        "required": True,
+        "schema": {
+            "version": {
+                "type": "string",
+                "required": True,
+                "regex": r"^v\d+\.\d+\.\d+$"  # Enforces 'vN.N.N' format
+            },
+            "omop": {
+                "type": "dict",
+                "required": True,
+                "schema": {
+                    "vocabulary_id": {"type": "string", "required": True},
+                    "vocabulary_name": {"type": "string", "required": True},
+                    "vocabulary_reference": {
+                        "type": "string",
+                        "required": True,
+                        "regex": r"^https?://.*"  # Ensures it's a URL
+                    },
+                }
+            },
+            "concept_sets": {
+                "type": "list",
+                "required": True,
+                "schema": {
+                    "type": "dict",
+                    "schema": {
+                        "name": {"type": "string", "required": True},
+                        "file": {
+                            "type": "dict",
+                            "required": False,
+                            "schema": {
+                                "path": {"type": "string", "required": True},
+                                "columns": {"type": "dict", "required": True},
+                                "category": {"type": "string"},  # Optional but must be string if present
+                                "actions": {
+                                    "type": "dict",
+                                    "schema": {
+                                        "divide_col": {"type": "string"}
+                                    },
+                                },                                
+                            },                            
+                        },
+                        "metadata": {"type": "dict", "required": True},
+                    },
+                },   
+            },
+        },
+    }
+}
 
 class PhenValidationException(Exception):
     """Custom exception class raised when validation errors in phenotype configuration file"""
@@ -257,8 +311,21 @@ def validate(phen_dir):
 
     # Load configuration File
     if config_path.suffix == ".yaml":
-        with config_path.open("r") as file:
-            phenotype = yaml.safe_load(file)
+        try:
+            with config_path.open("r") as file:
+                phenotype = yaml.safe_load(file)
+    
+            validator = Validator(CONFIG_SCHEMA)
+            if validator.validate(phenotype):
+                logger.debug("YAML structure is valid.")
+            else:
+                logger.error(f"YAML structure validation failed: {validator.errors}")
+                raise Exception(
+                    f"YAML structure validation failed: {validator.errors}"
+                )                
+        except yaml.YAMLError as e:
+            logger.error(f"YAML syntax error: {e}")
+            raise e            
     else:
         raise Exception(
             f"Unsupported configuration filetype: {str(config_path.resolve())}"
@@ -286,50 +353,40 @@ def validate(phen_dir):
         else:
             concept_set_names.append(item["name"])
 
-    # TODO: change this to some sort of yaml schema validation
-    required_keys = {"name", "file", "metadata"}
-
     # check codes definition
     for item in phenotype["concept_sets"]:
+        # check concepte code file exists
+        concept_code_file_path = codes_path / item["file"]["path"]
+        if not concept_code_file_path.exists():
+            validation_errors.append(
+                f"Coding file {str(concept_code_file_path.resolve())} does not exist"
+            )
 
-        if required_keys.issubset(item.keys()):
+        # check concepte code file is not empty
+        if concept_code_file_path.stat().st_size == 0:
+            validation_errors.append(
+                f"Coding file {str(concept_code_file_path.resolve())} is an empty file"
+            )
 
-            # check concepte code file exists
-            concept_code_file_path = codes_path / item["file"]["path"]
-            if not concept_code_file_path.exists():
-                validation_errors.append(
-                    f"Coding file {str(concept_code_file_path.resolve())} does not exist"
-                )
+        # check code file type is supported
+        if concept_code_file_path.suffix not in CODE_FILE_TYPES:
+            raise ValueError(
+                f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types"
+            )
 
-            # check concepte code file is not empty
-            if concept_code_file_path.stat().st_size == 0:
+        # check columns specified are a supported medical coding type
+        for column in item["file"]["columns"]:
+            if column not in code_types:
                 validation_errors.append(
-                    f"Coding file {str(concept_code_file_path.resolve())} is an empty file"
+                    f"Column type {column} for file {concept_code_file_path} is not supported"
                 )
 
-            # check code file type is supported
-            if concept_code_file_path.suffix not in CODE_FILE_TYPES:
-                raise ValueError(
-                    f"Unsupported filetype {concept_code_file_path.suffix}, only support csv, xlsx, xls code file types"
-                )
+        # check the actions are supported
+        if "actions" in item["file"]:
+            for action in item["file"]["actions"]:
+                if action not in COL_ACTIONS:
+                    validation_errors.append(f"Action {action} is not supported")
 
-            # check columns specified are a supported medical coding type
-            for column in item["file"]["columns"]:
-                if column not in code_types:
-                    validation_errors.append(
-                        f"Column type {column} for file {concept_code_file_path} is not supported"
-                    )
-
-            # check the actions are supported
-            if "actions" in item["file"]:
-                for action in item["file"]["actions"]:
-                    if action not in COL_ACTIONS:
-                        validation_errors.append(f"Action {action} is not supported")
-
-        else:
-            validation_errors.append(
-                f"Missing required elements {required_keys} in concept set {item}"
-            )
 
     if len(validation_errors) > 0:
         logger.error(validation_errors)
diff --git a/pyproject.toml b/pyproject.toml
index a6909823668464f612bc9eb88efa25c2c845b4dd..de414cf8243f6541bc2e66b0daa08078eaf13d83 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ requires-python = ">=3.9"
 
 dependencies = [
     "aiosqlite",
+    "cerberus",
     "click",
     "cramjam",
     "et-xmlfile",