acmc.parse
parse.py module
This module provides functionality to set up medical code translation classes
"""
parse.py module

This module provides functionality to set up medical code translation classes

"""

import pandas as pd
import numpy as np
import os
from typing import Callable, Optional, Tuple
from pathlib import Path
from acmc import trud, logging_config as lc

# setup logging
_logger = lc.setup_logger()

SUPPORTED_CODE_TYPES = {"read2", "read3", "icd10", "snomed", "opcs4", "atc"}
"""List of supported medical coding types"""


class CodesError:
    """A class used in InvalidCodesException to report an error if a code parser check fails"""

    def __init__(self, message, codes=None, codes_file=None, mask=None, code_type=None):
        # Explicit assignment instead of looping over locals(): the locals
        # mapping can change while it is iterated (e.g. under a tracer).
        self.message = message
        self.codes = codes
        self.codes_file = codes_file
        self.mask = mask
        self.code_type = code_type


class InvalidCodesException(Exception):
    """Custom exception class raised when invalid codes are found that cannot be resolved by processing"""

    def __init__(self, error):
        super().__init__(error.message)
        self.error = error


def _has_codes(codes):
    """Condition: a one-element Series that is True when ``codes`` is non-empty."""
    return pd.Series([len(codes) > 0])


def _length_at_least(width):
    """Build a condition that fails codes shorter than ``width`` characters."""
    return lambda codes: ~(codes.str.len() < width)


def _length_at_most(width):
    """Build a condition that fails codes longer than ``width`` characters."""
    return lambda codes: ~(codes.str.len() > width)


def _matches(pattern):
    """Build a condition that passes codes matching the regex ``pattern``."""
    return lambda codes: codes.str.match(pattern)


def _fixed_width_checks(width):
    """Return the "Too Short"/"Too Long" checks that pad with dots or truncate to ``width``."""
    return [
        (
            "Too Short",
            _length_at_least(width),
            lambda codes, codes_file: codes.str.pad(
                width=width, side="right", fillchar="."
            ),
        ),
        (
            "Too Long",
            _length_at_most(width),
            lambda codes, codes_file: codes.str[:width],
        ),
    ]


class Proto:
    """
    Define checks as list of 3 tuple: (Message, Condition, Process)
    - Message = The name of the condition (what is printed and logged)
    - Condition = True if Passed, and False if Failed
    - Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
    """

    checks: list[
        tuple[
            str,  # The description, e.g., "Not Empty"
            Callable[
                [pd.DataFrame],
                pd.Series,
            ],  # Condition: takes the codes and returns a pd.Series of booleans
            Callable[
                [pd.DataFrame, Path],
                pd.DataFrame,
            ],  # Process: takes the codes and the codes file, returns fixed codes or raises
        ]
    ]

    def __init__(self, name: str, trud_codes_path: Optional[Path] = None):
        """Store the parser name and, when a path is given, load its code table.

        Raises:
            FileNotFoundError: if ``trud_codes_path`` is given but is not a file.
        """
        self.name: str = name

        if trud_codes_path is not None:
            if not trud_codes_path.is_file():
                # Name the actual code type; this constructor is shared by all parsers.
                raise FileNotFoundError(
                    f"Error: {name} code file '{trud_codes_path}' does not exist. Please ensure you have installed TRUD correctly"
                )
            self.trud_codes_path: Path = trud_codes_path
            self.db: pd.DataFrame = pd.read_parquet(self.trud_codes_path)

    def raise_exception(self, ex: Exception):
        """Raises an exception inside a lambda function. Python does not allow using raise statement inside lambda because lambda can only contain expressions, not statements. Using raise_exception not raise_ as it's more explicit"""
        raise ex

    def in_database(
        self, codes: pd.DataFrame, db: pd.DataFrame, col: str
    ) -> pd.DataFrame:
        """Return, for each code, whether it appears in column ``col`` of ``db``."""
        return codes.isin(db[col])

    def _reject(self, message, cond=None):
        """Build a Process callable that raises InvalidCodesException.

        When ``cond`` is given, its result over the codes is stored as the
        error mask; otherwise the mask is None.
        """

        def fixer(codes, codes_file):
            mask = None if cond is None else cond(codes)
            return self.raise_exception(
                InvalidCodesException(
                    CodesError(
                        message,
                        codes=codes,
                        codes_file=codes_file,
                        mask=mask,
                        code_type=self.name,
                    )
                )
            )

        return fixer

    def _rejecting_check(self, name, cond, message):
        """Return a check whose failures cannot be fixed and are reported with ``message``."""
        return (name, cond, self._reject(message, cond))

    def _not_empty_check(self):
        """Return the "Not Empty" check, reported without a mask."""
        return ("Not Empty", _has_codes, self._reject("Code list is empty"))

    def _in_database_check(self, message="QA In Database"):
        """Return the "In Database" check against the column named after this parser."""
        return self._rejecting_check(
            "In Database",
            lambda codes: self.in_database(codes, self.db, self.name),
            message,
        )

    def process(
        self, codes: pd.DataFrame, codes_file: Path
    ) -> Tuple[pd.DataFrame, list]:
        """identify issues that do not pass and fix them with the defined process"""
        errors = []
        for msg, cond, fix in self.checks:
            # Evaluate the condition once; it may scan the whole code table.
            passed = cond(codes)
            if passed.all():
                _logger.debug("Check: passed")
                continue

            _logger.debug(f"Check: {msg} {(~passed).sum()} failed, trying to fix")
            try:
                codes = fix(codes, codes_file)
                _logger.debug("Check: Fixed")
            except InvalidCodesException as ex:
                # Unfixable: keep the codes unchanged and collect the error.
                errors.append(ex.error)

        return codes, errors

    def verify(self, codes: pd.DataFrame, codes_file: Path):
        """verify codes in codes file"""
        # One entry per check; float dtype matches what np.append produced before.
        return np.array(
            [cond(codes).all() for _msg, cond, _process in self.checks], dtype=float
        )


class Read2(Proto):
    """This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary."""

    def __init__(self):
        super().__init__("read2", trud.PROCESSED_PATH / "read2.parquet")

        self.checks = [
            self._not_empty_check(),
            *_fixed_width_checks(5),
            self._rejecting_check(
                "Alphanumeric Dot",
                _matches(r"^[a-zA-Z0-9.]+$"),
                "Illegal code format, not alphanumeric dot",
            ),
            self._in_database_check("Codes do not exist in database"),
        ]


class Read3(Proto):
    """Parser for Read3 codes: same shape of checks as Read2, against the Read3 table."""

    def __init__(self):
        # Lower-case name: it is used as the database column and error code type,
        # like every other parser and the "read3" registry key.
        super().__init__("read3", trud.PROCESSED_PATH / "read3.parquet")

        self.checks = [
            self._not_empty_check(),
            *_fixed_width_checks(5),
            self._rejecting_check(
                "Alphanumeric Dot",
                _matches(r"^[a-zA-Z0-9.]+$"),
                "QA Alphanumeric Dot",
            ),
            self._in_database_check(),
        ]


class Icd10(Proto):
    """Parser for ICD10 codes: strips dots, then validates format and table membership."""

    def __init__(self):
        super().__init__("icd10", trud.PROCESSED_PATH / "icd10.parquet")

        def known(codes):
            # A code is known if it is in either the main or the "_alt" column.
            return self.in_database(codes, self.db, self.name) | self.in_database(
                codes, self.db, self.name + "_alt"
            )

        self.checks = [
            (
                "Not Empty",
                _has_codes,
                lambda codes, codes_file: self.raise_exception(
                    InvalidCodesException(
                        CodesError(
                            f"Code list is empty {codes_file}",
                            codes=codes,
                            codes_file=codes_file,
                            mask=None,
                            code_type=self.name,
                        )
                    )
                ),
            ),
            self._rejecting_check("Too Short", _length_at_least(3), "QA Too Short"),
            (
                "Has Dot",
                lambda codes: ~(codes.str.match(r".*\..*")),  # check if contains dot
                # regex=False: "." must be a literal dot, not the regex wildcard
                lambda codes, codes_file: codes.str.replace(".", "", regex=False),
            ),
            self._rejecting_check(
                "Alphanumeric Capital",
                _matches(r"^[A-Z0-9]+$"),
                "QA Alphanumeric Capital",
            ),
            self._rejecting_check("In Database", known, "QA In Database"),
        ]


class Snomed(Proto):
    """Parser for SNOMED codes: length, numeric and table checks (no "Not Empty" check)."""

    def __init__(self):
        super().__init__("snomed", trud.PROCESSED_PATH / "snomed.parquet")

        self.checks = [
            self._rejecting_check("Too Short", _length_at_least(6), "QA Too Short"),
            self._rejecting_check("Too Long", _length_at_most(18), "QA Too Long"),
            self._rejecting_check("Numeric", _matches(r"[0-9]+$"), "QA Numeric"),
            self._in_database_check(),
        ]


class Opcs4(Proto):
    """Parser for OPCS4 codes: non-empty and table membership checks."""

    def __init__(self):
        super().__init__("opcs4", trud.PROCESSED_PATH / "opcs4.parquet")

        self.checks = [
            self._not_empty_check(),
            self._in_database_check(),
        ]


class Atc(Proto):
    """Parser for ATC codes: non-empty and capital-alphanumeric checks, no code table."""

    def __init__(self):
        super().__init__("atc", trud_codes_path=None)
        self.checks = [
            self._not_empty_check(),
            self._rejecting_check(
                "Alphanumeric Capital",
                _matches(r"^[A-Z0-9]+$"),
                "QA Alphanumeric Capital",
            ),
        ]


class Med(Proto):
    """Parser for "med" codes: only checks that the list is not empty."""

    def __init__(self):
        super().__init__("med", trud_codes_path=None)
        self.checks = [self._not_empty_check()]


class Cprd(Proto):
    """Parser for "cprd" codes: only checks that the list is not empty."""

    def __init__(self):
        super().__init__("cprd", trud_codes_path=None)
        self.checks = [self._not_empty_check()]


class CodeTypeParser:
    """Holds one parser instance per code type, keyed by code type name."""

    def __init__(self, trud_processed_dir: Path = trud.PROCESSED_PATH):
        """Check the TRUD processed directory exists, then build every parser.

        Raises:
            FileNotFoundError: if ``trud_processed_dir`` is not an existing directory.
        """
        # is_dir() is already False for a missing path.
        if not trud_processed_dir.is_dir():
            raise FileNotFoundError(
                f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install"
            )

        # NOTE(review): the parsers read from trud.PROCESSED_PATH, not from
        # trud_processed_dir - confirm whether the argument should be forwarded.
        self.code_types = {
            "read2": Read2(),
            "read3": Read3(),
            "icd10": Icd10(),
            "snomed": Snomed(),
            "opcs4": Opcs4(),
            "atc": Atc(),
            "med": Med(),
            "cprd": Cprd(),
        }
List of supported medical coding types
class CodesError:
    """Describe a failed code-parser check; carried by InvalidCodesException.

    Attributes:
        message: Description of the failed check.
        codes: The codes that were being checked, or None.
        codes_file: The file the codes were read from, or None.
        mask: Per-code result supplied by the caller, or None.
        code_type: Name of the code type being parsed, or None.
    """

    def __init__(self, message, codes=None, codes_file=None, mask=None, code_type=None):
        # Assign each field explicitly rather than looping over locals():
        # the locals mapping can change while it is being iterated (for
        # example under a tracer or debugger), and explicit fields are
        # visible to readers and tooling.
        self.message = message
        self.codes = codes
        self.codes_file = codes_file
        self.mask = mask
        self.code_type = code_type
A class used in InvalidCodesException to report an error if a code parser check fails
class InvalidCodesException(Exception):
    """Raised when a check finds invalid codes that processing cannot repair.

    The exception text is ``error.message``; the full error object stays
    available on the ``error`` attribute.
    """

    def __init__(self, error):
        self.error = error
        super().__init__(self.error.message)
Custom exception class raised when invalid codes are found that cannot be resolved by processing
class Proto:
    """
    Base class for code parsers that are driven by a list of checks.

    Define checks as list of 3 tuple: (Message, Condition, Process)
    - Message = The name of the condition (what is printed and logged)
    - Condition = True if Passed, and False if Failed
    - Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
    """

    checks: list[
        tuple[
            str,  # The description, e.g., "Not Empty"
            Callable[
                [pd.DataFrame],
                pd.Series,
            ],  # Condition: takes the codes and returns a pd.Series of booleans
            Callable[
                [pd.DataFrame, Path],
                pd.DataFrame,
            ],  # Process: takes the codes and the codes file, returns fixed codes or raises
        ]
    ]

    def __init__(self, name: str, trud_codes_path: Optional[Path] = None):
        """Store the parser name and, when a path is given, load its code table.

        Args:
            name: Code type name.
            trud_codes_path: Parquet file to load into ``self.db``; when None
                neither ``self.db`` nor ``self.trud_codes_path`` is set.

        Raises:
            FileNotFoundError: if ``trud_codes_path`` is given but is not a file.
        """
        self.name: str = name

        if trud_codes_path is not None:
            if not trud_codes_path.is_file():
                # Report the actual code type; this constructor is not Read2-specific.
                raise FileNotFoundError(
                    f"Error: {name} code file '{trud_codes_path}' does not exist. Please ensure you have installed TRUD correctly"
                )
            self.trud_codes_path: Path = trud_codes_path
            self.db: pd.DataFrame = pd.read_parquet(self.trud_codes_path)

    def raise_exception(self, ex: Exception):
        """Raise ``ex``; lets a lambda raise, since ``raise`` is a statement and a lambda body must be an expression."""
        raise ex

    def in_database(
        self, codes: pd.DataFrame, db: pd.DataFrame, col: str
    ) -> pd.DataFrame:
        """Return, for each code, whether it appears in column ``col`` of ``db``."""
        return codes.isin(db[col])

    def process(
        self, codes: pd.DataFrame, codes_file: Path
    ) -> Tuple[pd.DataFrame, list]:
        """Run each check in order and apply its fix when the check fails.

        Returns:
            The codes after all successful fixes, and the list of errors taken
            from every InvalidCodesException raised by a fix.
        """
        errors = []
        for msg, cond, fix in self.checks:
            # Evaluate the condition once and reuse it for the test and the log.
            passed = cond(codes)
            if passed.all():
                _logger.debug("Check: passed")
                continue

            _logger.debug(f"Check: {msg} {(~passed).sum()} failed, trying to fix")
            try:
                codes = fix(codes, codes_file)
                _logger.debug("Check: Fixed")
            except InvalidCodesException as ex:
                # Unfixable: keep the codes as they were and collect the error.
                errors.append(ex.error)

        return codes, errors

    def verify(self, codes: pd.DataFrame, codes_file: Path):
        """Evaluate every check's condition on ``codes`` without fixing anything.

        Returns:
            A float numpy array with one entry per check: 1.0 when all codes
            pass that check, 0.0 otherwise. ``codes_file`` is not used.
        """
        # dtype=float keeps the array type the previous np.append loop produced.
        return np.array(
            [cond(codes).all() for _msg, cond, _process in self.checks], dtype=float
        )
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
63 def __init__(self, name: str, trud_codes_path: Optional[Path] = None): 64 if trud_codes_path is not None: 65 if trud_codes_path.is_file(): 66 self.trud_codes_path: Path = trud_codes_path 67 self.db: pd.DataFrame = pd.read_parquet(self.trud_codes_path) 68 else: 69 raise FileNotFoundError( 70 f"Error: Read2 code file '{trud_codes_path}' does not exist. Please ensure you have installed TRUD correctly" 71 ) 72 73 self.name: str = name
def raise_exception(self, ex: Exception):
    """Raise the given exception.

    ``raise`` is a statement, so it cannot appear in a lambda body; calling
    this method from a lambda gives the same effect as an expression.
    """
    raise ex
Raises an exception inside a lambda function. Python does not allow a raise statement inside a lambda because a lambda can only contain expressions, not statements. The name raise_exception is used rather than raise_ because it is more explicit.
def process(
    self, codes: pd.DataFrame, codes_file: Path
) -> Tuple[pd.DataFrame, list]:
    """Run each check in order and apply its fix when the check fails.

    Args:
        codes: The codes to check; each fix receives the output of the
            previous successful fix.
        codes_file: Passed through to every fix.

    Returns:
        The codes after all successful fixes, and the list of errors taken
        from every InvalidCodesException raised by a fix.
    """
    errors = []
    for msg, cond, fix in self.checks:
        # Evaluate the condition once and reuse it for the test and the log;
        # previously it was computed twice for every failing check.
        passed = cond(codes)
        if passed.all():
            _logger.debug("Check: passed")
            continue

        _logger.debug(f"Check: {msg} {(~passed).sum()} failed, trying to fix")
        try:
            codes = fix(codes, codes_file)
            _logger.debug("Check: Fixed")
        except InvalidCodesException as ex:
            # Unfixable: keep the codes as they were and collect the error.
            errors.append(ex.error)

    return codes, errors
Identify the checks that do not pass and fix them with the defined process.
def verify(self, codes: pd.DataFrame, codes_file: Path):
    """Evaluate every check's condition on ``codes`` without fixing anything.

    Returns a float numpy array with one entry per check, in check order:
    1.0 when all codes pass that check and 0.0 otherwise. ``codes_file`` is
    accepted but not used.
    """
    outcomes = [condition(codes).all() for _label, condition, _fixer in self.checks]
    return np.array(outcomes, dtype=float)
verify codes in codes file
class Read2(Proto):
    """Parser for Read2 codes.

    Loads the processed Read2 table and registers five checks. Codes shorter
    than five characters are right-padded with dots and longer codes are cut
    to five characters; an empty list, illegal characters, or codes missing
    from the table are reported through InvalidCodesException.
    """

    def __init__(self):
        super().__init__("read2", trud.PROCESSED_PATH / "read2.parquet")

        def reject(message, passing=None):
            """Return a fixer that raises InvalidCodesException with ``message``.

            When ``passing`` (the check's condition) is given, its result over
            the codes is stored as the error mask; otherwise the mask is None.
            """

            def fixer(codes, codes_file):
                mask = passing(codes) if passing is not None else None
                return self.raise_exception(
                    InvalidCodesException(
                        CodesError(
                            message,
                            codes=codes,
                            codes_file=codes_file,
                            mask=mask,
                            code_type=self.name,
                        )
                    )
                )

            return fixer

        def not_empty(codes):
            return pd.Series([len(codes) > 0])

        def not_too_short(codes):
            return ~(codes.str.len() < 5)

        def not_too_long(codes):
            return ~(codes.str.len() > 5)

        def alphanumeric_dot(codes):
            return codes.str.match(r"^[a-zA-Z0-9.]+$")

        def in_read2_table(codes):
            return self.in_database(codes, self.db, self.name)

        def pad_with_dots(codes, codes_file):
            return codes.str.pad(width=5, side="right", fillchar=".")

        def truncate(codes, codes_file):
            return codes.str[:5]

        # (name, condition, fix) in the order they must run
        self.checks = [
            ("Not Empty", not_empty, reject("Code list is empty")),
            ("Too Short", not_too_short, pad_with_dots),
            ("Too Long", not_too_long, truncate),
            (
                "Alphanumeric Dot",
                alphanumeric_dot,
                reject("Illegal code format, not alphanumeric dot", alphanumeric_dot),
            ),
            (
                "In Database",
                in_read2_table,
                reject("Codes do not exist in database", in_read2_table),
            ),
        ]
This Read2 class extends Proto, adding custom validation checks for a dataset of "Read2" codes. It ensures that the dataset is loaded, validates the codes based on several rules, and applies corrections or logs errors when necessary.
Inherited Members
class Read3(Proto):
    """Parser for Read3 codes.

    Loads the processed Read3 table and registers five checks. Codes shorter
    than five characters are right-padded with dots and longer codes are cut
    to five characters; an empty list, illegal characters, or codes missing
    from the table are reported through InvalidCodesException.
    """

    def __init__(self):
        # Lower-case name: it is used as the database column in the
        # "In Database" check and as the error code type, like every other
        # parser in this module and the "read3" registry key.
        super().__init__("read3", trud.PROCESSED_PATH / "read3.parquet")

        def reject(message, passing=None):
            """Return a fixer that raises InvalidCodesException with ``message``.

            When ``passing`` (the check's condition) is given, its result over
            the codes is stored as the error mask; otherwise the mask is None.
            """

            def fixer(codes, codes_file):
                mask = passing(codes) if passing is not None else None
                # The result goes in ``mask``: CodesError has no ``check_regex``
                # parameter, so the old keyword raised TypeError here instead
                # of the InvalidCodesException that process() collects.
                return self.raise_exception(
                    InvalidCodesException(
                        CodesError(
                            message,
                            codes=codes,
                            codes_file=codes_file,
                            mask=mask,
                            code_type=self.name,
                        )
                    )
                )

            return fixer

        def alphanumeric_dot(codes):
            return codes.str.match(r"^[a-zA-Z0-9.]+$")

        def in_read3_table(codes):
            return self.in_database(codes, self.db, self.name)

        self.checks = [
            (
                "Not Empty",
                lambda codes: pd.Series([len(codes) > 0]),
                reject("Code list is empty"),
            ),
            (
                "Too Short",
                lambda codes: ~(codes.str.len() < 5),
                lambda codes, codes_file: codes.str.pad(
                    width=5, side="right", fillchar="."
                ),
            ),
            (
                "Too Long",
                lambda codes: ~(codes.str.len() > 5),
                lambda codes, codes_file: codes.str[:5],
            ),
            (
                "Alphanumeric Dot",
                alphanumeric_dot,
                reject("QA Alphanumeric Dot", alphanumeric_dot),
            ),
            (
                "In Database",
                in_read3_table,
                reject("QA In Database", in_read3_table),
            ),
        ]
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Inherited Members
class Icd10(Proto):
    """Parser for ICD10 codes.

    Loads the processed ICD10 table and registers five checks. Dots are
    removed from codes; an empty list, codes shorter than three characters,
    codes that are not capital alphanumeric, and codes found in neither the
    main nor the "_alt" column are reported through InvalidCodesException.
    """

    def __init__(self):
        super().__init__("icd10", trud.PROCESSED_PATH / "icd10.parquet")

        def fail(message, codes, codes_file, mask):
            """Raise InvalidCodesException describing the failed check."""
            return self.raise_exception(
                InvalidCodesException(
                    CodesError(
                        message,
                        codes=codes,
                        codes_file=codes_file,
                        mask=mask,
                        code_type=self.name,
                    )
                )
            )

        def long_enough(codes):
            return ~(codes.str.len() < 3)

        def capital_alphanumeric(codes):
            return codes.str.match(r"^[A-Z0-9]+$")

        def known(codes):
            # In the main column or the alternative column.
            return self.in_database(codes, self.db, self.name) | self.in_database(
                codes, self.db, self.name + "_alt"
            )

        self.checks = [
            (
                "Not Empty",
                lambda codes: pd.Series([len(codes) > 0]),
                lambda codes, codes_file: fail(
                    f"Code list is empty {codes_file}", codes, codes_file, None
                ),
            ),
            (
                "Too Short",
                long_enough,
                lambda codes, codes_file: fail(
                    "QA Too Short", codes, codes_file, long_enough(codes)
                ),
            ),
            (
                "Has Dot",
                lambda codes: ~(codes.str.match(r".*\..*")),  # check if contains dot
                # regex=False: "." must be removed as a literal dot; as a regex
                # it is a wildcard, and the default of ``regex`` differs
                # between pandas versions.
                lambda codes, codes_file: codes.str.replace(".", "", regex=False),
            ),
            (
                "Alphanumeric Capital",
                capital_alphanumeric,
                lambda codes, codes_file: fail(
                    "QA Alphanumeric Capital",
                    codes,
                    codes_file,
                    capital_alphanumeric(codes),
                ),
            ),
            (
                "In Database",
                known,
                lambda codes, codes_file: fail(
                    "QA In Database", codes, codes_file, known(codes)
                ),
            ),
        ]
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Inherited Members
class Snomed(Proto):
    """Parser for SNOMED codes.

    Loads the processed SNOMED table and registers four checks: at least 6
    characters, at most 18 characters, numeric, and present in the table.
    None of them can be fixed; each failure is reported through
    InvalidCodesException. There is no "Not Empty" check for this code type.
    """

    def __init__(self):
        super().__init__("snomed", trud.PROCESSED_PATH / "snomed.parquet")

        def rejecting(label, message, passing):
            """Build a check whose failures are reported with ``message``."""

            def fixer(codes, codes_file):
                error = CodesError(
                    message,
                    codes=codes,
                    codes_file=codes_file,
                    mask=passing(codes),
                    code_type=self.name,
                )
                return self.raise_exception(InvalidCodesException(error))

            return (label, passing, fixer)

        def long_enough(codes):
            return ~(codes.str.len() < 6)

        def short_enough(codes):
            return ~(codes.str.len() > 18)

        def numeric(codes):
            return codes.str.match(r"[0-9]+$")

        def in_snomed_table(codes):
            return self.in_database(codes, self.db, self.name)

        self.checks = [
            rejecting("Too Short", "QA Too Short", long_enough),
            rejecting("Too Long", "QA Too Long", short_enough),
            rejecting("Numeric", "QA Numeric", numeric),
            rejecting("In Database", "QA In Database", in_snomed_table),
        ]
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Inherited Members
class Opcs4(Proto):
    """Parser for OPCS4 codes.

    Loads the processed OPCS4 table and registers two checks: the code list
    is not empty, and every code is present in the table. Failures are
    reported through InvalidCodesException.
    """

    def __init__(self):
        super().__init__("opcs4", trud.PROCESSED_PATH / "opcs4.parquet")

        def report(message, codes, codes_file, mask):
            """Raise InvalidCodesException describing the failed check."""
            error = CodesError(
                message,
                codes=codes,
                codes_file=codes_file,
                mask=mask,
                code_type=self.name,
            )
            return self.raise_exception(InvalidCodesException(error))

        def has_codes(codes):
            return pd.Series([len(codes) > 0])

        def in_opcs4_table(codes):
            return self.in_database(codes, self.db, self.name)

        def reject_empty(codes, codes_file):
            return report("Code list is empty", codes, codes_file, None)

        def reject_unknown(codes, codes_file):
            return report("QA In Database", codes, codes_file, in_opcs4_table(codes))

        self.checks = [
            ("Not Empty", has_codes, reject_empty),
            ("In Database", in_opcs4_table, reject_unknown),
        ]
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Inherited Members
class Atc(Proto):
    """Parser for ATC codes.

    No code table is loaded. Registers two checks: the code list is not
    empty, and every code consists only of capital letters and digits.
    Failures are reported through InvalidCodesException.
    """

    def __init__(self):
        super().__init__("atc", trud_codes_path=None)

        def report(message, codes, codes_file, mask):
            """Raise InvalidCodesException describing the failed check."""
            error = CodesError(
                message,
                codes=codes,
                codes_file=codes_file,
                mask=mask,
                code_type=self.name,
            )
            return self.raise_exception(InvalidCodesException(error))

        def has_codes(codes):
            return pd.Series([len(codes) > 0])

        def capital_alphanumeric(codes):
            return codes.str.match(r"^[A-Z0-9]+$")

        def reject_empty(codes, codes_file):
            return report("Code list is empty", codes, codes_file, None)

        def reject_format(codes, codes_file):
            return report(
                "QA Alphanumeric Capital",
                codes,
                codes_file,
                capital_alphanumeric(codes),
            )

        self.checks = [
            ("Not Empty", has_codes, reject_empty),
            ("Alphanumeric Capital", capital_alphanumeric, reject_format),
        ]
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Inherited Members
class Med(Proto):
    """Parser for "med" codes.

    No code table is loaded. The only check is that the code list is not
    empty; an empty list is reported through InvalidCodesException.
    """

    def __init__(self):
        super().__init__("med", trud_codes_path=None)

        def has_codes(codes):
            return pd.Series([len(codes) > 0])

        def reject_empty(codes, codes_file):
            error = CodesError(
                "Code list is empty",
                codes=codes,
                codes_file=codes_file,
                mask=None,
                code_type=self.name,
            )
            return self.raise_exception(InvalidCodesException(error))

        self.checks = [("Not Empty", has_codes, reject_empty)]
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Inherited Members
class Cprd(Proto):
    """Parser for "cprd" codes.

    No code table is loaded. The only check is that the code list is not
    empty; an empty list is reported through InvalidCodesException.
    """

    def __init__(self):
        super().__init__("cprd", trud_codes_path=None)

        def has_codes(codes):
            return pd.Series([len(codes) > 0])

        def reject_empty(codes, codes_file):
            error = CodesError(
                "Code list is empty",
                codes=codes,
                codes_file=codes_file,
                mask=None,
                code_type=self.name,
            )
            return self.raise_exception(InvalidCodesException(error))

        self.checks = [("Not Empty", has_codes, reject_empty)]
Define checks as list of 3 tuple: (Message, Condition, Process)
- Message = The name of the condition (what is printed and logged)
- Condition = True if Passed, and False if Failed
- Process = Aims to resolve all issues that stop condition from passing (Do not change index!)
Inherited Members
class CodeTypeParser:
    """Holds one parser instance per code type, keyed by code type name.

    After construction, ``code_types`` maps "read2", "read3", "icd10",
    "snomed", "opcs4", "atc", "med" and "cprd" to their parser objects.
    """

    def __init__(self, trud_processed_dir: Path = trud.PROCESSED_PATH):
        """Check the TRUD processed directory exists, then build every parser.

        Args:
            trud_processed_dir: Directory that must exist before the parsers
                are created.

        Raises:
            FileNotFoundError: if ``trud_processed_dir`` is not an existing
                directory.
        """
        # is_dir() is already False for a missing path, so one test covers both cases.
        if not trud_processed_dir.is_dir():
            raise FileNotFoundError(
                f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install"
            )

        # NOTE(review): the parser constructors take no directory argument, so
        # trud_processed_dir is only validated here - confirm whether it
        # should be forwarded to them.
        self.code_types = {
            "read2": Read2(),
            "read3": Read3(),
            "icd10": Icd10(),
            "snomed": Snomed(),
            "opcs4": Opcs4(),
            "atc": Atc(),
            "med": Med(),
            "cprd": Cprd(),
        }
A class that checks the TRUD processed directory exists and then holds one parser instance for each code type, keyed by code type name.
def __init__(self, trud_processed_dir: Path = trud.PROCESSED_PATH):
    """Check the TRUD processed directory exists, then build every parser.

    Raises FileNotFoundError when ``trud_processed_dir`` is missing or is not
    a directory. Otherwise ``self.code_types`` maps each code type name to a
    new parser instance.
    """
    directory_ok = trud_processed_dir.exists() and trud_processed_dir.is_dir()
    if not directory_ok:
        raise FileNotFoundError(
            f"Cannot initialise parsers as the TRUD processed directory {trud_processed_dir} does not exist, please check that TRUD has been installed: acmc trud install"
        )

    # Instantiated in this order, one parser per code type name.
    parser_classes = (
        ("read2", Read2),
        ("read3", Read3),
        ("icd10", Icd10),
        ("snomed", Snomed),
        ("opcs4", Opcs4),
        ("atc", Atc),
        ("med", Med),
        ("cprd", Cprd),
    )
    self.code_types = {
        code_type: parser_class() for code_type, parser_class in parser_classes
    }