diff --git a/README.md b/README.md index 3bd9fd947a4d5833ea962414ccff5d58b0ad0bfd..29629275d6c84e0c2728f0516469932dbd0734c4 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,22 @@ Please execute the following process: ```bash acmc omop install -f <path to downloaded OMOP zip file> -v <release version from email> ``` +Expected output: + +```bash +[INFO] - Installing OMOP from zip file: ../data/acmc-omop.zip +[INFO] - Extracted OMOP zip file ../data/acmc-omop.zip to vocab/omop/ +[INFO] - Processing 1 of 9 tables: vocab/omop/CONCEPT.csv +[INFO] - Processing 2 of 9 tables: vocab/omop/DOMAIN.csv +[INFO] - Processing 3 of 9 tables: vocab/omop/CONCEPT_CLASS.csv +[INFO] - Processing 4 of 9 tables: vocab/omop/RELATIONSHIP.csv +[INFO] - Processing 5 of 9 tables: vocab/omop/DRUG_STRENGTH.csv +[INFO] - Processing 6 of 9 tables: vocab/omop/VOCABULARY.csv +[INFO] - Processing 7 of 9 tables: vocab/omop/CONCEPT_SYNONYM.csv +[INFO] - Processing 8 of 9 tables: vocab/omop/CONCEPT_ANCESTOR.csv +[INFO] - Processing 9 of 9 tables: vocab/omop/CONCEPT_RELATIONSHIP.csv +[INFO] - OMOP installation completed +``` --- @@ -451,7 +467,7 @@ The `phen` command is used phenotype-related operations. ## License -MIT License +This project is licensed under the terms of the **Apache License 2.0**. For more details, please see the full license [here](http://www.apache.org/licenses/LICENSE-2.0). ## Support diff --git a/acmc/omop.py b/acmc/omop.py index fb40413801822dc2b5dcc13e9c02a38b9f052249..e63cc278311ec4d59c2b4b6bf4f2e7f9fa673ee6 100644 --- a/acmc/omop.py +++ b/acmc/omop.py @@ -42,7 +42,7 @@ vocabularies = { #Populate SQLite3 Database with default OMOP CONCEPTS def install (omop_zip_file, version): """Installs the OMOP release csv files in a file-based sql database""" - logger.info(f"Installing OMOP downloads {omop_zip_file}") + logger.info(f"Installing OMOP from zip file: {omop_zip_file}") omop_zip_path = Path(omop_zip_file) # Check if the file exists and is a ZIP file @@ -74,22 +74,24 @@ def install (omop_zip_file, version): # connect to database, if it does not exist it will be created conn = sqlite3.connect(DB_PATH) # Iterate through files in the folder - for filename in os.listdir(VOCAB_PATH): - if filename.endswith(".csv"): # Check if the file is a CSV - file_path = os.path.join(VOCAB_PATH, filename) - try: - logger.info(f"Reading table: {file_path}") - # read the CSV file with the specified delimiter - df = pd.read_csv(file_path, delimiter="\t", low_memory=False) - table_name = os.path.splitext(os.path.basename(file_path))[0] #Get name of file + csv_files = list(VOCAB_PATH.glob("*.csv")) + total_tables_count = len(csv_files) + table_count = 1 + for filename in csv_files: + try: + logger.info(f"Processing {table_count} of {total_tables_count} tables: {filename}") + # read the CSV file with the specified delimiter + df = pd.read_csv(filename, delimiter="\t", low_memory=False) + + # export Table to sqlite db + df.to_sql(filename.stem, conn, if_exists='replace', index=False) - # export Table to sqlite db - df.to_sql(table_name, conn, if_exists='replace', index=False) + # add to the metadata + vocabularies["tables"].append(filename.stem) + table_count = table_count + 1 + except Exception as e: + raise Exception(f"Error reading file {filename}: {e}") - # add to the metadata - vocabularies["tables"].append(filename) - except Exception as e: - raise Exception(f"Error reading file {file_path}: {e}") conn.close() # write version file