diff --git a/phen.py b/phen.py index 48c7d1b0fcb16724eb60930b84528e4a1627a53b..321a71cfc0b619e22820e3b97bfaa68657bbc8d3 100644 --- a/phen.py +++ b/phen.py @@ -22,8 +22,12 @@ pd.set_option("mode.chained_assignment", None) PHEN_DIR = 'phen' DEFAULT_PHEN_PATH = Path('build') / PHEN_DIR + CODES_DIR = 'codes' -OUTPUT_DIR = 'output' +MAP_DIR = 'map' +CONCEPT_SET_DIR = 'concept-set' +DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR] + CONFIG_FILE = 'config.json' ERROR_FILE = 'errors.csv' REPORT_FILE = 'report.md' @@ -64,6 +68,12 @@ def construct_git_url(remote_url): new_netloc = f"{auth}@{parsed_url.netloc}" return urlunparse((parsed_url.scheme, new_netloc, parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment)) +def create_empty_git_dir(path): + """Creates a directory with a .gitkeep file so that it's tracked in git""" + path.mkdir(exist_ok=True) + keep_path = path / '.gitkeep' + keep_path.touch(exist_ok=True) + def init(phen_dir, remote_url): """Initial phenotype directory as git repo with standard structure""" print(f"Initialising Phenotype in directory: {phen_dir}") @@ -138,17 +148,8 @@ def init(phen_dir, remote_url): return print("Creating phen directory structure and config files") - # create codes directory - codes_path = phen_path / CODES_DIR - codes_path.mkdir(exist_ok=True) - keep_path = codes_path / '.gitkeep' - keep_path.touch(exist_ok=True) - - # create concept sets directory - output_path = phen_path / OUTPUT_DIR - output_path.mkdir(exist_ok=True) - keep_path = output_path / '.gitkeep' - keep_path.touch(exist_ok=True) + for d in DEFAULT_PHEN_DIR_LIST: + create_empty_git_dir(phen_path / d) # set initial version based on the number of commits in the repo, depending on how the repo was created # e.g., with a README.md, then there will be some initial commits before the phen config is added @@ -175,8 +176,8 @@ def init(phen_dir, remote_url): json.dump(config, f, indent=4) # add to git repo and commit - repo.git.add(codes_path) - repo.git.add(output_path) + for d in DEFAULT_PHEN_DIR_LIST: + repo.git.add(phen_path / d) repo.git.add(all=True) repo.index.commit("initialised the phen git repo.") @@ -529,10 +530,10 @@ def map(phen_dir, else: output_filename = target_code_type + '_no_translate.csv' - output_path = phen_path / OUTPUT_DIR / output_filename + map_path = phen_path / MAP_DIR / output_filename - out.to_csv(output_path, index=False) - print("Saved to", output_path) + out.to_csv(map_path, index=False) + print("Saved translations to", map_path) # Save Error File error_path = phen_path / ERROR_FILE @@ -660,12 +661,12 @@ def diff(phen_dir, phen_old_dir): raise ValueError(f"Unsupported filetype provided for report file {str(report_path.resolve())}") # Get maps files from phenotype - old_output_path = old_phen_path / OUTPUT_DIR - new_output_path = new_phen_path / OUTPUT_DIR + old_map_path = old_phen_path / MAP_DIR + new_map_path = new_phen_path / MAP_DIR # List files from output directories - old_output_files = [file.name for file in old_output_path.iterdir() if file.is_file() and not file.name.startswith('.')] - new_output_files = [file.name for file in new_output_path.iterdir() if file.is_file() and not file.name.startswith('.')] + old_output_files = [file.name for file in old_map_path.iterdir() if file.is_file() and not file.name.startswith('.')] + new_output_files = [file.name for file in new_map_path.iterdir() if file.is_file() and not file.name.startswith('.')] # Convert the lists to sets for easy comparison old_output_set = set(old_output_files) @@ -689,8 +690,8 @@ def diff(phen_dir, phen_old_dir): report.write(f"\n\n## Compare concepts {str(old_phen_path.resolve())} to {str(new_phen_path.resolve())}\n\n") # Compare common outputs between versions for file in common_outputs: - old_output = old_output_path / file - new_output = new_output_path / file + old_output = old_map_path / file + new_output = new_map_path / file df1 = pd.read_csv(old_output) df1 = df1[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()