Skip to content
Snippets Groups Projects
Commit e5cbdbd2 authored by mjbonifa's avatar mjbonifa
Browse files

changed phen/output directory to phen/map and added concept-sets directory

parent 66f7c9e9
Branches
Tags
No related merge requests found
...@@ -22,8 +22,12 @@ pd.set_option("mode.chained_assignment", None) ...@@ -22,8 +22,12 @@ pd.set_option("mode.chained_assignment", None)
PHEN_DIR = 'phen' PHEN_DIR = 'phen'
DEFAULT_PHEN_PATH = Path('build') / PHEN_DIR DEFAULT_PHEN_PATH = Path('build') / PHEN_DIR
CODES_DIR = 'codes' CODES_DIR = 'codes'
OUTPUT_DIR = 'output' MAP_DIR = 'map'
CONCEPT_SET_DIR = 'concept-set'
DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR]
CONFIG_FILE = 'config.json' CONFIG_FILE = 'config.json'
ERROR_FILE = 'errors.csv' ERROR_FILE = 'errors.csv'
REPORT_FILE = 'report.md' REPORT_FILE = 'report.md'
...@@ -64,6 +68,12 @@ def construct_git_url(remote_url): ...@@ -64,6 +68,12 @@ def construct_git_url(remote_url):
new_netloc = f"{auth}@{parsed_url.netloc}" new_netloc = f"{auth}@{parsed_url.netloc}"
return urlunparse((parsed_url.scheme, new_netloc, parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment)) return urlunparse((parsed_url.scheme, new_netloc, parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment))
def create_empty_git_dir(path):
    """Create a directory holding a .gitkeep file so that git tracks it.

    Git does not record empty directories, so an empty ``.gitkeep``
    placeholder is written inside the directory. Calling this on a
    directory that already exists only ensures the placeholder is present.

    Args:
        path: Directory to create, given as a ``Path`` or a path string.

    Raises:
        FileExistsError: If ``path`` already exists and is not a directory.
    """
    # Normalise so callers may pass either a Path or a plain string.
    path = Path(path)
    # parents=True: a missing intermediate directory should not abort the
    # creation of the standard directory structure.
    path.mkdir(parents=True, exist_ok=True)
    (path / '.gitkeep').touch(exist_ok=True)
def init(phen_dir, remote_url): def init(phen_dir, remote_url):
"""Initial phenotype directory as git repo with standard structure""" """Initial phenotype directory as git repo with standard structure"""
print(f"Initialising Phenotype in directory: {phen_dir}") print(f"Initialising Phenotype in directory: {phen_dir}")
...@@ -138,17 +148,8 @@ def init(phen_dir, remote_url): ...@@ -138,17 +148,8 @@ def init(phen_dir, remote_url):
return return
print("Creating phen directory structure and config files") print("Creating phen directory structure and config files")
# create codes directory for d in DEFAULT_PHEN_DIR_LIST:
codes_path = phen_path / CODES_DIR create_empty_git_dir(phen_path / d)
codes_path.mkdir(exist_ok=True)
keep_path = codes_path / '.gitkeep'
keep_path.touch(exist_ok=True)
# create concept sets directory
output_path = phen_path / OUTPUT_DIR
output_path.mkdir(exist_ok=True)
keep_path = output_path / '.gitkeep'
keep_path.touch(exist_ok=True)
# set initial version based on the number of commits in the repo, depending on how the repo was created # set initial version based on the number of commits in the repo, depending on how the repo was created
# e.g., with a README.md, then there will be some initial commits before the phen config is added # e.g., with a README.md, then there will be some initial commits before the phen config is added
...@@ -175,8 +176,8 @@ def init(phen_dir, remote_url): ...@@ -175,8 +176,8 @@ def init(phen_dir, remote_url):
json.dump(config, f, indent=4) json.dump(config, f, indent=4)
# add to git repo and commit # add to git repo and commit
repo.git.add(codes_path) for d in DEFAULT_PHEN_DIR_LIST:
repo.git.add(output_path) repo.git.add(phen_path / d)
repo.git.add(all=True) repo.git.add(all=True)
repo.index.commit("initialised the phen git repo.") repo.index.commit("initialised the phen git repo.")
...@@ -529,10 +530,10 @@ def map(phen_dir, ...@@ -529,10 +530,10 @@ def map(phen_dir,
else: else:
output_filename = target_code_type + '_no_translate.csv' output_filename = target_code_type + '_no_translate.csv'
output_path = phen_path / OUTPUT_DIR / output_filename map_path = phen_path / MAP_DIR / output_filename
out.to_csv(output_path, index=False) out.to_csv(map_path, index=False)
print("Saved to", output_path) print("Saved translations to", map_path)
# Save Error File # Save Error File
error_path = phen_path / ERROR_FILE error_path = phen_path / ERROR_FILE
...@@ -660,12 +661,12 @@ def diff(phen_dir, phen_old_dir): ...@@ -660,12 +661,12 @@ def diff(phen_dir, phen_old_dir):
raise ValueError(f"Unsupported filetype provided for report file {str(report_path.resolve())}") raise ValueError(f"Unsupported filetype provided for report file {str(report_path.resolve())}")
# Get maps files from phenotype # Get maps files from phenotype
old_output_path = old_phen_path / OUTPUT_DIR old_map_path = old_phen_path / MAP_DIR
new_output_path = new_phen_path / OUTPUT_DIR new_map_path = new_phen_path / MAP_DIR
# List files from output directories # List files from output directories
old_output_files = [file.name for file in old_output_path.iterdir() if file.is_file() and not file.name.startswith('.')] old_output_files = [file.name for file in old_map_path.iterdir() if file.is_file() and not file.name.startswith('.')]
new_output_files = [file.name for file in new_output_path.iterdir() if file.is_file() and not file.name.startswith('.')] new_output_files = [file.name for file in new_map_path.iterdir() if file.is_file() and not file.name.startswith('.')]
# Convert the lists to sets for easy comparison # Convert the lists to sets for easy comparison
old_output_set = set(old_output_files) old_output_set = set(old_output_files)
...@@ -689,8 +690,8 @@ def diff(phen_dir, phen_old_dir): ...@@ -689,8 +690,8 @@ def diff(phen_dir, phen_old_dir):
report.write(f"\n\n## Compare concepts {str(old_phen_path.resolve())} to {str(new_phen_path.resolve())}\n\n") report.write(f"\n\n## Compare concepts {str(old_phen_path.resolve())} to {str(new_phen_path.resolve())}\n\n")
# Compare common outputs between versions # Compare common outputs between versions
for file in common_outputs: for file in common_outputs:
old_output = old_output_path / file old_output = old_map_path / file
new_output = new_output_path / file new_output = new_map_path / file
df1 = pd.read_csv(old_output) df1 = pd.read_csv(old_output)
df1 = df1[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count() df1 = df1[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment