Skip to content
Snippets Groups Projects
Commit e5cbdbd2 authored by mjbonifa's avatar mjbonifa
Browse files

Changed the phen/output directory to phen/map and added a concept-set directory

parent 66f7c9e9
No related branches found
No related tags found
No related merge requests found
......@@ -22,8 +22,12 @@ pd.set_option("mode.chained_assignment", None)
# Name of the phenotype directory; joined onto 'build' for the default path below.
PHEN_DIR = 'phen'
DEFAULT_PHEN_PATH = Path('build') / PHEN_DIR
# Sub-directory names inside a phenotype directory.
CODES_DIR = 'codes'
# NOTE(review): OUTPUT_DIR looks like the pre-rename name that MAP_DIR replaces
# in this change — confirm it is no longer referenced before relying on it.
OUTPUT_DIR = 'output'
# Directory that map() writes its translated CSV files into.
MAP_DIR = 'map'
CONCEPT_SET_DIR = 'concept-set'
# Sub-directories that init() creates (each with a .gitkeep) and adds to git.
DEFAULT_PHEN_DIR_LIST = [CODES_DIR, MAP_DIR, CONCEPT_SET_DIR]
# File names used inside the phenotype directory.
CONFIG_FILE = 'config.json'
ERROR_FILE = 'errors.csv'
REPORT_FILE = 'report.md'
......@@ -64,6 +68,12 @@ def construct_git_url(remote_url):
new_netloc = f"{auth}@{parsed_url.netloc}"
return urlunparse((parsed_url.scheme, new_netloc, parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment))
def create_empty_git_dir(path):
    """Create a directory containing a ``.gitkeep`` file so git tracks it.

    Git does not track empty directories, so an empty placeholder file is
    written inside the directory.

    Args:
        path: Directory to create, given as a ``pathlib.Path`` or a path
            string. Missing parent directories are created as well.
            Calling this on an existing directory only ensures that the
            placeholder file exists.
    """
    # Accept plain strings as well as Path objects.
    path = Path(path)
    # parents=True avoids a FileNotFoundError when an ancestor is missing.
    path.mkdir(parents=True, exist_ok=True)
    (path / '.gitkeep').touch(exist_ok=True)
def init(phen_dir, remote_url):
"""Initial phenotype directory as git repo with standard structure"""
print(f"Initialising Phenotype in directory: {phen_dir}")
......@@ -138,17 +148,8 @@ def init(phen_dir, remote_url):
return
print("Creating phen directory structure and config files")
# create codes directory
codes_path = phen_path / CODES_DIR
codes_path.mkdir(exist_ok=True)
keep_path = codes_path / '.gitkeep'
keep_path.touch(exist_ok=True)
# create concept sets directory
output_path = phen_path / OUTPUT_DIR
output_path.mkdir(exist_ok=True)
keep_path = output_path / '.gitkeep'
keep_path.touch(exist_ok=True)
for d in DEFAULT_PHEN_DIR_LIST:
create_empty_git_dir(phen_path / d)
# set initial version based on the number of commits in the repo, depending on how the repo was created
# e.g., with a README.md, then there will be some initial commits before the phen config is added
......@@ -175,8 +176,8 @@ def init(phen_dir, remote_url):
json.dump(config, f, indent=4)
# add to git repo and commit
repo.git.add(codes_path)
repo.git.add(output_path)
for d in DEFAULT_PHEN_DIR_LIST:
repo.git.add(phen_path / d)
repo.git.add(all=True)
repo.index.commit("initialised the phen git repo.")
......@@ -529,10 +530,10 @@ def map(phen_dir,
else:
output_filename = target_code_type + '_no_translate.csv'
output_path = phen_path / OUTPUT_DIR / output_filename
map_path = phen_path / MAP_DIR / output_filename
out.to_csv(output_path, index=False)
print("Saved to", output_path)
out.to_csv(map_path, index=False)
print("Saved translations to", map_path)
# Save Error File
error_path = phen_path / ERROR_FILE
......@@ -660,12 +661,12 @@ def diff(phen_dir, phen_old_dir):
raise ValueError(f"Unsupported filetype provided for report file {str(report_path.resolve())}")
# Get maps files from phenotype
old_output_path = old_phen_path / OUTPUT_DIR
new_output_path = new_phen_path / OUTPUT_DIR
old_map_path = old_phen_path / MAP_DIR
new_map_path = new_phen_path / MAP_DIR
# List files from output directories
old_output_files = [file.name for file in old_output_path.iterdir() if file.is_file() and not file.name.startswith('.')]
new_output_files = [file.name for file in new_output_path.iterdir() if file.is_file() and not file.name.startswith('.')]
old_output_files = [file.name for file in old_map_path.iterdir() if file.is_file() and not file.name.startswith('.')]
new_output_files = [file.name for file in new_map_path.iterdir() if file.is_file() and not file.name.startswith('.')]
# Convert the lists to sets for easy comparison
old_output_set = set(old_output_files)
......@@ -689,8 +690,8 @@ def diff(phen_dir, phen_old_dir):
report.write(f"\n\n## Compare concepts {str(old_phen_path.resolve())} to {str(new_phen_path.resolve())}\n\n")
# Compare common outputs between versions
for file in common_outputs:
old_output = old_output_path / file
new_output = new_output_path / file
old_output = old_map_path / file
new_output = new_map_path / file
df1 = pd.read_csv(old_output)
df1 = df1[["CONCEPT","CONCEPT_SET"]].groupby("CONCEPT_SET").count()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment