From 4b77e5d349b735725db3c4d023eaa5fa8a671af4 Mon Sep 17 00:00:00 2001 From: Michael Boniface <m.j.boniface@soton.ac.uk> Date: Wed, 26 Feb 2025 22:11:32 +0000 Subject: [PATCH] fix: yaml library dump strips all quotes from the output and there's no configuration option to prevent this, so a bespoke Dumper is needed; it has been added to a new module util.py because dumping yaml is done across many files. closes #41 --- acmc/omop.py | 11 +++++++++-- acmc/phen.py | 29 +++++++++++++++++++++++++---- acmc/trud.py | 11 +++++++++-- acmc/util.py | 7 +++++++ 4 files changed, 50 insertions(+), 8 deletions(-) create mode 100644 acmc/util.py diff --git a/acmc/omop.py b/acmc/omop.py index dd5a461..d3f1710 100644 --- a/acmc/omop.py +++ b/acmc/omop.py @@ -9,7 +9,7 @@ import json import yaml from pathlib import Path -from acmc import logging_config +from acmc import util, logging_config # setup logging logger = logging_config.setup_logger() @@ -123,7 +123,14 @@ def write_version_file(version): """Writes the OMOP vocaburaries and version to a file""" vocabularies["version"] = version with open(VERSION_PATH, "w") as file: - yaml.dump(vocabularies, file, default_flow_style=False, sort_keys=False) + yaml.dump( + vocabularies, + file, + Dumper=util.QuotedDumper, + default_flow_style=False, + sort_keys=False, + default_style='"', + ) def clear(db_path): diff --git a/acmc/phen.py b/acmc/phen.py index 50b166b..23c8ac7 100644 --- a/acmc/phen.py +++ b/acmc/phen.py @@ -17,7 +17,7 @@ from pathlib import Path from urllib.parse import urlparse, urlunparse import acmc -from acmc import trud, omop, parse +from acmc import trud, omop, parse, util # setup logging import acmc.logging_config as lc @@ -275,7 +275,14 @@ def init(phen_dir, remote_url): } with open(phen_path / CONFIG_FILE, "w") as file: - yaml.dump(config, file, default_flow_style=False, sort_keys=False) + yaml.dump( + config, + file, + Dumper=util.QuotedDumper, + default_flow_style=False, + sort_keys=False, + default_style='"', + ) # add git ignore 
ignore_content = """# Ignore SQLite database files @@ -611,7 +618,14 @@ def write_vocab_version(phen_path): } with open(phen_path / VOCAB_VERSION_FILE, "w") as file: - yaml.dump(version_data, file, default_flow_style=False, sort_keys=False) + yaml.dump( + version_data, + file, + Dumper=util.QuotedDumper, + default_flow_style=False, + sort_keys=False, + default_style='"', + ) def map(phen_dir, target_code_type): @@ -790,7 +804,14 @@ def publish(phen_dir, remote_url): logger.debug(f"New version: {version}") config["phenotype"]["version"] = version with open(config_path, "w") as file: - yaml.dump(config, file, default_flow_style=False, sort_keys=False) + yaml.dump( + config, + file, + Dumper=util.QuotedDumper, + default_flow_style=False, + sort_keys=False, + default_style='"', + ) # Add and commit changes to repo commit_message = f"Committing updates to phenotype {phen_path}" diff --git a/acmc/trud.py b/acmc/trud.py index 93298f9..08f5c4a 100644 --- a/acmc/trud.py +++ b/acmc/trud.py @@ -11,7 +11,7 @@ import yaml from pathlib import Path # setup logging -import acmc.logging_config as lc +from acmc import util, logging_config as lc logger = lc.setup_logger() @@ -384,7 +384,14 @@ def install(): data = [{k: v for k, v in d.items() if k != "extract"} for d in items] # save TRUD versions to file to main record of what was downloaded with open(VERSION_PATH, "w") as file: - yaml.dump(data, file, default_flow_style=False, sort_keys=False) + yaml.dump( + data, + file, + Dumper=util.QuotedDumper, + default_flow_style=False, + sort_keys=False, + default_style='"', + ) # Validate and process each item ID for item in items: diff --git a/acmc/util.py b/acmc/util.py new file mode 100644 index 0000000..01bb458 --- /dev/null +++ b/acmc/util.py @@ -0,0 +1,7 @@ +import yaml + + +# Custom Dumper to retain quotes on strings in yaml library +class QuotedDumper(yaml.Dumper): + def increase_indent(self, flow=False, indentless=False): + return super(QuotedDumper, self).increase_indent(flow, 
indentless) -- GitLab