From 4b77e5d349b735725db3c4d023eaa5fa8a671af4 Mon Sep 17 00:00:00 2001
From: Michael Boniface <m.j.boniface@soton.ac.uk>
Date: Wed, 26 Feb 2025 22:11:32 +0000
Subject: [PATCH] fix: yaml library dump strips all quotes from the output and
 there's no configuration option to prevent it, so a bespoke Dumper is needed.
 It has been added to a new module, util.py, because yaml dumping is done
 across many files. closes #41

---
 acmc/omop.py | 11 +++++++++--
 acmc/phen.py | 29 +++++++++++++++++++++++++----
 acmc/trud.py | 11 +++++++++--
 acmc/util.py |  7 +++++++
 4 files changed, 50 insertions(+), 8 deletions(-)
 create mode 100644 acmc/util.py

diff --git a/acmc/omop.py b/acmc/omop.py
index dd5a461..d3f1710 100644
--- a/acmc/omop.py
+++ b/acmc/omop.py
@@ -9,7 +9,7 @@ import json
 import yaml
 from pathlib import Path
 
-from acmc import logging_config
+from acmc import util, logging_config
 
 # setup logging
 logger = logging_config.setup_logger()
@@ -123,7 +123,14 @@ def write_version_file(version):
     """Writes the OMOP vocaburaries and version to a file"""
     vocabularies["version"] = version
     with open(VERSION_PATH, "w") as file:
-        yaml.dump(vocabularies, file, default_flow_style=False, sort_keys=False)
+        yaml.dump(
+            vocabularies,
+            file,
+            Dumper=util.QuotedDumper,
+            default_flow_style=False,
+            sort_keys=False,
+            default_style='"',
+        )
 
 
 def clear(db_path):
diff --git a/acmc/phen.py b/acmc/phen.py
index 50b166b..23c8ac7 100644
--- a/acmc/phen.py
+++ b/acmc/phen.py
@@ -17,7 +17,7 @@ from pathlib import Path
 from urllib.parse import urlparse, urlunparse
 
 import acmc
-from acmc import trud, omop, parse
+from acmc import trud, omop, parse, util
 
 # setup logging
 import acmc.logging_config as lc
@@ -275,7 +275,14 @@ def init(phen_dir, remote_url):
     }
 
     with open(phen_path / CONFIG_FILE, "w") as file:
-        yaml.dump(config, file, default_flow_style=False, sort_keys=False)
+        yaml.dump(
+            config,
+            file,
+            Dumper=util.QuotedDumper,
+            default_flow_style=False,
+            sort_keys=False,
+            default_style='"',
+        )
 
     # add git ignore
     ignore_content = """# Ignore SQLite database files
@@ -611,7 +618,14 @@ def write_vocab_version(phen_path):
     }
 
     with open(phen_path / VOCAB_VERSION_FILE, "w") as file:
-        yaml.dump(version_data, file, default_flow_style=False, sort_keys=False)
+        yaml.dump(
+            version_data,
+            file,
+            Dumper=util.QuotedDumper,
+            default_flow_style=False,
+            sort_keys=False,
+            default_style='"',
+        )
 
 
 def map(phen_dir, target_code_type):
@@ -790,7 +804,14 @@ def publish(phen_dir, remote_url):
     logger.debug(f"New version: {version}")
     config["phenotype"]["version"] = version
     with open(config_path, "w") as file:
-        yaml.dump(config, file, default_flow_style=False, sort_keys=False)
+        yaml.dump(
+            config,
+            file,
+            Dumper=util.QuotedDumper,
+            default_flow_style=False,
+            sort_keys=False,
+            default_style='"',
+        )
 
     # Add and commit changes to repo
     commit_message = f"Committing updates to phenotype {phen_path}"
diff --git a/acmc/trud.py b/acmc/trud.py
index 93298f9..08f5c4a 100644
--- a/acmc/trud.py
+++ b/acmc/trud.py
@@ -11,7 +11,7 @@ import yaml
 from pathlib import Path
 
 # setup logging
-import acmc.logging_config as lc
+from acmc import util, logging_config as lc
 
 logger = lc.setup_logger()
 
@@ -384,7 +384,14 @@ def install():
     data = [{k: v for k, v in d.items() if k != "extract"} for d in items]
     # save TRUD versions to file to main record of what was downloaded
     with open(VERSION_PATH, "w") as file:
-        yaml.dump(data, file, default_flow_style=False, sort_keys=False)
+        yaml.dump(
+            data,
+            file,
+            Dumper=util.QuotedDumper,
+            default_flow_style=False,
+            sort_keys=False,
+            default_style='"',
+        )
 
     # Validate and process each item ID
     for item in items:
diff --git a/acmc/util.py b/acmc/util.py
new file mode 100644
index 0000000..01bb458
--- /dev/null
+++ b/acmc/util.py
@@ -0,0 +1,7 @@
+import yaml
+
+
+# Custom yaml Dumper; call sites pair it with default_style='"' so output is quoted
+class QuotedDumper(yaml.Dumper):
+    def increase_indent(self, flow=False, indentless=False):
+        return super(QuotedDumper, self).increase_indent(flow, indentless)
-- 
GitLab