From 12c11d66696628b6bbc9e4d87b4391e72ba8c337 Mon Sep 17 00:00:00 2001
From: Jakub Dylag <jjd1c23@soton.ac.uk>
Date: Thu, 31 Oct 2024 19:51:30 +0000
Subject: [PATCH] Make output reproducible - consistent ordering and uniformity in output files

---
 publish.py | 9 +++++----
 run.sh     | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/publish.py b/publish.py
index b7335a4..bfd7301 100644
--- a/publish.py
+++ b/publish.py
@@ -9,11 +9,12 @@ def main(config):
 	else:
 		raise Exception("Concepts file must be '.csv' filetype")
 	
-	filename = config["concepts"][:-4]
-	
 	for name, concept in df.groupby("MELDB_concept"):
-		concept.sort_values(by="code")
-		concept.to_csv(os.path.join(config["output"], str(name)+".csv"), 
+		concept = concept.sort_values(by="code") #sort rows
+		concept = concept.dropna(how='all', axis=1)  #remove empty cols
+		concept = concept.reindex(sorted(concept.columns), axis=1) #sort cols alphabetically
+
+		concept.to_csv(os.path.join(config["output"], str(name)+".csv"), #save to csv
 					   index=False )
 		
 
diff --git a/run.sh b/run.sh
index 85acb40..b64772d 100644
--- a/run.sh
+++ b/run.sh
@@ -1,7 +1,7 @@
 #! /usr/bin/bash
 
-version="V3_2_7"
-previous="V3_2_6"
+version="V3_2_10"
+previous="V3_2_9"
 
 python main.py -r2 PHEN_assign_v3.json CONC_summary_working.xlsx
 mv output/MELD_concepts_read.csv output/${version}_MELD_concepts_readv2.csv
-- 
GitLab