From f4b89b6f8edc33f80b84b9ed2d3a85e13a8644b3 Mon Sep 17 00:00:00 2001
From: Fanis Baikas <fan.baikas@gmail.com>
Date: Thu, 1 Feb 2024 11:41:28 +0000
Subject: [PATCH] Created README.md and license files. Added run_exp1.sh and
 run_exp2.sh scripts. Fixed bug in sequence_finder.py.

---
 LICENSE                | 21 +++++++++++++
 README.md              | 69 ++++++++++++++++++++++++++++++++++++++++++
 scripts/run_exp1.sh    | 24 +++++++++++++++
 scripts/run_exp2.sh    | 28 +++++++++++++++++
 src/sequence_finder.py |  4 +--
 5 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 scripts/run_exp1.sh
 create mode 100644 scripts/run_exp2.sh

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5522eaa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Fanis Baikas
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..09ec069
--- /dev/null
+++ b/README.md
@@ -0,0 +1,69 @@
+# EvoDSS: Evolutionary Data Subset Selection for Incremental Learning on Memory-Constrained Systems
+
+### This repository contains the open-source implementation for EvoDSS, a genetic algorithm for selecting subsets of data examples that need to be preserved in non-volatile memory for further reuse, while learning in a class-incremental supervised setting. This work has been submitted to GECCO 2024 (currently under review).
+
+
+## Repository folder structure
+This is the original folder structure of the repo before running the scripts to generate results.
+```
+.
+├── src         # Python source code 
+├── scripts     # Bash scripts for generating results
+└── tests       # Unit tests 
+```
+
+After running the scripts, the folder structure changes to:
+```
+.
+├── artifacts   # Results from intermediate computations
+├── datasets    # Dataset file, downloaded by PyTorch library
+├── plots       # Plot figures created by generate_plots.py 
+├── results     # Experimental results 
+├── src         # Python source code 
+├── scripts     # Bash scripts for generating results
+└── tests       # Unit tests 
+```
+
+## Setup
+### Set up the conda environment
+Clone the repository and set up the conda environment to run the EvoDSS code:
+```
+cd <path_to_evodss>
+conda env create -f env.yml
+conda activate evodss
+```
+
+## Reproduce experiments
+
+To reproduce experimental results, follow the steps described below in the exact order.
+
+(1) Generate the results for random balanced subsets:
+```
+cd scripts
+sh random_subsets_script.sh mnist
+sh random_subsets_script.sh fashion
+```
+
+(2) Run the greedy algorithm to find the "low" and "high" accuracy sequences:
+```
+python ../src/sequence_finder.py MNIST
+python ../src/sequence_finder.py FashionMNIST
+```
+
+(3) Run experiment 1. The default list of subset proportions is `[0.0005, 0.001, ..., 0.256]`. For every subset proportion, 
+the experiment is repeated for 20 trials. To change the subset proportions list and number of trials, modify the 
+`subset_props` and `num_of_trials` parameters in `run_exp1.sh`.
+
+```
+bash run_exp1.sh mnist
+bash run_exp1.sh fashion
+```
+
+(4) Run experiment 2. The default list of memory sizes is `[0.125, 2, 16]`. For every memory size, the experiment is
+repeated for 20 trials on both the "low" and "high" accuracy sequences. To change the list of memory sizes and number of trials, modify the `memory_sizes` and
+`num_of_trials` parameters in `run_exp2.sh`, then run `bash run_exp2.sh mnist` and `bash run_exp2.sh fashion`.
+ 
+(5) Generate plots:
+```
+python ../src/generate_plots.py
+```
\ No newline at end of file
diff --git a/scripts/run_exp1.sh b/scripts/run_exp1.sh
new file mode 100644
index 0000000..3e1c15e
--- /dev/null
+++ b/scripts/run_exp1.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# List of subset proportions
+subset_props=("0.0005" "0.001" "0.002" "0.004" "0.008" "0.016" "0.032" "0.064" "0.128" "0.256")
+num_of_trials=20
+
+if [ "$1" = "fashion" ]; then
+  dataset_name="FashionMNIST"
+elif [ "$1" = "mnist" ]; then
+  dataset_name="MNIST"
+else
+  echo "Incorrect dataset name"
+  exit 1
+fi
+
+# Run the 1st experiment for all subset proportions
+for subset_prop in "${subset_props[@]}"; do
+  for ((trial=1; trial<=num_of_trials; trial++)); do
+      echo "Now running: evo_subset_search.py dataset_name=${dataset_name}, subset_prop=${subset_prop}, trial=${trial}"
+      python ../src/evo_subset_search.py $dataset_name $subset_prop $trial
+  done
+done
+
+echo "Experiment 1 done!"
\ No newline at end of file
diff --git a/scripts/run_exp2.sh b/scripts/run_exp2.sh
new file mode 100644
index 0000000..f00ff15
--- /dev/null
+++ b/scripts/run_exp2.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+# List of subset proportions
+memory_sizes=("0.125" "2" "16")
+sequences=("low" "high")
+num_of_trials=20
+
+if [ "$1" = "fashion" ]; then
+  dataset_name="FashionMNIST"
+elif [ "$1" = "mnist" ]; then
+  dataset_name="MNIST"
+else
+  echo "Incorrect dataset name"
+  exit 1
+fi
+
+# Run the 2nd experiment for all memory sizes
+for memory_size in "${memory_sizes[@]}"; do
+  for sequence in "${sequences[@]}"; do
+    for ((trial=1; trial<=num_of_trials; trial++)); do
+        echo "Now running: class_incremental_learning.py dataset_name=${dataset_name}, memory_size=${memory_size}, sequence=${sequence}, trial=${trial}"
+        python ../src/class_incremental_learning.py $dataset_name $memory_size $sequence $trial
+    done
+  done
+done
+
+echo "Experiment 2 done!"
+
diff --git a/src/sequence_finder.py b/src/sequence_finder.py
index 6871e7c..92b366e 100644
--- a/src/sequence_finder.py
+++ b/src/sequence_finder.py
@@ -34,7 +34,7 @@ def find_next_class(Q, available_classes, M, test_example_idxs, acc_list, high=T
             new_test_example_idxs = torch.nonzero((test_set.targets == class_num))
             new_test_example_idxs = torch.squeeze(new_test_example_idxs).numpy().tolist()
 
-            acc = ACC(classifier, X_test[test_example_idxs + new_test_example_idxs], y_test[test_example_idxs + new_test_example_idxs],
+            acc = ACC(classifier, X_test, y_test,
                       subset_idxs=M + new_class_idxs, test_subset_idxs=test_example_idxs + new_test_example_idxs)
 
             # print('Q =', Q, ', new class', class_num, ' acc = ' + f'{acc:.5f}')
@@ -77,7 +77,7 @@ def find_first_pair(available_classes):
 
             M, test_example_idxs = get_train_and_test_examples_of_class_pair(Q)
 
-            acc = ACC(classifier, X_test[test_example_idxs], y_test[test_example_idxs], subset_idxs=M,
+            acc = ACC(classifier, X_test, y_test, subset_idxs=M,
                       test_subset_idxs=test_example_idxs)
 
             print('Q =', Q, ' acc = ' + f'{acc:.5f}')
-- 
GitLab