From f4b89b6f8edc33f80b84b9ed2d3a85e13a8644b3 Mon Sep 17 00:00:00 2001 From: Fanis Baikas <fan.baikas@gmail.com> Date: Thu, 1 Feb 2024 11:41:28 +0000 Subject: [PATCH] Created README.md and license files. Added run_exp1.sh and run_exp2.sh scripts. Fixed bug in sequence_finder.py. --- LICENSE | 21 +++++++++++++ README.md | 69 ++++++++++++++++++++++++++++++++++++++++++ scripts/run_exp1.sh | 24 +++++++++++++++ scripts/run_exp2.sh | 28 +++++++++++++++++ src/sequence_finder.py | 4 +-- 5 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 scripts/run_exp1.sh create mode 100644 scripts/run_exp2.sh diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5522eaa --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 [fullname] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..09ec069 --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +# EvoDSS: Evolutionary Data Subset Selection for Incremental Learning on Memory-Constrained Systems + +### This repository contains the open-source implementation for EvoDSS, a genetic algorithm for selecting subsets of data examples that need to be preserved in non-volatile memory for further reuse, while learning in a class-incremental supervised setting. This work has been submitted to GECCO 2024 (currently under review). + + +## Repository folder structure +This is the original folder structure of the repo before running the scripts to generate results. +``` +. +├── src # Python source code +├── scripts # Bash scripts for generating results +└── tests # Unit tests +``` + +After running the scripts, the folder structure changes to: +``` +. +├── artifacts # Results from intermediate computations +├── datasets # Dataset files, downloaded by PyTorch library +├── plots # Plot figures created by generate_plots.py +├── results # Experimental results +├── src # Python source code +├── scripts # Bash scripts for generating results +└── tests # Unit tests +``` + +## Setup +### Set up the conda environment +Clone the repository and set up the conda environment to run EvoDSS code: +``` +cd <path_to_evodss> +conda env create -f env.yml +conda activate evodss +``` + +## Reproduce experiments + +To reproduce experimental results, follow the steps described below in the exact order. + +(1) Generate the results for random balanced subsets: +``` +cd scripts +sh random_subsets_script.sh mnist +sh random_subsets_script.sh fashion +``` + +(2) Run the greedy algorithm to find the "low" and "high" accuracy sequences: +``` +python ../src/sequence_finder.py MNIST +python ../src/sequence_finder.py FashionMNIST +``` + +(3) Run experiment 1. The default list of subset proportions is `[0.0005, 0.001, ..., 0.256]`. 
For every subset proportion, +the experiment is repeated for 20 trials. To change the subset proportions list and number of trials, modify the +`subset_props` and `num_of_trials` parameters in `run_exp1.sh`. + +``` +bash run_exp1.sh mnist +bash run_exp1.sh fashion +``` + +(4) Run experiment 2. The default list of memory_sizes is `[0.125, 2, 16]`. For every memory size, the experiment is +repeated for 20 trials. To change the list of memory sizes and number of trials, modify the `memory_sizes` and +`num_of_trials` parameters in `run_exp2.sh`. + +(5) Generate plots: +``` +python ../src/generate_plots.py +``` \ No newline at end of file diff --git a/scripts/run_exp1.sh b/scripts/run_exp1.sh new file mode 100644 index 0000000..3e1c15e --- /dev/null +++ b/scripts/run_exp1.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# List of subset proportions (bash array; this script requires bash, not POSIX sh) +subset_props=("0.0005" "0.001" "0.002" "0.004" "0.008" "0.016" "0.032" "0.064" "0.128" "0.256") +num_of_trials=20 + +if [ "$1" = "fashion" ]; then + dataset_name="FashionMNIST" +elif [ "$1" = "mnist" ]; then + dataset_name="MNIST" +else + echo "Incorrect dataset name" + exit 1 +fi + +# Run the 1st experiment for all subset proportions +for subset_prop in "${subset_props[@]}"; do + for ((trial=1; trial<=num_of_trials; trial++)); do + echo "Now running: evo_subset_search.py dataset_name=${dataset_name}, subset_prop=${subset_prop}, trial=${trial}" + python ../src/evo_subset_search.py "$dataset_name" "$subset_prop" "$trial" + done +done + +echo "Experiment 1 done!" 
\ No newline at end of file diff --git a/scripts/run_exp2.sh b/scripts/run_exp2.sh new file mode 100644 index 0000000..f00ff15 --- /dev/null +++ b/scripts/run_exp2.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# List of memory sizes (bash array; this script requires bash, not POSIX sh) +memory_sizes=("0.125" "2" "16") +sequences=("low" "high") +num_of_trials=20 + +if [ "$1" = "fashion" ]; then + dataset_name="FashionMNIST" +elif [ "$1" = "mnist" ]; then + dataset_name="MNIST" +else + echo "Incorrect dataset name" + exit 1 +fi + +# Run the 2nd experiment for all memory sizes +for memory_size in "${memory_sizes[@]}"; do + for sequence in "${sequences[@]}"; do + for ((trial=1; trial<=num_of_trials; trial++)); do + echo "Now running: class_incremental_learning.py dataset_name=${dataset_name}, memory_size=${memory_size}, sequence=${sequence}, trial=${trial}" + python ../src/class_incremental_learning.py "$dataset_name" "$memory_size" "$sequence" "$trial" + done + done +done + +echo "Experiment 2 done!" + diff --git a/src/sequence_finder.py b/src/sequence_finder.py index 6871e7c..92b366e 100644 --- a/src/sequence_finder.py +++ b/src/sequence_finder.py @@ -34,7 +34,7 @@ def find_next_class(Q, available_classes, M, test_example_idxs, acc_list, high=T new_test_example_idxs = torch.nonzero((test_set.targets == class_num)) new_test_example_idxs = torch.squeeze(new_test_example_idxs).numpy().tolist() - acc = ACC(classifier, X_test[test_example_idxs + new_test_example_idxs], y_test[test_example_idxs + new_test_example_idxs], + acc = ACC(classifier, X_test, y_test, subset_idxs=M + new_class_idxs, test_subset_idxs=test_example_idxs + new_test_example_idxs) # print('Q =', Q, ', new class', class_num, ' acc = ' + f'{acc:.5f}') @@ -77,7 +77,7 @@ def find_first_pair(available_classes): M, test_example_idxs = get_train_and_test_examples_of_class_pair(Q) - acc = ACC(classifier, X_test[test_example_idxs], y_test[test_example_idxs], subset_idxs=M, + acc = ACC(classifier, X_test, y_test, subset_idxs=M, test_subset_idxs=test_example_idxs) print('Q =', Q, ' 
acc = ' + f'{acc:.5f}') -- GitLab