diff --git a/Model_KS_02.py b/Model_KS_02.py
deleted file mode 100644
index 307a132da97748a1bb16d557bfe840d6b61fd5cc..0000000000000000000000000000000000000000
--- a/Model_KS_02.py
+++ /dev/null
@@ -1,203 +0,0 @@
-###################Dependency Imports#################
-import tensorflow as tf
-import larq as lq
-import numpy as np
-import os
-import matplotlib
-
-matplotlib.use('Agg')
-import matplotlib.pyplot as plot
-from sklearn.metrics import confusion_matrix
-import time
-import datetime
-
-#################OUR IMPORTS##################
-import PreProcessing
-import ModelFunctions
-
-##Sharath's Imports
-import metrics
-import utils
-
-class_dict_classToNum = {
-    'brakes squeaking': 0,
-    'car': 1,
-    'children': 2,
-    'large vehicle': 3,
-    'people speaking': 4,
-    'people walking': 5
-}
-class_dict_numToClass = dict(map(reversed, class_dict_classToNum.items()))
-
-
-class model:
-    def __init__(self):
-        self.model = tf.keras.models.Sequential()
-        return
-
-    def Create_FP_Model(self, X_train, Y_train):
-        # CRNN model definition
-        cnn_nb_filt = 256  # Number of CNN filters
-        cnn_pool_size = [2, 2, 2]  # Max-pooling across frequency. Length of cnn_pool_size = number of CNN layers
-        fc_nb = [1024, 32]  # Number of FC nodes. Length of fc_nb = number of FC layers
-        dropout_rate = 0.5  # Dropout after each layer
-
-        for _i, _cnt in enumerate(cnn_pool_size):
-            self.model.add(
-                tf.keras.layers.Conv2D(cnn_nb_filt, (3, 3), input_shape=(X_train.shape[1], X_train.shape[2], 1),
-                                       padding="same"))
-            self.model.add(tf.keras.layers.BatchNormalization())
-            self.model.add(tf.keras.layers.Activation("relu"))
-            self.model.add(tf.keras.layers.MaxPooling2D((1, cnn_pool_size[_i])))
-        # Flatten the filter and frequency axes into one feature axis per time frame
-        self.model.add(tf.keras.layers.Reshape(
-            (X_train.shape[-2], int(cnn_nb_filt * (X_train.shape[-1] / np.prod(cnn_pool_size))))))
-
-        for _f in fc_nb:
-            self.model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(_f)))
-            self.model.add(tf.keras.layers.Dropout(dropout_rate))
-
-        self.model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(Y_train.shape[2])))
-        self.model.add(tf.keras.layers.Activation("sigmoid", name="strong_out"))
-        self.model.compile(optimizer='Adam', loss='binary_crossentropy')
-
-    def Summary(self):
-        lq.models.summary(self.model)
-
-
-def plot_functions(_nb_epoch, _tr_loss, _val_loss, _f1, _er, extension=''):
-    plot.figure()
-
-    plot.subplot(211)
-    plot.plot(range(_nb_epoch), _tr_loss, label='train loss')
-    plot.plot(range(_nb_epoch), _val_loss, label='val loss')
-    plot.legend()
-    plot.grid(True)
-
-    plot.subplot(212)
-    plot.plot(range(_nb_epoch), _f1, label='f')
-    plot.plot(range(_nb_epoch), _er, label='er')
-    plot.legend()
-    plot.grid(True)
-
-    plot.savefig(__models_dir + __fig_name + extension)
-    plot.close()
-    print('figure name : {}'.format(__fig_name))
-
-
-if __name__ == '__main__':
-    batch_size = 128  # Decrease this to run on smaller GPUs
-    seq_len = 256  # Frame sequence length. Input to the CRNN.
-    nb_epoch = 500  # Training epochs
-    patience = int(0.25 * nb_epoch)  # Patience for early stopping
-
-    preprocess = PreProcessing.npz_preprocessing()
-    X_train, Y_train = preprocess.load_from_npz(
-        "/mainfs/cdt/TUT-sound-events-2017-modified/processed/train_fold_1_data.npz")
-    # X_train, Y_train = preprocess.load_from_npz("/Users/charles/Documents/MINDS/Year1/6003_Project/datasets_processed/train_fold_1_data.npz")
-    X_train, Y_train = preprocess.split_into_batches(X_train, Y_train, seq_len)
-
-    X_test, Y_test = preprocess.load_from_npz(
-        "/mainfs/cdt/TUT-sound-events-2017-modified/processed/test_fold_1_data.npz")
-    # X_test, Y_test = preprocess.load_from_npz("/Users/charles/Documents/MINDS/Year1/6003_Project/datasets_processed/test_fold_1_data.npz")
-    X_test, Y_test = preprocess.split_into_batches(X_test, Y_test, seq_len)
-
-    our_model = model()
-    our_model.Create_FP_Model(X_train, Y_train)
-    our_model.Summary()
-
-    # get the data ready for the network by adding another dimension for the feature maps
-    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
-    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
-
-    # ------------------------------------
-    # the next bit is just copy and paste from Sharath SED
-    # ------------------------------------
-    # Number of frames in 1 second, required to calculate F and ER for 1-sec segments.
-    # Make sure the nfft and sr are the same as in feature.py
-    sr = 44100
-    nfft = 2048
-    frames_1_sec = int(sr / (nfft / 2.0))
-
-    __fig_name = time.strftime("%m_%d_%H_%M_%S")
-    file_name = os.path.splitext(__file__)[0]
-    # file_name = os.path.splitext(os.path.basename(__file__))[0]
-
-    # Folder for saving model and training curves
-    __models_dir = '/mainfs/cdt/models/' + file_name + "/"
-    # __models_dir = '/Users/charles/Documents/MINDS/Year1/6003_Project/local_testing/models/' + file_name + "/"
-    from pathlib import Path
-
-    Path(__models_dir).mkdir(parents=True, exist_ok=True)
-
-    # TensorBoard Vars
-    log_dir = __models_dir + "TensorLogs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
-                                                          histogram_freq=0,
-                                                          write_graph=True,
-                                                          write_images=True,
-                                                          update_freq='epoch',
-                                                          profile_batch=2,
-                                                          embeddings_freq=1)
-
-    # Training
-    avg_er = list()
-    avg_f1 = list()
-    figname_best_model = ''
-    best_epoch, pat_cnt, best_er, f1_for_best_er, best_conf_mat = 0, 0, 99999, None, None
-    tr_loss, val_loss, f1_overall_1sec_list, er_overall_1sec_list = [0] * nb_epoch, [0] * nb_epoch, [0] * nb_epoch, [0] * nb_epoch
-    posterior_thresh = 0.5
-    for i in range(nb_epoch):
-        print('Epoch : {} '.format(i), end='')
-        hist = our_model.model.fit(
-            X_train, Y_train,
-            batch_size=batch_size,
-            validation_data=(X_test, Y_test),
-            epochs=1,
-            verbose=2,
-            callbacks=[tensorboard_callback]
-        )
-
-        val_loss[i] = hist.history.get('val_loss')[-1]
-        tr_loss[i] = hist.history.get('loss')[-1]
-
-        # Calculate the predictions on test data, in order to calculate ER and F scores
-        pred = our_model.model.predict(X_test)
-        pred_thresh = pred > posterior_thresh
-        score_list = metrics.compute_scores(pred_thresh, Y_test, frames_in_1_sec=frames_1_sec)
-
-        f1_overall_1sec_list[i] = score_list['f1_overall_1sec']
-        er_overall_1sec_list[i] = score_list['er_overall_1sec']
-        pat_cnt = pat_cnt + 1
-
-        # Calculate confusion matrix
-        test_pred_cnt = np.sum(pred_thresh, 2)
-        Y_test_cnt = np.sum(Y_test, 2)
-        conf_mat = confusion_matrix(Y_test_cnt.reshape(-1), test_pred_cnt.reshape(-1))
-        conf_mat = conf_mat / (utils.eps + np.sum(conf_mat, 1)[:, None].astype('float'))
-
-        if er_overall_1sec_list[i] < best_er:
-            best_conf_mat = conf_mat
-            best_er = er_overall_1sec_list[i]
-            f1_for_best_er = f1_overall_1sec_list[i]
-            our_model.model.save(os.path.join(__models_dir, '{}__{}.h5'.format(file_name, __fig_name)))
-            figname_best_model = __fig_name
-            best_epoch = i
-            pat_cnt = 0
-
-        print('tr loss : {}, val loss : {}, F1_overall : {}, ER_overall : {}, Best ER : {}, best_epoch: {}'.format(
-            tr_loss[i], val_loss[i], f1_overall_1sec_list[i], er_overall_1sec_list[i], best_er, best_epoch))
-        plot_functions(nb_epoch, tr_loss, val_loss, f1_overall_1sec_list, er_overall_1sec_list)
-        if pat_cnt > patience:
-            break
-    avg_er.append(best_er)
-    avg_f1.append(f1_for_best_er)
-    print('saved model for the best_epoch: {} with best_er: {} f1_for_best_er: {}'.format(
-        best_epoch, best_er, f1_for_best_er))
-    print('best_conf_mat: {}'.format(best_conf_mat))
-    print('best_conf_mat_diag: {}'.format(np.diag(best_conf_mat)))
-
-    # let's make some graphs
-    model_filepath = __models_dir + '{}__{}.h5'.format(file_name, figname_best_model)
-    ModelFunctions.Generate_Model_Graphs(X_test, model_filepath, __models_dir)
-    ModelFunctions.Generate_Ground_Truth_Graphs(Y_test, __models_dir)
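Both deleted scripts score predictions in 1-second segments via Sharath's metrics module. As a minimal sketch of that setup (the shapes and random posteriors below are illustrative assumptions; the real scoring lives in metrics.compute_scores and is not reproduced here), the hop length implied by sr = 44100 and nfft = 2048 gives int(44100 / 1024) = 43 frames per second:

import numpy as np

sr, nfft = 44100, 2048
hop = nfft / 2.0                       # feature hop length in samples
frames_1_sec = int(sr / hop)           # int(44100 / 1024) = 43

# Illustrative posteriors: 8 sequences of 256 frames over 6 classes.
rng = np.random.default_rng(0)
pred = rng.random((8, 256, 6))

posterior_thresh = 0.5
pred_thresh = pred > posterior_thresh  # binary frame-level decisions

# Per-frame count of active classes, as fed to confusion_matrix above.
test_pred_cnt = np.sum(pred_thresh, 2)  # shape (8, 256)
print(frames_1_sec, test_pred_cnt.shape)

Note that the confusion matrix in these scripts compares per-frame counts of active classes, not class identities, so its diagonal measures agreement on polyphony level rather than per-class accuracy.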
diff --git a/Model_KS_03.py b/Model_KS_03.py
deleted file mode 100644
index 2f059012d0eeccce4e672d405b48db90e8051081..0000000000000000000000000000000000000000
--- a/Model_KS_03.py
+++ /dev/null
@@ -1,216 +0,0 @@
-###################Dependency Imports#################
-import tensorflow as tf
-import larq as lq
-import numpy as np
-import os
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plot
-from sklearn.metrics import confusion_matrix
-import time
-from pathlib import Path
-from tensorflow.keras.regularizers import l1
-import datetime
-
-
-#################OUR IMPORTS##################
-import PreProcessing
-import ModelFunctions
-
-##Sharath's Imports
-import metrics
-import utils
-
-class_dict_classToNum = {
-    'brakes squeaking': 0,
-    'car': 1,
-    'children': 2,
-    'large vehicle': 3,
-    'people speaking': 4,
-    'people walking': 5
-}
-class_dict_numToClass = dict(map(reversed, class_dict_classToNum.items()))
-
-class model:
-    def __init__(self):
-        self.model = tf.keras.models.Sequential()
-        return
-
-    def Create_FP_Model(self, X_train, Y_train):
-        # CRNN model definition
-        cnn_nb_filt = 128  # Number of CNN filters
-        cnn_pool_size = [5, 2, 2]  # Max-pooling across frequency. Length of cnn_pool_size = number of CNN layers
-        rnn_nb = [32, 32]  # Number of RNN nodes. Length of rnn_nb = number of RNN layers
-        fc_nb = [32]  # Number of FC nodes. Length of fc_nb = number of FC layers
-        dropout_rate = 0.5  # Dropout after each layer
-
-        for _i, _cnt in enumerate(cnn_pool_size):
-            self.model.add(tf.keras.layers.Conv2D(cnn_nb_filt, (3, 3), input_shape=(X_train.shape[1], X_train.shape[2], 1), padding="same", kernel_regularizer=l1(0.0001)))
-            self.model.add(tf.keras.layers.BatchNormalization())
-            self.model.add(tf.keras.layers.Activation("relu"))
-            self.model.add(tf.keras.layers.MaxPooling2D((1, cnn_pool_size[_i])))
-        # Flatten the filter and frequency axes into one feature axis per time frame
-        self.model.add(tf.keras.layers.Reshape((X_train.shape[-2], int(cnn_nb_filt * (X_train.shape[-1] / np.prod(cnn_pool_size))))))
-
-        for _r in rnn_nb:
-            self.model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(_r, activation="tanh", dropout=dropout_rate, recurrent_dropout=dropout_rate, return_sequences=True), merge_mode="mul"))
-
-        for _f in fc_nb:
-            self.model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(_f)))
-            self.model.add(tf.keras.layers.Dropout(dropout_rate))
-
-        self.model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(Y_train.shape[2])))
-        self.model.add(tf.keras.layers.Activation("sigmoid", name="strong_out"))
-        self.model.compile(optimizer='Adam', loss='binary_crossentropy')
-
-    def Summary(self):
-        lq.models.summary(self.model)
-
-def plot_functions(_nb_epoch, _tr_loss, _val_loss, _f1, _er, extension=''):
-    plot.figure()
-
-    plot.subplot(211)
-    plot.plot(range(_nb_epoch), _tr_loss, label='train loss')
-    plot.plot(range(_nb_epoch), _val_loss, label='val loss')
-    plot.legend()
-    plot.grid(True)
-
-    plot.subplot(212)
-    plot.plot(range(_nb_epoch), _f1, label='f')
-    plot.plot(range(_nb_epoch), _er, label='er')
-    plot.legend()
-    plot.grid(True)
-
-    plot.savefig(__models_dir + __fig_name + extension)
-    plot.close()
-    print('figure name : {}'.format(__fig_name))
-
-if __name__ == '__main__':
-    # set up some classes
-    preprocess = PreProcessing.npz_preprocessing()
-    our_model = model()
-    model_filepath = ""
-
-    for fold in [1, 2, 3, 4]:
-        # COMMENT AND UNCOMMENT TO RUN LOCALLY:
-        # IRIDIS
-        train_data_path = "/mainfs/cdt/TUT-sound-events-2017-modified/processed/train_fold_{}_data.npz".format(fold)
-        test_data_path = "/mainfs/cdt/TUT-sound-events-2017-modified/processed/test_fold_{}_data.npz".format(fold)
-        file_name = os.path.splitext(__file__)[0]
-        __models_dir = '/mainfs/cdt/models/' + file_name + "/"
-
-        # LOCAL
-        # train_data_path = "/Users/charles/Documents/MINDS/Year1/6003_Project/datasets_processed/TUT-2016/train_fold_{}_data.npz".format(fold)
-        # test_data_path = "/Users/charles/Documents/MINDS/Year1/6003_Project/datasets_processed/TUT-2016/test_fold_{}_data.npz".format(fold)
-        # file_name = os.path.splitext(os.path.basename(__file__))[0]
-        # __models_dir = '/Users/charles/Documents/MINDS/Year1/6003_Project/local_testing/models/' + file_name + "/"
-
-        print("######## FOLD: {}".format(fold))
-
-        batch_size = 128  # Decrease this to run on smaller GPUs
-        seq_len = 256  # Frame sequence length. Input to the CRNN.
-        nb_epoch = 10  # Training epochs
-        patience = int(0.25 * nb_epoch)  # Patience for early stopping
-        Path(__models_dir).mkdir(parents=True, exist_ok=True)
-
-        X_train, Y_train = preprocess.load_from_npz(train_data_path)
-        X_train, Y_train = preprocess.split_into_batches(X_train, Y_train, seq_len)
-
-        X_test, Y_test = preprocess.load_from_npz(test_data_path)
-        X_test, Y_test = preprocess.split_into_batches(X_test, Y_test, seq_len)
-
-        if fold == 1:
-            print("Creating New Model...")
-            our_model.Create_FP_Model(X_train, Y_train)
-        else:
-            print("Loading: {}...".format(model_filepath))
-            our_model.model = tf.keras.models.load_model(model_filepath)
-        our_model.Summary()
-
-        # get the data ready for the network by adding another dimension for the feature maps
-        X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2], 1))
-        X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))
-
-        # ------------------------------------
-        # the next bit is just copy and paste from Sharath SED
-        # ------------------------------------
-        # Number of frames in 1 second, required to calculate F and ER for 1-sec segments.
-        # Make sure the nfft and sr are the same as in feature.py
-        sr = 44100
-        nfft = 2048
-        frames_1_sec = int(sr / (nfft / 2.0))
-
-        __fig_name = time.strftime("%m_%d_%H_%M_%S")
-        file_name = os.path.splitext(__file__)[0]
-        # file_name = os.path.splitext(os.path.basename(__file__))[0]
-
-        # TensorBoard Vars
-        log_dir = __models_dir + "TensorLogs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
-        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
-                                                              histogram_freq=0,
-                                                              write_graph=True,
-                                                              write_images=True,
-                                                              update_freq='epoch',
-                                                              profile_batch=2,
-                                                              embeddings_freq=1)
-
-        # Training
-        avg_er = list()
-        avg_f1 = list()
-        best_epoch, pat_cnt, best_er, f1_for_best_er, best_conf_mat = 0, 0, 99999, None, None
-        tr_loss, val_loss, f1_overall_1sec_list, er_overall_1sec_list = [0] * nb_epoch, [0] * nb_epoch, [0] * nb_epoch, [0] * nb_epoch
-        posterior_thresh = 0.5
-        for i in range(nb_epoch):
-            print('Epoch : {} '.format(i), end='')
-            hist = our_model.model.fit(
-                X_train, Y_train,
-                batch_size=batch_size,
-                validation_data=(X_test, Y_test),
-                epochs=1,
-                verbose=2,
-                callbacks=[tensorboard_callback]
-            )
-
-            val_loss[i] = hist.history.get('val_loss')[-1]
-            tr_loss[i] = hist.history.get('loss')[-1]
-
-            # Calculate the predictions on test data, in order to calculate ER and F scores
-            pred = our_model.model.predict(X_test)
-            pred_thresh = pred > posterior_thresh
-            score_list = metrics.compute_scores(pred_thresh, Y_test, frames_in_1_sec=frames_1_sec)
-
-            f1_overall_1sec_list[i] = score_list['f1_overall_1sec']
-            er_overall_1sec_list[i] = score_list['er_overall_1sec']
-            pat_cnt = pat_cnt + 1
-
-            # Calculate confusion matrix
-            test_pred_cnt = np.sum(pred_thresh, 2)
-            Y_test_cnt = np.sum(Y_test, 2)
-            conf_mat = confusion_matrix(Y_test_cnt.reshape(-1), test_pred_cnt.reshape(-1))
-            conf_mat = conf_mat / (utils.eps + np.sum(conf_mat, 1)[:, None].astype('float'))
-
-            if er_overall_1sec_list[i] < best_er:
-                best_conf_mat = conf_mat
-                best_er = er_overall_1sec_list[i]
-                f1_for_best_er = f1_overall_1sec_list[i]
-                our_model.model.save(os.path.join(__models_dir, '{}__{}.tf'.format(file_name, __fig_name)), save_format='tf')
-                model_filepath = __models_dir + '{}__{}.tf'.format(file_name, __fig_name)
-                best_epoch = i
-                pat_cnt = 0
-
-            print('tr loss : {}, val loss : {}, F1_overall : {}, ER_overall : {}, Best ER : {}, best_epoch: {}'.format(
-                tr_loss[i], val_loss[i], f1_overall_1sec_list[i], er_overall_1sec_list[i], best_er, best_epoch))
-            plot_functions(nb_epoch, tr_loss, val_loss, f1_overall_1sec_list, er_overall_1sec_list)
-            if pat_cnt > patience:
-                break
-        avg_er.append(best_er)
-        avg_f1.append(f1_for_best_er)
-        print('saved model for the best_epoch: {} with best_er: {} f1_for_best_er: {}'.format(
-            best_epoch, best_er, f1_for_best_er))
-        print('best_conf_mat: {}'.format(best_conf_mat))
-        print('best_conf_mat_diag: {}'.format(np.diag(best_conf_mat)))
-
-        # let's make some graphs
-        fold_label_dir = __models_dir + "{}/".format(fold)
-        Path(fold_label_dir).mkdir(parents=True, exist_ok=True)
-        ModelFunctions.Generate_Model_Graphs(X_test, model_filepath, fold_label_dir)
-        ModelFunctions.Generate_Ground_Truth_Graphs(Y_test, fold_label_dir)
\ No newline at end of file
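The Reshape that bridges the CNN stack and the recurrent/FC head in both deleted models relies on max-pooling only over the frequency axis, so the time axis keeps one step per input frame. A minimal shape check, assuming 40 mel bands (a value the scripts themselves don't state) and the Model_KS_03 settings of 128 filters with pools [5, 2, 2]:

import numpy as np
import tensorflow as tf

seq_len, n_mels = 256, 40
cnn_nb_filt, cnn_pool_size = 128, [5, 2, 2]

inp = tf.keras.Input(shape=(seq_len, n_mels, 1))
x = inp
for p in cnn_pool_size:
    x = tf.keras.layers.Conv2D(cnn_nb_filt, (3, 3), padding="same")(x)
    x = tf.keras.layers.MaxPooling2D((1, p))(x)  # pool frequency only
# Frequency shrinks by prod(cnn_pool_size) = 20: 40 -> 2 bins; time stays 256.
# Flattened per-frame features: 128 * 40 / 20 = 256.
x = tf.keras.layers.Reshape(
    (seq_len, int(cnn_nb_filt * n_mels / np.prod(cnn_pool_size))))(x)
print(x.shape)  # (None, 256, 256)

Keeping the time resolution intact is what lets the sigmoid "strong_out" layer emit one multi-label prediction per frame; note the feature count must be divisible by prod(cnn_pool_size), otherwise the int() truncation makes the Reshape target inconsistent with the actual tensor size.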