diff --git a/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml b/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml index 9bd0b52d22511aac7235cf05f165a3b25b8a6124..486685f258aba538b0262cdd5339b23c9dc266a2 100644 --- a/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml +++ b/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml @@ -28,7 +28,8 @@ test_data: !ref <output_folder>/test.csv wav2vec2_hub: TencentGameMate/chinese-wav2vec2-large wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 80 lr: 1.0 lr_wav2vec: 0.0001 @@ -76,7 +77,8 @@ tokenizer: !apply:transformers.BertTokenizer.from_pretrained # bert-base-chinese tokens length output_neurons: 21128 -# Decoding parameters +############################## Decoding ######################################## + # Be sure that the bos and eos index match with the BPEs ones # Decoding parameters test_searcher: !name:speechbrain.decoders.CTCBeamSearcher @@ -98,64 +100,37 @@ beta: 0.5 # which Chinese writing normally does not do. # If remove_spaces, spaces are removed # from the transcript before computing CER. -# (e.g., 祝 可爱 的 你 —> 祝可爱的你) remove_spaces: True split_tokens: !apply:operator.not_ [!ref <remove_spaces>] epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [90, 100, 110] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> # Time Drop -time_drop_length_low: 35 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 45 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 2 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 2 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 35 + drop_length_high: 45 + drop_count_low: 2 + drop_count_high: 2 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - 
warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -164,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <freq_drop>, !ref <time_warp>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear @@ -230,6 +207,8 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.9 patient: 0 +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py b/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py index 227204f44c3dc7f7f9d630610509d264665ce4cb..43783eed7f49f54a3f41e8e6adffdb2381eee50e 100644 --- a/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py +++ b/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py @@ -56,6 +56,8 @@ class ASR(sb.Brain): ids = batch.id tokens, tokens_lens = batch.tokens + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if stage == sb.Stage.TRAIN: if hasattr(self.hparams, "fea_augment"): tokens = self.hparams.fea_augment.replicate_labels(tokens) diff --git a/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml b/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml index 75b303f6691ed73a720606b0eab472724c996315..e6fda7de26ed417a2cae1ddf6389aacb717d420d 100644 --- a/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml +++ b/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml @@ -29,7 +29,8 @@ test_data: !ref <output_folder>/test.csv noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script tokenizer_file: speechbrain/asr-transformer-aishell/tokenizer.ckpt -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 40 number_of_ctc_epochs: 10 batch_size: 16 @@ -71,7 +72,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: !ref <num_workers> -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -108,7 +109,6 @@ scorer_beam_scale: 0.5 # which Chinese writing normally does not do. # If remove_spaces, spaces are removed # from the transcript before computing CER.
-# (e.g., 祝 可爱 的 你 —> 祝可爱的你) remove_spaces: True split_tokens: !apply:operator.not_ [!ref <remove_spaces>] @@ -118,6 +118,8 @@ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global +############################## Augmentations ################################### + compute_features: !new:speechbrain.lobes.features.Fbank sample_rate: !ref <sample_rate> n_fft: !ref <n_fft> @@ -132,57 +134,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 -# Augmenter: Combines previously defined augmentations to perform data augmentation # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -192,6 +174,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> @@ -268,7 +252,8 @@ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer paths: tokenizer: !ref <tokenizer_file> -# Scorer +############################## Decoding ######################################## + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -305,6 +290,8 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.8 patient: 0 +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/AISHELL-1/ASR/seq2seq/train.py b/recipes/AISHELL-1/ASR/seq2seq/train.py index 69d2e75d6b2bd88008671f0ce4e5ed7eb237dce0..bc2c49b888ce0cb997d03a87da151a0cc524d35d 100644 --- a/recipes/AISHELL-1/ASR/seq2seq/train.py +++ b/recipes/AISHELL-1/ASR/seq2seq/train.py @@ -29,10 +29,6 @@ class ASR(sb.Brain): # Forward pass feats = self.hparams.compute_features(wavs) - - if stage == sb.Stage.TRAIN and hasattr(self.hparams, "fea_augment"): - feats, fea_lens = self.hparams.fea_augment(feats, wav_lens) - feats = self.modules.normalize(feats, wav_lens) x = self.modules.enc(feats.detach()) e_in = self.modules.emb(tokens_bos) # y_in bos + tokens @@ -65,12 +61,16 @@ class ASR(sb.Brain): tokens_eos, tokens_eos_lens = batch.tokens_eos tokens, tokens_lens = batch.tokens + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens) - tokens_eos = self.hparams.wav_augment.replicate_labels(tokens_eos) - tokens_eos_lens = self.hparams.wav_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost(
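Note: `replicate_multiple_labels` replaces the four separate `replicate_labels` calls above. A minimal sketch of the bookkeeping this helper must perform, assuming the Augmenter grows the batch dimension when `concat_original` or `parallel_augment` is active (the helper below is illustrative, not the library implementation):

```python
import torch

def replicate_labels_sketch(labels: torch.Tensor, n_copies: int) -> torch.Tensor:
    # Repeat along the batch dimension so labels stay aligned with a batch
    # that augmentation has grown to n_copies times its original size.
    return torch.cat([labels] * n_copies, dim=0)

tokens = torch.tensor([[5, 9, 2], [7, 1, 0]])    # (batch=2, seq_len=3)
tokens_big = replicate_labels_sketch(tokens, 2)  # (batch=4, seq_len=3)
assert tokens_big.shape == (4, 3)
```

Replicating all four tensors in one call keeps them in sync; it also removes the copy-paste slip fixed further down in transformer/train.py, where the `fea_augment` branch mistakenly called `wav_augment.replicate_labels`.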
diff --git a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml index b98e371b2d32c131949f8255b3a42042c5479877..408c9e68008f74950c1916b53b6b9c29da7efdec 100644 --- a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml +++ b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml @@ -30,7 +30,8 @@ test_data: !ref <save_folder>/test.csv noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script tokenizer_file: speechbrain/asr-transformer-aishell/tokenizer.ckpt -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 50 batch_size: 8 ctc_weight: 0.3 @@ -77,7 +78,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: !ref <num_workers> -####################### Model parameters ########################### +####################### Model Parameters ####################################### # Transformer d_model: 256 nhead: 4 @@ -103,7 +104,7 @@ valid_beam_size: 10 test_beam_size: 10 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -157,7 +158,8 @@ SGD: !name:torch.optim.SGD momentum: 0.99 nesterov: True -# Scorer +############################## Decoding & Optimizer ############################ + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -227,7 +229,7 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> -# ----- WAVEFORM AUGMENTATION ----- # +############################## Augmentation #################################### # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL @@ -238,75 +240,43 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 1 max_augmentations: 1 augment_prob: 1.0 augmentations: [ !ref <add_noise>]
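The feature-domain pipeline that follows (time drop, frequency drop, time warp) is a SpecAugment-style recipe. As a minimal sketch of how the trimmed hparams instantiate in Python, with constructor values taken from the YAML (the omitted `replace: zeros` and `dim: 1` are assumed to be the `SpectrogramDrop` defaults, and the `(features, lengths)` call convention mirrors the `fea_augment(feats, wav_lens)` usage in the recipes):

```python
import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.freq_domain import SpectrogramDrop

# Values match the trimmed YAML below; omitted args fall back to defaults.
time_drop = SpectrogramDrop(
    drop_length_low=0, drop_length_high=100, drop_count_low=2, drop_count_high=2
)
freq_drop = SpectrogramDrop(
    drop_length_low=30, drop_length_high=40, drop_count_low=2, drop_count_high=2, dim=2
)

feats = torch.rand(8, 200, 80)  # dummy fbank batch: (batch, time, n_mels)
lens = torch.ones(8)            # relative lengths
aug = Augmenter(min_augmentations=1, max_augmentations=1,
                augmentations=[time_drop, freq_drop])
feats_aug, lens_aug = aug(feats, lens)
```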
- - # ----- FEATURE AUGMENTATION ----- # -time_drop_length_low: 0 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 100 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 2 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 2 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks - # Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -freq_drop_length_low: 30 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 40 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks + drop_length_low: 0 + drop_length_high: 100 + drop_count_low: 2 + drop_count_high: 2 # Frequency Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 30 + drop_length_high: 40 + drop_count_low: 2 + drop_count_high: 2 dim: 2 -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - # Time warp time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 1 max_augmentations: 1 augment_start_index: !ref <batch_size> # This leaves the original inputs unchanged @@ -317,6 +287,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <freq_drop>, !ref <time_warp>] +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> @@ -324,7 +296,6 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger # which Chinese writing normally does not do. # If remove_spaces, spaces are removed # from the transcript before computing CER.
-# (e.g., 祝 可爱 的 你 —> 祝可爱的你) remove_spaces: True split_tokens: !apply:operator.not_ [!ref <remove_spaces>] diff --git a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml index 13a7826ade7debc99f9c99b3d69e7eed526152b1..a196afc5842ef5235d0f6e9c5179984319c327bf 100644 --- a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml +++ b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml @@ -30,7 +30,8 @@ wav2vec2_hub: facebook/wav2vec2-large-100k-voxpopuli wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint freeze_wav2vec: False -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 80 batch_size: 2 grad_accumulation_factor: 16 @@ -72,7 +73,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: !ref <num_workers> -####################### Model parameters ########################### +####################### Model Parameters ####################################### # Transformer d_model: 256 nhead: 4 @@ -98,7 +99,7 @@ valid_beam_size: 10 test_beam_size: 10 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2 source: !ref <wav2vec2_hub> @@ -140,44 +141,27 @@ model: !new:torch.nn.ModuleList - [!ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>] # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -186,6 +170,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Decoding & Optimizer ############################ # define two optimizers here for two-stage training Adam: !name:torch.optim.Adam @@ -257,6 +242,7 @@ noam_annealing_wav2vect: !new:speechbrain.nnet.schedulers.NoamScheduler n_warmup_steps: 25000 model_size: !ref <d_model> +############################## Logging and Pretrainer ########################## checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> @@ -278,7 +264,6 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger # which Chinese writing normally does not do. # If remove_spaces, spaces are removed # from the transcript before computing CER. -# (e.g., 祝 可爱 的 你 —> 祝可爱的你) remove_spaces: True split_tokens: !apply:operator.not_ [!ref <remove_spaces>] diff --git a/recipes/AISHELL-1/ASR/transformer/train.py b/recipes/AISHELL-1/ASR/transformer/train.py index 977563ac8ec772c69faa801e8d6bb8fb92726cec..63361bf0dd9d597664ce175238529ad10f6442be 100644 --- a/recipes/AISHELL-1/ASR/transformer/train.py +++ b/recipes/AISHELL-1/ASR/transformer/train.py @@ -82,28 +82,26 @@ class ASR(sb.core.Brain): tokens, tokens_lens = batch.tokens if stage == sb.Stage.TRAIN: + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels( - tokens_lens - ) - tokens_eos = self.hparams.wav_augment.replicate_labels( - tokens_eos - ) - tokens_eos_lens = self.hparams.wav_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) if hasattr(self.hparams, "fea_augment"): - tokens = self.hparams.fea_augment.replicate_labels(tokens) - tokens_lens = self.hparams.fea_augment.replicate_labels( - tokens_lens - ) - tokens_eos = self.hparams.wav_augment.replicate_labels( - tokens_eos - ) - tokens_eos_lens = self.hparams.wav_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.fea_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost(
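Across every recipe in this patch, Augmenter keys are deleted only when they restate defaults (`parallel_augment: False`, `concat_original: False`, `repeat_augment: 1`, `shuffle_augmentations: False`). A quick sanity check of that premise, assuming the flags are exposed as attributes of the same name (an assumption, not a documented guarantee):

```python
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.time_domain import DropFreq, DropChunk

# Old-style YAML spelled the flags out explicitly ...
explicit = Augmenter(
    parallel_augment=False, concat_original=False, repeat_augment=1,
    shuffle_augmentations=False, min_augmentations=3, max_augmentations=3,
    augment_prob=1.0, augmentations=[DropFreq(), DropChunk()],
)
# ... while the trimmed YAML relies on the constructor defaults instead.
trimmed = Augmenter(
    min_augmentations=3, max_augmentations=3,
    augment_prob=1.0, augmentations=[DropFreq(), DropChunk()],
)

for flag in ("parallel_augment", "concat_original",
             "repeat_augment", "shuffle_augmentations"):
    assert getattr(explicit, flag) == getattr(trimmed, flag)
```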
diff --git a/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py b/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py index 94196ea7b846d773c2a800a06bb60b377c886261..53aa47375146d3707224211740ad31ccbdc00c66 100644 --- a/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py +++ b/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py @@ -74,16 +74,16 @@ class ASR(sb.core.Brain): tokens, tokens_lens = batch.tokens if stage == sb.Stage.TRAIN: + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels( - tokens_lens - ) - tokens_eos = self.hparams.wav_augment.replicate_labels( - tokens_eos - ) - tokens_eos_lens = self.hparams.wav_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost( diff --git a/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml b/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml index 886d22bda076a76cfe78ffad4a35071784f6f215..d2cb230189911a83d54c5b6caedf7fffa334c57b 100644 --- a/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml +++ b/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml @@ -15,7 +15,7 @@ train_csv: !ref <output_folder>/train.csv valid_csv: !ref <output_folder>/dev.csv -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 5000 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml b/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml index bc286156b73559a3c414efacf79a5b56fc54b657..973df9a1194b9828e6002062fdefbb871da0fc77 100644 --- a/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml +++ b/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml @@ -15,7 +15,7 @@ train_csv: !ref <output_folder>/train.csv valid_csv: !ref <output_folder>/dev.csv -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 5000 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml index 24571404fe0f1303075c768d3691e412d51d01b5..d3cb9493e4ce2a4a9a83ef00f7379fe6c58c2df4 100644 --- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml +++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml @@ -40,7 +40,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml index d4689378aedbf08a33c3bc2eb81caf2e88d14d04..168471dbb1d2f12207ec51e1f7a2eb0e89afe096 100644 --- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml +++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml @@ -40,7 +40,7 @@ noprogressbar:
False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml index 65add025a2eb96b84240089ac2c6daf061f36b80..834857ed77f61a2a2da292de742fae950c2bd05e 100644 --- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml +++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml @@ -40,7 +40,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml index 3b27b796b6d2303b62520d8c6b3ad83cf91b66fd..d48fdecb214c6acef700f1186817142a90af2f0d 100644 --- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml +++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml @@ -40,7 +40,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml index f609c746e9dc7d957d38173807f793dc3d8c78bf..043845aebd245c6ef40c8a41b9635cac018f02cd 100644 --- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml +++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml @@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk n_audio_to_save: 10 sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml index c3483f5f281682ac99ca5c179543e969b1769862..164ccc45b17fd617f2c6a6342bbf23d26001f68f 100644 --- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml +++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml @@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk n_audio_to_save: 10 sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml index 74941dfea9a48ed348af82a713bfc8d2206ed243..fef85267f644aad3f01047cc64100e7a55ba8beb 100644 --- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml +++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml @@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk n_audio_to_save: 10 sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git 
a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml index 6b1518d39a9f968c120fa088de0589e4bd2b2f23..4ec5054f982631250b29b6c03147ced3fbddb586 100644 --- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml +++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml @@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk n_audio_to_save: 10 sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml index a5d6156790a661ec91b37e766d9bcea292801500..adb31ddc68fd3284774c58e4e60b3a28b65457c5 100644 --- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml +++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml @@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk n_audio_to_save: 10 sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/CVSS/S2ST/hparams/train_fr-en.yaml b/recipes/CVSS/S2ST/hparams/train_fr-en.yaml index 8ff7d59be93f292618273f757f31c6f51fbefe57..678dd1c178017fd8f71e7fbbbfccdf048c4cfdd9 100644 --- a/recipes/CVSS/S2ST/hparams/train_fr-en.yaml +++ b/recipes/CVSS/S2ST/hparams/train_fr-en.yaml @@ -59,7 +59,7 @@ wav2vec2_download_path: !ref <save_folder>/pretrained_models wav2vec2_frozen: False wav2vec2_freeze_steps: 10000 -# Training parameters +####################### Training Parameters #################################### lr: 0.0005 lr_wav2vec: 0.00001 loss_reduction: batchmean diff --git a/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml b/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml index 0c4d91aa9c08caff1a76db8288dac522fa68d054..d4722f45ca06cc58946ff78f0aed3d3185b8c15c 100644 --- a/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml +++ b/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml @@ -38,10 +38,10 @@ error_stats: !name:speechbrain.utils.metric_stats.MetricStats metric: !name:speechbrain.nnet.losses.classification_error reduction: batch +####################### Training Parameters #################################### + # Feature parameters btw: 40 - 80 n_mels: 80 - -# Training Parameters sample_rate: 16000 number_of_epochs: 30 batch_size: 4 @@ -64,6 +64,8 @@ test_dataloader_options: batch_size: !ref <batch_size> shuffle: True +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -78,7 +80,6 @@ prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL ext: wav csv_file: !ref <rir_annotation> - # Add reverberation to input signal add_reverb: !new:speechbrain.augment.time_domain.AddReverb csv_file: !ref <rir_annotation> @@ -87,27 +88,21 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb num_workers: !ref <num_workers> # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: 
!ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [90, 100, 110] # List of speed changes for time-stretching speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 shuffle_augmentations: True min_augmentations: 1 max_augmentations: 3 @@ -125,6 +120,8 @@ mean_var_norm_input: !new:speechbrain.processing.features.InputNormalization norm_type: sentence std_norm: False +############################## Models ########################################## + # To design a custom model, either just edit the simple CustomModel # class that's listed here, or replace this `!new` call with a line # pointing to a different file you've defined. @@ -182,6 +179,8 @@ lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler final_value: !ref <lr_final> epoch_count: !ref <number_of_epochs> +############################## Logging and Pretrainer ########################## + # This object is used for saving the state of training both so that it # can be resumed if it gets interrupted, and also so that the best checkpoint # can be later loaded for evaluation or inference. diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml index caf7f2d3b8f5ceeeda1c00e273b4332910fdebcb..643df09944098da647e4067ff7d6546b433ee036 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml @@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +59,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -97,45 +98,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -144,6 +130,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml index dc4ae34b2e2c00b01262bfa053eb9f25f7a76d9d..adb8e5bb52626992dfd6899871a8f74a30bc62dd 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml @@ -33,7 +33,8 @@ skip_prep: False # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 45 lr: 1.0 lr_wav2vec: 0.0001 @@ -61,7 +62,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### # activation: !name:torch.nn.LeakyReLU dnn_neurons: 1024 wav2vec_output_dim: !ref <dnn_neurons> @@ -95,45 +96,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml index f3c68ee9b9bdfc354640ba9ca82d8948231fd6b1..d8aaea36e46f52efaf947e3cf10a14b339c8cb0f 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml @@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +59,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### # activation: !name:torch.nn.LeakyReLU wav2vec_output_dim: 1024 dnn_neurons: 1024 @@ -95,45 +96,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml index 8e2056f83735607bf555f5d1be0dfaa63fdb7888..e32a242d1a4f6d09b5d01b0b614e94df08298167 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml @@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +59,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -96,45 +97,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -143,6 +129,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml index 44f1523f3a666643677ccf5f9808cecab39bcd72..079cfe73fce4ce69541d86017ac971f2daa4f2fc 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml @@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +59,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### # activation: !name:torch.nn.LeakyReLU wav2vec_output_dim: 1024 dnn_neurons: 1024 @@ -94,45 +95,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -141,6 +127,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml index 4f39ad2a06a9d7ea3106cd4e0b6565379e8526c3..0332997523960e7d5ff2cb002335f464bb04a70b 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml @@ -33,7 +33,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 8.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 45 lr: 1.0 lr_wav2vec: 0.0001 @@ -59,7 +60,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### # activation: !name:torch.nn.LeakyReLU wav2vec_output_dim: 1024 dnn_neurons: 1024 @@ -95,45 +96,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -141,6 +127,9 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <speed_perturb>, !ref <drop_freq>, !ref <drop_chunk>] + +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml index da8a28de58c8c9f97ea2e2d10fd33928f13c9297..d4b703eb456fc66104146de3e1b45e986d6eb9b0 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml @@ -31,7 +31,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -57,7 +58,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -95,45 +96,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml index f92d8ad137f5d2423032adfd5e8997f9ad006cf5..ed15a8aadada5e157843d953264dc518abed1658 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml @@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -59,7 +60,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### # activation: !name:torch.nn.LeakyReLU wav2vec_output_dim: 1024 dnn_neurons: 1024 @@ -95,45 +96,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
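For scale: DropChunk lengths are in samples, so at the recipes' 16 kHz sample rate the inline drop_length_low/high of 1000-2000 corresponds to 62.5-125 ms of zeroed audio per chunk, with 1 to 5 chunks per utterance. A small check (dummy all-ones signal; assumes DropChunk's default noise_factor of 0, so dropped chunks are exact zeros):

import torch
from speechbrain.augment.time_domain import DropChunk

drop_chunk = DropChunk(
    drop_length_low=1000, drop_length_high=2000,
    drop_count_low=1, drop_count_high=5,
)
wavs = torch.ones(1, 16000)              # 1 s of dummy "audio" at 16 kHz
out = drop_chunk(wavs, torch.ones(1))    # DropChunk needs relative lengths
zeroed = int((out == 0).sum())
print(f"zeroed {zeroed} samples ~= {1000 * zeroed / 16000:.0f} ms")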
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml index 513d8d3246ab1aa7ae0454b65b58362d11c5b8a4..a1709931a46d9f6f2c7146e901749b2ac25b2ba1 100644 --- a/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml +++ b/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml @@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -59,7 +60,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -97,45 +98,30 @@ test_beam_search: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
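One clarification the deleted inline comments had wrong: drop_freq_low and drop_freq_high are not probabilities. In DropFreq they bound the band-center frequencies that can be dropped, expressed as a fraction of the Nyquist frequency (sample_rate / 2), so the 0-1 range above means "anywhere in the spectrum". A short sketch on a pure tone:

import torch
from speechbrain.augment.time_domain import DropFreq

# Band centers are drawn from [drop_freq_low, drop_freq_high], as a
# fraction of Nyquist; drop_freq_width is the notch width on that scale.
drop_freq = DropFreq(
    drop_freq_low=0, drop_freq_high=1,
    drop_freq_count_low=1, drop_freq_count_high=3,
    drop_freq_width=0.05,
)

t = torch.arange(16000) / 16000
sine = torch.sin(2 * torch.pi * 440 * t).unsqueeze(0)  # [1, time]
print(drop_freq(sine).shape)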
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -144,6 +130,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml index b66374147565ae932bd2a9674416a0de2809159a..cb6f2b3be095a6db5d948d76cc5beca09691d96f 100644 --- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml +++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml @@ -30,7 +30,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 25 number_of_ctc_epochs: 20 lr: 1.0 @@ -62,7 +63,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 80 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -104,51 +105,27 @@ temperature: 1.50 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -165,6 +142,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml index c74d25c663d4b3a8cc06b1f323a3901f4f215970..49f9a0d2b76464c2dbc840a3a414fff0c8fd946e 100644 --- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml +++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml @@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
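The seq2seq (and transformer) recipes apply the same policy in the feature domain instead: SpectrogramDrop with dim=1 masks time frames, with dim=2 masks mel channels, and Warping warps the time axis, i.e. a SpecAugment-style pipeline over Fbank features. These hunks also drop replace: "zeros", dim: 1, warp_window: 5 and warp_mode: "bicubic" outright, relying on the constructor defaults. A sketch of the resulting pipeline (dummy shapes; same API assumptions as above):

import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.freq_domain import SpectrogramDrop, Warping
from speechbrain.lobes.features import Fbank

compute_features = Fbank(sample_rate=16000, n_fft=400, n_mels=80)

time_drop = SpectrogramDrop(drop_length_low=15, drop_length_high=25,
                            drop_count_low=5, drop_count_high=5, dim=1)
freq_drop = SpectrogramDrop(drop_length_low=25, drop_length_high=35,
                            drop_count_low=2, drop_count_high=2, dim=2)
time_warp = Warping()  # warp_window=5, warp_mode="bicubic" by default

fea_augment = Augmenter(min_augmentations=3, max_augmentations=3,
                        augment_prob=1.0,
                        augmentations=[time_drop, freq_drop, time_warp])

wavs = torch.randn(4, 32000)            # [batch, time], 2 s at 16 kHz
feats = compute_features(wavs)          # [batch, frames, n_mels]
aug_feats, feat_lens = fea_augment(feats, torch.ones(4))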
avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 25 number_of_ctc_epochs: 10 lr: 1.0 @@ -60,7 +61,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 80 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -102,50 +103,27 @@ temperature: 1.50 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### + # Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -162,6 +140,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml index b56a75b697ac15ec55431ae278316d78982dad3b..b94373e9bd0028e1668b37da3736259516fe14e1 100644 --- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml +++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml @@ -30,7 +30,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 25 number_of_ctc_epochs: 20 lr: 1.0 @@ -61,7 +62,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 80 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -103,50 +104,27 @@ temperature: 1.50 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### + # Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -163,6 +141,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml index 1c2a85ec8e08d1c7c3ac3d7ff8baf5930dbbd0ca..cc9b0aa995ece9f1e011d0671e59b7fa63780389 100644 --- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml +++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml @@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 25 number_of_ctc_epochs: 20 lr: 1.0 @@ -60,7 +61,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 80 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -102,50 +103,27 @@ temperature: 1.50 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### + # Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -162,6 +140,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml index bf9211caf5c728f89148c07ff2710c8120d8f823..2c0355ae55c9fa226a43f7081309c95fb3559814 100644 --- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml +++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml @@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 8.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 50 number_of_ctc_epochs: 40 lr: 1.0 @@ -59,7 +60,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 80 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -101,50 +102,27 @@ temperature: 1.50 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### + # Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -161,6 +139,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml index dd665ab24a6fa38c61b7fcc5bbcd43e521609865..8bc89c1c4bbced9dc62234b0dfbe0507ffd50374 100644 --- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml +++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml @@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 8.0 -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 25 number_of_ctc_epochs: 20 lr: 1.0 @@ -59,7 +60,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 80 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -102,50 +103,27 @@ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### + # Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -162,6 +140,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml b/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml index 8faaa805f53ce65bdd026115294a5cede6727d3c..9bbab166930f6e31a086be82c595a307961db551 100644 --- a/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml +++ b/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 batch_size: 6 batch_size_valid: 1 @@ -71,7 +71,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <batch_size_valid> -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -109,50 +109,28 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### +# Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 3 + drop_count_high: 3 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -161,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <freq_drop>, !ref <time_warp>] +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml b/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml index 6c3f0bc7d181b47f0462747281874f56795d4152..c96a0939466a88d7e364efe5d69226f020d702e2 100644 --- a/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml +++ b/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
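Since the transducer hunks likewise remove replace, dim, warp_window and warp_mode without re-specifying them, the refactor is only behavior-preserving if the defaults match the deleted values. A quick check, hedged because the attribute names are assumed to mirror the constructor arguments:

from speechbrain.augment.freq_domain import SpectrogramDrop, Warping

drop = SpectrogramDrop()
warp = Warping()
print(drop.replace, drop.dim)                       # expected: zeros 1
print(warp.warp_window, warp.warp_mode, warp.dim)   # expected: 5 bicubic 1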
avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 batch_size: 6 batch_size_valid: 1 @@ -71,7 +71,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <batch_size_valid> -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -109,50 +109,28 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### +# Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 3 + drop_count_high: 3 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -161,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <freq_drop>, !ref <time_warp>] +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml b/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml index a645f981546215a71cea77bceca53503137a1c01..cf366205efffe515ddc91d4a31bb48d75f505bc3 100644 --- a/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml +++ b/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 10.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 batch_size: 6 batch_size_valid: 1 @@ -71,7 +71,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <batch_size_valid> -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 3 @@ -109,50 +109,28 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### +# Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 3 + drop_count_high: 3 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -161,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <freq_drop>, !ref <time_warp>] +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/CommonVoice/ASR/transducer/train.py b/recipes/CommonVoice/ASR/transducer/train.py index 1782408b80c8d1305a99b7cb02c0de7f9df02d51..0304aabc810bfba7df541982bbc5d62c8c3f83b2 100644 --- a/recipes/CommonVoice/ASR/transducer/train.py +++ b/recipes/CommonVoice/ASR/transducer/train.py @@ -134,26 +134,22 @@ class ASR(sb.Brain): if stage == sb.Stage.TRAIN: if hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - token_lens = 
self.hparams.wav_augment.replicate_labels( - token_lens - ) - tokens_eos = self.hparams.wav_augment.replicate_labels( - tokens_eos - ) - token_eos_lens = self.hparams.wav_augment.replicate_labels( - token_eos_lens + ( + tokens, + token_lens, + tokens_eos, + token_eos_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, token_lens, tokens_eos, token_eos_lens ) if hasattr(self.hparams, "fea_augment"): - tokens = self.hparams.fea_augment.replicate_labels(tokens) - token_lens = self.hparams.fea_augment.replicate_labels( - token_lens - ) - tokens_eos = self.hparams.fea_augment.replicate_labels( - tokens_eos - ) - token_eos_lens = self.hparams.fea_augment.replicate_labels( - token_eos_lens + ( + tokens, + token_lens, + tokens_eos, + token_eos_lens, + ) = self.hparams.fea_augment.replicate_multiple_labels( + tokens, token_lens, tokens_eos, token_eos_lens ) if stage == sb.Stage.TRAIN: diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml index 2b358e6d401b3b2acfde98dcccc28f77849b9ce4..d33c50c2bacb21e3bea276f428933d63f92cdb22 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml @@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -63,7 +63,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False freeze_encoder: True @@ -82,45 +82,30 @@ test_loader_kwargs: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
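On the train.py hunk above: label replication is needed whenever the Augmenter enlarges the batch (concat_original: True, repeat_augment > 1, or parallel_augment: True), since targets must be copied to stay aligned with the augmented waveforms; the change collapses the four repeated replicate_labels calls into a single replicate_multiple_labels call per augmenter. A sketch of the updated usage, replicating after running the augmenter as the recipe does (shapes illustrative):

import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.time_domain import DropFreq

# concat_original=True returns [originals; augmented], doubling the batch.
augmenter = Augmenter(concat_original=True,
                      min_augmentations=1, max_augmentations=1,
                      augmentations=[DropFreq()])

wavs, lens = torch.randn(4, 16000), torch.ones(4)
tokens = torch.randint(0, 100, (4, 12))
token_lens = torch.ones(4)

aug_wavs, aug_lens = augmenter(wavs, lengths=lens)   # batch: 4 -> 8
tokens, token_lens = augmenter.replicate_multiple_labels(tokens, token_lens)
print(aug_wavs.shape[0], tokens.shape[0])            # 8 8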
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -129,6 +114,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml index dbeb56be51634d702208568dad9dd8a7cdd285e0..c5533e9bb30af916ba32d6554e7dd84ebf90f864 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml @@ -33,7 +33,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 32 # This works with a 32GB GPU ! (bs * nb_gpu * accum) > 128 ! 
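By contrast, the Whisper fine-tuning recipes keep the Augmenter defaults (concat_original: False, repeat_augment: 1, parallel_augment: False), so wav_augment leaves the batch size unchanged and no label replication is required; with min_augmentations = max_augmentations = 3, augment_prob = 1.0 and shuffle_augmentations left False, all three augmentations run in list order on every training batch. A quick shape check under the same assumptions:

import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.time_domain import DropChunk, DropFreq, SpeedPerturb

wav_augment = Augmenter(
    min_augmentations=3, max_augmentations=3, augment_prob=1.0,
    augmentations=[SpeedPerturb(orig_freq=16000, speeds=[95, 100, 105]),
                   DropFreq(), DropChunk()])

wavs, lens = torch.randn(8, 16000), torch.ones(8)
aug_wavs, aug_lens = wav_augment(wavs, lengths=lens)
print(aug_wavs.shape[0] == wavs.shape[0])   # True: batch size preserved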
ctc_weight: 0.3 @@ -70,7 +70,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: 6 -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer d_model: 768 nhead: 8 @@ -213,50 +213,27 @@ normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global update_until_epoch: 3 -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### +# Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml index bc01810449814b8f7ff293e4315aa4557c46f31f..bb23c98a6ede2b4aeedbba67a8fb4ba61071cf53 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml @@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -63,7 +63,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False freeze_encoder: True @@ -82,45 +82,30 @@ test_loader_kwargs: epoch_counter: 
!new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml index 120305e73eb0a295fd1824600e6cc5d8c40ccc29..e62d9c390a91a49253f93e6c6e1587ff18f67639 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml @@ -33,7 +33,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 32 # This works with a 32GB GPU ! (bs * nb_gpu * accum) > 128 ! 
ctc_weight: 0.3 @@ -70,7 +70,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: 6 -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer d_model: 768 nhead: 8 @@ -213,50 +213,27 @@ normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global update_until_epoch: 3 -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### +# Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml index da5cbd28fbf685b9d37f982901bae38e16d20f0b..62363bdada17730280ee59bc48ad90b5ed17f3a7 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml @@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -63,7 +63,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False freeze_encoder: True @@ -82,45 +82,30 @@ test_loader_kwargs: epoch_counter: 
!new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml index 8b130a83d4fae7e448408b06497ce9fdd075d84b..e21852639b9b75437dc27e1dd9e10a963de7f0b9 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml @@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -63,7 +63,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False freeze_encoder: True @@ -82,45 +82,30 @@ test_loader_kwargs: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of 
speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml index d937359b54b33c6c8ed4f77a20ea45f54d59d70a..d95fbaffae6d07fda47183b3a97d192b487bf649 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml @@ -33,7 +33,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 32 # This works with a 32GB GPU ! (bs * nb_gpu * accum) > 128 ! 
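The batch_size comment above encodes the rule of thumb used across these Transformer recipes: the global batch is batch_size * nb_gpu * grad_accumulation_factor and should exceed 128. A minimal sketch of that arithmetic in Python (the function name and GPU counts are illustrative, not taken from the recipes):

def effective_batch_size(batch_size: int, n_gpus: int, grad_accumulation: int) -> int:
    # Global batch the optimizer effectively sees per update step.
    return batch_size * n_gpus * grad_accumulation

assert effective_batch_size(32, 4, 1) == 128  # batch_size 32 on 4 GPUs, no accumulation
assert effective_batch_size(32, 1, 4) == 128  # same global batch on 1 GPU via accumulation

Raising grad_accumulation_factor is the usual way to reach the target on a single GPU without increasing memory use.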
ctc_weight: 0.3 @@ -70,7 +70,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: 6 -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer d_model: 768 nhead: 8 @@ -213,50 +213,27 @@ normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global update_until_epoch: 3 -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### +# Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml index 5670f4fe813f5915174f7e7593bd951453952eab..e1fc08263096249cc2a2396263ed07029769c8e4 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml @@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -63,7 +63,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False freeze_encoder: True @@ -82,45 +82,30 @@ test_loader_kwargs: epoch_counter: 
!new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml index 9ffdc95fd6a7cfe58245e1fb3179a7d21d92a7fe..fe4fd6f17314fd33df1cce88f14f5ae5330c6fb8 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml @@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -63,7 +63,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False freeze_encoder: True @@ -82,45 +82,30 @@ test_loader_kwargs: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of 
speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml index 4f257094d23ed50c6a12720d88fb41987beeb6df..d7390d9a59374591e54f9e69cd40dbf08b0f7ef7 100644 --- a/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml +++ b/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml @@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0 ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -63,7 +63,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False freeze_encoder: True @@ -83,45 +83,30 @@ test_loader_kwargs: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation 
speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -130,6 +115,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> diff --git a/recipes/CommonVoice/ASR/transformer/train.py b/recipes/CommonVoice/ASR/transformer/train.py index 0aee6735efe0f21aaf46bcc2a248477e254233f2..89847d352e28d65dd2b23717b2dc134f89ed65dc 100644 --- a/recipes/CommonVoice/ASR/transformer/train.py +++ b/recipes/CommonVoice/ASR/transformer/train.py @@ -107,27 +107,25 @@ class ASR(sb.core.Brain): # Augment Labels if stage == sb.Stage.TRAIN: + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the time dimension) if hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels( - tokens_lens - ) - tokens_eos = self.hparams.wav_augment.replicate_labels( - tokens_eos - ) - tokens_eos_lens = self.hparams.wav_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) if hasattr(self.hparams, "fea_augment"): - tokens = self.hparams.fea_augment.replicate_labels(tokens) - tokens_lens = self.hparams.fea_augment.replicate_labels( - tokens_lens - ) - tokens_eos = self.hparams.fea_augment.replicate_labels( - tokens_eos - ) - tokens_eos_lens = 
self.hparams.fea_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.fea_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost( diff --git a/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml b/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml index 5ead6dbc0616594e2c7e6d9225d51895795398d6..e7ceed4f5cf9c0b0c47bc1ed433c9cb8dfe90282 100644 --- a/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml +++ b/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml @@ -27,11 +27,11 @@ skip_prep: False # We remove utterances longer than 10s in the train/dev/test sets as -# longer sentences certainly correspond to "open microphones". +# longer sentences certainly correspond to open microphones. avoid_if_longer_than: 10.0 avoid_if_shorter_than: 1.0 -# Training parameters +####################### Training Parameters #################################### # Parameters correspond to the ones reported in the official wav2vec2 # paper (for the masking). mask_length: 10 @@ -52,8 +52,8 @@ ckpt_interval_minutes: 30 # save checkpoint every N min # IMPORTANT: To train the w2v2 model, we recommend having the effective batch_size # higher than 100 (batch_size * nb_gpu * grad_accumulation_factor) # Examples are: -# 32 Tesla V100 32GB — 12 * 32 * 1 -# 4 Tesla V100 32GB — 12 * 4 * {6-8} +# 32 Tesla V100 32GB = 12 * 32 * 1 +# 4 Tesla V100 32GB = 12 * 4 * (6-8) batch_size: 12 test_batch_size: 8 grad_accumulation_factor: 8 @@ -104,7 +104,7 @@ modules: wav2vec2: !ref <wav2vec2> opt_class: !name:torch.optim.AdamW - lr: 0 # Will be changed by the scheduler, but we start at 0! + lr: 0 # Will be changed by the scheduler, but we start at 0 betas: (0.9, 0.98) eps: 0.000000001 weight_decay: !ref <weight_decay> diff --git a/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml b/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml index d43a2f0a951de5a2be2a1c6e0a509cd57ee4fc10..87a07c97aba76e5bf0d0e3f500ee261f1b935cec 100644 --- a/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml +++ b/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml @@ -39,7 +39,7 @@ sample_rate: 16000 audio_length: 4 # seconds n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 100 batch_size: 4 batch_size_test: 1 diff --git a/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml index 551ba2c19e1a64e8b0ab5467ec938851b0da86be..e9e1f43100f823790eec7e5d2649688dbb5d5574 100644 --- a/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml +++ b/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones".
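The train.py hunk earlier in this diff collapses four separate replicate_labels calls into a single replicate_multiple_labels call on the augmenter. A minimal sketch of what such label replication amounts to, assuming each label tensor is tiled along the batch dimension once per augmented copy (an illustration of the idea, not the SpeechBrain implementation):

import torch

def replicate_multiple_labels(n_copies, *label_tensors):
    # Tile every label tensor along the batch dimension so labels stay
    # aligned with a batch that grew n_copies times during augmentation.
    return tuple(torch.cat([t] * n_copies, dim=0) for t in label_tensors)

tokens = torch.zeros(2, 10, dtype=torch.long)  # two label sequences
tokens_lens = torch.ones(2)                    # relative lengths
tokens, tokens_lens = replicate_multiple_labels(2, tokens, tokens_lens)
assert tokens.shape[0] == 4 and tokens_lens.shape[0] == 4

Handling all four tensors in one call keeps them in sync and removes the repetition the old code had.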
avoid_if_longer_than: 15.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +58,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -78,45 +78,31 @@ eos_index: 2 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -125,6 +111,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] diff --git a/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml index 0b3647705fde6caf51a350bb86313a24ba657b7a..d1e2c66842028ae18a908eb06ba949e019977d66 100644 --- a/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml +++ b/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
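The speed_perturb entries above now inline speeds: [95, 100, 105], resampling factors expressed in percent. Speed perturbation of this kind is commonly realized by resampling; a rough sketch with torchaudio (the resampling mechanism is an assumption here, not a quote of SpeechBrain's code):

import torch
import torchaudio.functional as AF

def speed_perturb(wav: torch.Tensor, orig_freq: int = 16000, speed: int = 95) -> torch.Tensor:
    # Resample so the output keeps speed/100 of the samples; read back at
    # orig_freq this rescales the duration, changing the apparent speaking
    # rate while the transcript stays valid.
    return AF.resample(wav, orig_freq=orig_freq, new_freq=orig_freq * speed // 100)

perturbed = speed_perturb(torch.randn(1, 16000), speed=95)  # ~15200 samples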
avoid_if_longer_than: 15.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +58,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -78,45 +78,31 @@ eos_index: 2 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] diff --git a/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml index 946ca0b6ff62e1c9ab049a2e7f59a09f3e1ab491..fca0230de227bd68b964653f04808e43d0e484c9 100644 --- a/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml +++ b/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 15.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +58,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -78,45 +78,31 @@ eos_index: 2 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] diff --git a/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml index 14aef36c9cd16da21ae4a997ae8e9e47a84186ef..89fedade8f51ce16f72897b6d763993ed0f36d04 100644 --- a/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml +++ b/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml @@ -31,7 +31,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
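The drop_freq blocks above likewise carry their ranges inline: one to three bands of width 0.05, positioned anywhere in the normalized 0-1 frequency range. A toy approximation that zeroes bands in the rfft spectrum (DropFreq itself filters the waveform, so treat this only as a sketch of the effect):

import torch

def drop_freq_bands(wav, low=0.0, high=1.0, count_low=1, count_high=3, width=0.05):
    # Zero a few narrow spectral bands; band centers are drawn uniformly
    # from the normalized [low, high] frequency range.
    spec = torch.fft.rfft(wav)
    n_bins = spec.shape[-1]
    for _ in range(int(torch.randint(count_low, count_high + 1, (1,)))):
        center = low + (high - low) * torch.rand(1).item()
        lo = int(max(center - width / 2, 0.0) * n_bins)
        hi = int(min(center + width / 2, 1.0) * n_bins)
        spec[..., lo:hi] = 0
    return torch.fft.irfft(spec, n=wav.shape[-1])

augmented = drop_freq_bands(torch.randn(1, 16000))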
avoid_if_longer_than: 15.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -57,7 +57,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -77,45 +77,31 @@ eos_index: 2 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -124,6 +110,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] diff --git a/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml index f00e330a4484731402641ba5ff4882bf2ab2b83f..0194fd8776e231be71a999e9f87d5c5b675cdf69 100644 --- a/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml +++ b/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
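Similarly, drop_chunk zeroes one to five spans of 1000 to 2000 samples directly on the waveform. A toy re-implementation of that effect (illustrative only, not the DropChunk source):

import torch

def drop_chunks(wav, count_low=1, count_high=5, length_low=1000, length_high=2000):
    # Silence a random number of random-length sample spans.
    wav = wav.clone()
    for _ in range(int(torch.randint(count_low, count_high + 1, (1,)))):
        length = int(torch.randint(length_low, length_high + 1, (1,)))
        start = int(torch.randint(0, max(wav.shape[-1] - length, 1), (1,)))
        wav[..., start:start + length] = 0.0
    return wav

augmented = drop_chunks(torch.randn(1, 16000))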
avoid_if_longer_than: 15.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +58,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -78,45 +78,31 @@ eos_index: 2 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] diff --git a/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml index b1188dcb9905f182d580f7654cef50e5a3ae05f7..8470ce3a1c81b603cd54093f763c8323a28032cc 100644 --- a/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml +++ b/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml @@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation # longer sentences certainly correspond to "open microphones". 
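Across these DVoice recipes the wav_augment block keeps concat_original: True and fixed bounds (min_augmentations = max_augmentations = 4), dropping only the options that matched the Augmenter defaults. A toy sketch of the control flow those knobs imply (an approximation, not the SpeechBrain Augmenter):

import random
import torch

def augment(batch, augmentations, min_aug=4, max_aug=4,
            augment_prob=1.0, concat_original=True):
    # With probability 1 - augment_prob the batch passes through untouched.
    if random.random() > augment_prob:
        return batch
    # Sample how many pipelines to chain, clamped to what is available
    # (these recipes request 4 but list three augmentations).
    n_max = min(max_aug, len(augmentations))
    out = batch
    for aug in random.sample(augmentations, random.randint(min(min_aug, n_max), n_max)):
        out = aug(out)
    # concat_original stacks the clean batch on top of the augmented one.
    return torch.cat([batch, out], dim=0) if concat_original else out

doubled = augment(torch.randn(2, 16000),
                  [lambda x: x * 0.9, lambda x: x.flip(-1), lambda x: x + 0.01])

Concatenating the clean batch doubles the batch dimension, which is exactly why the label tensors are replicated in the train.py change shown earlier.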
avoid_if_longer_than: 15.0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -58,7 +58,7 @@ test_dataloader_options: token_type: char # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -78,45 +78,31 @@ eos_index: 2 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/ESC50/classification/hparams/cnn14_classifier.yaml b/recipes/ESC50/classification/hparams/cnn14_classifier.yaml index e8034bfdd169bd85bf10eda4504bd5f46a9a8656..bc0a83bbd7415221e3f4c5610363cb0fdfc0ffde 100644 --- a/recipes/ESC50/classification/hparams/cnn14_classifier.yaml +++ b/recipes/ESC50/classification/hparams/cnn14_classifier.yaml @@ -41,7 +41,7 @@ skip_manifest_creation: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters 
#################################### number_of_epochs: 200 batch_size: 32 lr: 0.0002 diff --git a/recipes/ESC50/classification/hparams/conv2d_classifier.yaml b/recipes/ESC50/classification/hparams/conv2d_classifier.yaml index 2b0a49bcd36172932655a544e2c436d528392988..284d5681fc5799e1d84bd7ce3c865e60e5b92e46 100644 --- a/recipes/ESC50/classification/hparams/conv2d_classifier.yaml +++ b/recipes/ESC50/classification/hparams/conv2d_classifier.yaml @@ -41,7 +41,7 @@ skip_manifest_creation: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 200 batch_size: 32 lr: 0.00002 diff --git a/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml b/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml index 6f57a843b87ddf401379f3fa37bcab852ed7287f..00acd1ff372cb8b3a97e13a11f531181dfcb3502 100644 --- a/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml +++ b/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml @@ -39,7 +39,7 @@ skip_manifest_creation: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 200 batch_size: 2 lr: 0.0001 diff --git a/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml b/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml index 7292f89affe52105159e68260d673a972580fe39..4f6cb9b909871f749b60b8be054b9ed68b64c6e9 100644 --- a/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml +++ b/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml @@ -39,7 +39,7 @@ skip_manifest_creation: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 200 batch_size: 16 lr: 0.0002 diff --git a/recipes/ESC50/interpret/hparams/nmf.yaml b/recipes/ESC50/interpret/hparams/nmf.yaml index 7b6c9905da9e1a356adca52933b05aead8986004..e4da313ba1a107174e8e584541fce92d2e811ba0 100644 --- a/recipes/ESC50/interpret/hparams/nmf.yaml +++ b/recipes/ESC50/interpret/hparams/nmf.yaml @@ -40,7 +40,7 @@ skip_manifest_creation: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 100 batch_size: 2 lr: 0.0002 diff --git a/recipes/ESC50/interpret/hparams/piq.yaml b/recipes/ESC50/interpret/hparams/piq.yaml index c45f50a20c4a903e9177392213bb03a651c23827..68f8c06deb3f2ab2c13c19b93d31f119b650416a 100644 --- a/recipes/ESC50/interpret/hparams/piq.yaml +++ b/recipes/ESC50/interpret/hparams/piq.yaml @@ -42,7 +42,7 @@ skip_manifest_creation: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 200 batch_size: 16 lr: 0.0002 diff --git a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml index ec8653adeb3e1cf5354cc4d9c0f09ee773dbcf6e..49a7321f79c6268d9058ab16b2a920543e707a9f 100644 --- a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml +++ b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml @@ -81,7 +81,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: !ref <num_workers> -####################### Model parameters ########################### 
+####################### Model Parameters ########################### # Transformer d_model: 256 nhead: 4 diff --git a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml index 59c3782e1282ace82e5b20b6b3f209481c2849e2..4310e2d6b5cc19f3fd8766f062b41a367c27cd1a 100644 --- a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml +++ b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml @@ -91,7 +91,7 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: !ref <num_workers> -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer d_model: 256 nhead: 4 diff --git a/recipes/Google-speech-commands/hparams/xvect.yaml b/recipes/Google-speech-commands/hparams/xvect.yaml index 8eb842ba9accf3211240bb8fb40c97cc61507190..417cecfdf3b8bd2be10529bcdd4c2515d930cb5b 100644 --- a/recipes/Google-speech-commands/hparams/xvect.yaml +++ b/recipes/Google-speech-commands/hparams/xvect.yaml @@ -40,7 +40,7 @@ percentage_silence: 10 # Set this to 0 for the V2 35 task skip_prep: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 100 batch_size: 32 lr: 0.001 diff --git a/recipes/Google-speech-commands/hparams/xvect_leaf.yaml b/recipes/Google-speech-commands/hparams/xvect_leaf.yaml index e06101850b4a213b43f3dd8764083720ec78c211..f2897af22c1252385255b99cf4dfa5e4fa0a03e3 100644 --- a/recipes/Google-speech-commands/hparams/xvect_leaf.yaml +++ b/recipes/Google-speech-commands/hparams/xvect_leaf.yaml @@ -42,7 +42,7 @@ percentage_silence: 10 # Set this to 0 for the V2 35 task skip_prep: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 100 batch_size: 32 lr: 0.001 diff --git a/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml b/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml index ae34528828cb70df824c2a815ad8a73a501ac01b..d1b63d7bf65549ce4d301b89d86aa2c3686ff3da 100644 --- a/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml +++ b/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml @@ -38,7 +38,7 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 batch_size: 4 lr: 0.0001 @@ -50,7 +50,7 @@ freeze_wav2vec2: False # We see an improvement of 2% with freezing CNNs freeze_wav2vec2_conv: True -# Model parameters +####################### Model Parameters ####################################### encoder_dim: 768 # Number of emotions diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml index 05bca5c1ea37f37f166626920433ad10c26ddb9e..3901391a5480ca4f6ad9ddba1a951ef442262402 100644 --- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml +++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml @@ -30,7 +30,7 @@ wav2vec2_hub: LIA-AvignonUniversity/IWSLT2022-tamasheq-only # wav2vec 2.0 specific parameters wav2vec2_frozen: False -# Training 
parameters +####################### Training Parameters #################################### number_of_epochs: 100 lr: 0.001 lr_wav2vec: 0.00001 diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml index a3a2f1c994107b1c993c6b5f391b516fc58324e1..6887c3a4084cd6f4d8eff06de83bcf620d2aa939 100644 --- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml +++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml @@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint # wav2vec 2.0 specific parameters wav2vec2_frozen: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 500 lr: 0.001 lr_wav2vec: 0.0001 diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml index 11ebc937a7a1edf93d70ee894b2b8c191c57a11b..b86cef685336bf528a8e1bd65941870283121c8e 100644 --- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml +++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml @@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint # wav2vec 2.0 specific parameters wav2vec2_frozen: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 500 lr: 0.001 lr_wav2vec: 0.0001 diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml index 68f74d9b4a04e82d927e4a3c5f5749362bb09803..77b7c8cd6ceb36b004fb81e798a65a08dce43025 100644 --- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml +++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml @@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint # wav2vec 2.0 specific parameters wav2vec2_frozen: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 500 lr: 0.001 lr_wav2vec: 0.0001 diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml index b62e366f1e05b77ea79a5f25cadc933371f7be31..d384bf3a86cbbe391c645bc4eabd49d1d74dc1cd 100644 --- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml +++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml @@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint # wav2vec 2.0 specific parameters wav2vec2_frozen: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 500 lr: 0.001 lr_wav2vec: 0.0001 diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml index 6bfb9db12708e14546670e9dc87c5d7dd487c773..beafeba864cf0113598622bfe70b390bafc6f18d 100644 --- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml +++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml @@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint wav2vec2_frozen: False keep_n_layers: 6 # keep first N layers 
from the Transformer Encoder stack inside the wav2vec 2.0 model -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 100 lr: 0.001 lr_wav2vec: 0.00001 diff --git a/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml b/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml index aee256973f98a3d35022ce9ac87470c0412cb281..3c0d43e2ad36b0ee39ca69b1c4b967d530e4fb18 100644 --- a/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml +++ b/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml @@ -34,7 +34,7 @@ test_csv: ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### # To make Transformers converge, the global batch size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. @@ -78,7 +78,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer d_model: 256 nhead: 4 diff --git a/recipes/KsponSpeech/LM/hparams/transformer.yaml b/recipes/KsponSpeech/LM/hparams/transformer.yaml index cd9685e28a2bc016c73a12835491c3f89509ec63..5b64cc196c4d87d6453f152c3b537be168ab1e6f 100644 --- a/recipes/KsponSpeech/LM/hparams/transformer.yaml +++ b/recipes/KsponSpeech/LM/hparams/transformer.yaml @@ -24,7 +24,7 @@ test_csv: # Tokenizer model tokenizer_file: ddwkim/asr-conformer-transformerlm-ksponspeech/tokenizer.ckpt -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 batch_size: 256 lr: 0.1 diff --git a/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml b/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml index dd7cd490646b4b900fe74bc61867ee3cd7135d3a..04ef0ebfd9cf041c20668e89801433d6d5518387 100644 --- a/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml +++ b/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml @@ -16,7 +16,7 @@ skip_prep: False train_csv: !ref <output_folder>/train.csv valid_csv: !ref <output_folder>/dev.csv -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 5000 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml b/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml index 8fb195c034aa3adc6e82951782d9509f53c589ee..ffa5a1ef2edde5bb435f812248d6b78ee278155a 100644 --- a/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml +++ b/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml @@ -37,7 +37,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml b/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml index cf68e9a8110596af9e6bf2d6b206ba0d975d600f..abc9c76c7e07df2847eea7e551cd7270fa14ea00 100644 --- a/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml +++
b/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml @@ -37,7 +37,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/LibriParty/VAD/hparams/train.yaml b/recipes/LibriParty/VAD/hparams/train.yaml index e07258c44b4b3a8886e9157c6033ac32e5b7277c..be91916855eb846b941aecf5d2b648ce26acbf2a 100644 --- a/recipes/LibriParty/VAD/hparams/train.yaml +++ b/recipes/LibriParty/VAD/hparams/train.yaml @@ -41,7 +41,7 @@ speech_csv: !ref <save_folder>/speech.csv multilang_speech_csv: !ref <save_folder>/multilang_speech.csv skip_prep: False # Skip data preparation -# Training parameters +####################### Training Parameters #################################### N_epochs: 100 lr: 1.0 lr_final: 0.1 @@ -65,7 +65,7 @@ test_dataloader_opts: n_fft: 400 n_mels: 40 -# Model parameters +####################### Model Parameters ####################################### # activation: !name:torch.nn.LeakyReLU # dropout: 0.15 # cnn_blocks: 2 diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml index b609bb766291635878dae6b4311712c88302243b..fdbd7e86d8364dc02fecae7127a94927800c8b13 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml @@ -33,7 +33,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 1 lr: 0.9 lr_wav2vec: 0.0001 @@ -62,7 +63,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### + activation: !name:torch.nn.LeakyReLU dnn_layers: 2 dnn_neurons: 1024 @@ -73,76 +75,12 @@ ctc_neurons: 29 output_neurons: 29 # Characters size, index(blank/eos/bos) = 0 blank_index: 0 -# Decoding parameters -test_beam_search: - beam_size: 200 - topk: 1 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -10.0 - token_prune_min_logp: -5 - prune_history: True - alpha: 0.5 - beta: 1.5 - # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM - # It can either be a .bin or .arpa ; note: .arpa is much slower at loading - # If you don't want to use an LM, comment it out or set it to null - kenlm_model_path: null - # # Functions and classes # epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - - enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN input_shape: [null, null, 1024] activation: !ref <activation> @@ -211,8 +149,60 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.9 patient: 0 +############################## Decoding ######################################## + +test_beam_search: + beam_size: 200 + topk: 1 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -10.0 + token_prune_min_logp: -5 + prune_history: True + alpha: 0.5 + beta: 1.5 + # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM + # It can either be a .bin or .arpa ; note: .arpa is much slower at loading + # If you don't want to use an LM, comment it out or set it to null + kenlm_model_path: null + label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml index f92f4f8fca27d3ef438377575b6c5881c0d80ade..1b84596dcd99a075af3d2266b44d04395031d619 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml @@ -34,7 +34,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 1 lr: 0.9 lr_wav2vec: 0.0001 @@ -63,7 +64,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### + activation: !name:torch.nn.LeakyReLU dnn_layers: 2 dnn_neurons: 1024 @@ -74,76 +76,12 @@ ctc_neurons: 29 output_neurons: 29 # Characters size, index(blank/eos/bos) = 0 blank_index: 0 -# Decoding parameters -test_beam_search: - beam_size: 200 - topk: 1 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -10.0 - token_prune_min_logp: -5 - prune_history: True - alpha: 0.5 - beta: 1.5 - # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM - # It can either be a .bin or .arpa ; note: .arpa is much slower at loading - # If you don't want to use an LM, comment it out or set it to null - kenlm_model_path: null - - # # Functions and classes # epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN input_shape: [null, null, 1024] @@ -214,8 +152,60 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.9 patient: 0 +############################## Decoding ######################################## + +test_beam_search: + beam_size: 200 + topk: 1 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -10.0 + token_prune_min_logp: -5 + prune_history: True + alpha: 0.5 + beta: 1.5 + # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM + # It can either be a .bin or .arpa ; note: .arpa is much slower at loading + # If you don't want to use an LM, comment it out or set it to null + kenlm_model_path: null + label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml index 6c0e7207df65fafc4630b400a98800fa9989e245..d0daf5b77759c5b87410893abd71e77ea00725c2 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml @@ -33,7 +33,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 1 lr: 0.9 lr_wav2vec: 0.0001 @@ -61,7 +62,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### + activation: !name:torch.nn.LeakyReLU dnn_layers: 2 dnn_neurons: 1024 @@ -72,75 +74,12 @@ ctc_neurons: 58 # Twice the number of characters, for upsampling output_neurons: 29 # Characters size, index(blank/eos/bos) = 0 blank_index: 0 -# Decoding parameters -test_beam_search: - beam_size: 200 - topk: 1 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -10.0 - token_prune_min_logp: -5 - prune_history: True - alpha: 0.5 - beta: 1.5 - # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM - # It can either be a .bin or .arpa ; note: .arpa is much slower at loading - # If you don't want to use an LM, comment it out or set it to null - kenlm_model_path: null - # # Functions and classes # epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN input_shape: [null, null, 1024] @@ -210,8 +149,58 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.9 patient: 0 +############################## Decoding ######################################## + +test_beam_search: + beam_size: 200 + topk: 1 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -10.0 + token_prune_min_logp: -5 + prune_history: True + alpha: 0.5 + beta: 1.5 + # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM + # It can either be a .bin or .arpa ; note: .arpa is much slower at loading + # If you don't want to use an LM, comment it out or set it to null + kenlm_model_path: null + label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml index 2d91909f229b7f816f058bc5f8e608d2e4041c5f..1d860a29f1cf0955b277e934748cde06db9d516f 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml @@ -32,7 +32,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 1 lr: 0.9 lr_wav2vec: 0.0001 @@ -56,7 +57,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dnn_layers: 2 dnn_neurons: 1024 @@ -66,75 +67,14 @@ freeze_wav2vec: True output_neurons: 29 # BPE size, index(blank/eos/bos) = 0 blank_index: 0 -# Decoding parameters -test_beam_search: - beam_size: 143 - topk: 1 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -12.0 - token_prune_min_logp: -1.2 - prune_history: True - alpha: 0.8 - beta: 1.2 - # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM - # It can either be a .bin or .arpa ; note: .arpa is much slower at loading - # If you don't want to use an LM, comment it out or set it to null - kenlm_model_path: null - # # Functions and classes # -epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter - limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] +label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder +epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter + limit: !ref <number_of_epochs> enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN input_shape: [null, null, 1024] @@ -198,7 +138,58 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.9 patient: 0 -label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Decoding ######################################## + +# Decoding parameters +test_beam_search: + beam_size: 143 + topk: 1 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -12.0 + token_prune_min_logp: -1.2 + prune_history: True + alpha: 0.8 + beta: 1.2 + # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM + # It can either be a .bin or .arpa ; note: .arpa is much slower at loading + # If you don't want to use an LM, comment it out or set it to null + kenlm_model_path: null + +############################## Logging and Pretrainer ########################## checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml index 01a31cdd86b0e8da7db2bc59c8802955b5161612..c946b024314e5c26813bf5d4253f1bf86e9ce870 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml @@ -32,7 +32,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 1 lr: 0.9 lr_wav2vec: 0.0001 @@ -56,7 +57,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### + activation: !name:torch.nn.LeakyReLU dnn_layers: 2 dnn_neurons: 1024 @@ -68,49 +70,6 @@ output_neurons: 29 # BPE size, index(blank/eos/bos) = 0 pretrained_lm_tokenizer_path: speechbrain/asr-crdnn-rnnlm-librispeech -# This is the RNNLM that is used according to the Huggingface repository -# NB: It has to match the pre-trained RNNLM!! -lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM - output_neurons: 1000 - embedding_dim: 128 - activation: !name:torch.nn.LeakyReLU - dropout: 0.0 - rnn_layers: 2 - rnn_neurons: 2048 - dnn_blocks: 1 - dnn_neurons: 512 - return_hidden: True # For inference - -tokenizer: !new:sentencepiece.SentencePieceProcessor - -# Decoding parameters -lm_weight: 0.5 -blank_index: 0 -# topk is the number of hypotheses that will be rescored in the rescorer -# lowering this value might decrease the wer, but will increase speed. 
- -test_beam_search: - beam_size: 20 - topk: 20 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -12.0 - token_prune_min_logp: -12.0 - prune_history: False - alpha: 0.8 - beta: 1.2 - -rnnlm: !new:speechbrain.decoders.scorer.RNNLMRescorer - language_model: !ref <lm_model> - tokenizer: !ref <tokenizer> - bos_index: 0 - eos_index: 0 - pad_index: 0 - -rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder - rescorers: [!ref <rnnlm>] - weights: - rnnlm: !ref <lm_weight> # # Functions and classes @@ -118,53 +77,6 @@ rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN input_shape: [null, null, 1024] @@ -230,6 +142,84 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder +# This is the RNNLM that is used according to the Huggingface repository +# NB: It has to match the pre-trained RNNLM!! +lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM + output_neurons: 1000 + embedding_dim: 128 + activation: !name:torch.nn.LeakyReLU + dropout: 0.0 + rnn_layers: 2 + rnn_neurons: 2048 + dnn_blocks: 1 + dnn_neurons: 512 + return_hidden: True # For inference + + +tokenizer: !new:sentencepiece.SentencePieceProcessor + +############################## Decoding ######################################## + +# topk is the number of hypotheses that will be rescored in the rescorer +# lowering this value speeds up decoding, but it may increase the WER. 
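The trade-off named in that comment is easiest to see at the call site: the beam searcher emits `topk` candidate transcripts and the rescorer re-ranks them, so `topk` bounds both the number of LM forward passes and the chance of recovering from a beam-search error. A rough, self-contained sketch; the vocabulary and log-probabilities are toys, only the `CTCBeamSearcher` class and its kwargs mirror the mapping these recipes configure, and the exact call convention may differ across SpeechBrain versions.

```python
# Rough sketch of the decode-then-rescore flow. The vocabulary and scores are
# random placeholders; the kwargs mirror the test_beam_search mapping above.
import torch
from speechbrain.decoders import CTCBeamSearcher

test_beam_search = {
    "beam_size": 20,
    "topk": 20,          # candidates later handed to the RNNLM rescorer
    "blank_index": 0,
    "space_token": " ",
    "beam_prune_logp": -12.0,
    "token_prune_min_logp": -12.0,
    "prune_history": False,
    "alpha": 0.8,
    "beta": 1.2,
}
vocab_list = ["-", " "] + list("abcdefghijklmnopqrstuvwxyz'")  # toy vocab, blank first

searcher = CTCBeamSearcher(vocab_list=vocab_list, **test_beam_search)
log_probs = torch.randn(1, 60, len(vocab_list)).log_softmax(dim=-1)
wav_lens = torch.ones(1)  # relative lengths
hyps = searcher(log_probs, wav_lens)  # topk=20 hypotheses per utterance
# The RescorerBuilder then re-ranks these 20 candidates with the LM; a smaller
# topk means fewer LM passes but fewer chances to overturn a search error.
```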
+test_beam_search: + beam_size: 20 + topk: 20 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -12.0 + token_prune_min_logp: -12.0 + prune_history: False + alpha: 0.8 + beta: 1.2 + +rnnlm: !new:speechbrain.decoders.scorer.RNNLMRescorer + language_model: !ref <lm_model> + tokenizer: !ref <tokenizer> + bos_index: 0 + eos_index: 0 + pad_index: 0 + +rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder + rescorers: [!ref <rnnlm>] + weights: + rnnlm: !ref <lm_weight> + +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. +drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml index 724c3bf1eafe4d7ade5cc231c878afcb94db3162..d806b20cfd5ebf408d0f8b60e83909521563288a 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml @@ -32,7 +32,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 1 lr: 0.9 lr_wav2vec: 0.0001 @@ -56,7 +57,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dnn_layers: 2 dnn_neurons: 1024 @@ -88,30 +89,6 @@ tokenizer: !new:sentencepiece.SentencePieceProcessor # Decoding parameters lm_weight: 0.5 blank_index: 0 -# topk is the number of hypotheses that will be rescored in the rescorer -# lowering this value might decrease the wer, but will increase speed. 
-test_beam_search: - beam_size: 20 - topk: 20 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -12.0 - token_prune_min_logp: -12.0 - prune_history: False - alpha: 0.8 - beta: 1.2 - -transformerlm: !new:speechbrain.decoders.scorer.TransformerLMRescorer - language_model: !ref <lm_model> - tokenizer: !ref <tokenizer> - pad_index: 0 - bos_index: 1 - eos_index: 2 - -rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder - rescorers: [!ref <transformerlm>] - weights: - transformerlm: !ref <lm_weight> # # Functions and classes @@ -119,53 +96,6 @@ rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN input_shape: [null, null, 1024] @@ -229,8 +159,68 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.9 patient: 0 +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder +############################## Decoding ######################################## + +# topk is the number of hypotheses that will be rescored in the rescorer +# lowering this value speeds up decoding, but it may increase the WER. +test_beam_search: + beam_size: 20 + topk: 20 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -12.0 + token_prune_min_logp: -12.0 + prune_history: False + alpha: 0.8 + beta: 1.2 + +transformerlm: !new:speechbrain.decoders.scorer.TransformerLMRescorer + language_model: !ref <lm_model> + tokenizer: !ref <tokenizer> + pad_index: 0 + bos_index: 1 + eos_index: 2 + +rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder + rescorers: [!ref <transformerlm>] + weights: + transformerlm: !ref <lm_weight> + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml index 735b29db919073fd3062ce5a184c42c2e23a27cf..ba20bf2acfc55dc14ff95b92d070f69261321761 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml @@ -31,7 +31,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 15 warmup_steps: 1000 # We freeze whisper for 1000 steps to let the CTC adapt lr: 0.0008 @@ -61,7 +62,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### dnn_neurons: 1024 freeze_whisper: False whisper_output_dim: 512 @@ -71,74 +72,12 @@ whisper_output_dim: 512 output_neurons: 29 # BPE size, index(blank/eos/bos) = 0 blank_index: 0 -# Decoding parameters -test_beam_search: - beam_size: 143 - topk: 1 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -12.0 - token_prune_min_logp: -1.2 - prune_history: True - alpha: 0.8 - beta: 1.2 - # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM - # It can either be a .bin or .arpa ; note: .arpa is much slower at loading - # If you don't want to use an LM, comment it out or set it to null - kenlm_model_path: null # # Functions and classes # epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <whisper_output_dim>] @@ -204,6 +143,57 @@ lr_annealing_whisper: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.75 patient: 0 +############################## Decoding ######################################## + +test_beam_search: + beam_size: 143 + topk: 1 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -12.0 + token_prune_min_logp: -1.2 + prune_history: True + alpha: 0.8 + beta: 1.2 + # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM + # It can either be a .bin or .arpa ; note: .arpa is much slower at loading + # If you don't want to use an LM, comment it out or set it to null + kenlm_model_path: null + +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
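The `drop_chunk` block this comment introduces continues right after this aside. Two units are worth pinning down: `drop_length_low/high` count raw samples, so 1000 to 2000 samples is roughly 62 to 125 ms at 16 kHz, and `drop_freq_low/high` appear to bound the normalized (0 to 1) band location rather than a probability, despite what the removed inline comments above say. A toy sanity check under those assumptions:

```python
# Toy check of the two waveform-level drops used across these recipes. The
# parameter values mirror the configs; the test signal is random noise, and
# the forward signatures assumed here (DropFreq on waveforms alone, DropChunk
# with relative lengths) follow speechbrain.augment.time_domain.
import torch
from speechbrain.augment.time_domain import DropChunk, DropFreq

drop_freq = DropFreq(
    drop_freq_low=0, drop_freq_high=1,
    drop_freq_count_low=1, drop_freq_count_high=3,
    drop_freq_width=0.05,
)
drop_chunk = DropChunk(
    drop_length_low=1000, drop_length_high=2000,  # samples: ~62-125 ms @ 16 kHz
    drop_count_low=1, drop_count_high=5,
)

wavs = torch.randn(4, 16000)  # four 1 s utterances at 16 kHz
lens = torch.ones(4)          # relative lengths, as SpeechBrain batches use
out = drop_chunk(drop_freq(wavs), lens)
assert out.shape == wavs.shape  # same shape; several spans are now zeroed
```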
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml index cf9cf7ec89110d79e0e7223e44d8ffbbfac75081..1b281b35c5afc35265592e26d4f3ff04d03a8d5b 100644 --- a/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml +++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml @@ -33,7 +33,7 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 0.0003 lr_wav2vec: 0.00005 @@ -58,7 +58,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: !ref <test_batch_size> -# Model parameters +####################### Model Parameters ####################################### dnn_activation: !new:torch.nn.LeakyReLU dnn_neurons: 1280 dnn_dropout: 0.15 @@ -68,75 +68,12 @@ freeze_wav2vec: False output_neurons: 29 # BPE size, index(blank/eos/bos) = 0 blank_index: 0 -# Decoding parameters -test_beam_search: - beam_size: 200 - topk: 1 - blank_index: !ref <blank_index> - space_token: ' ' # make sure this is the same as the one used in the tokenizer - beam_prune_logp: -10.0 - token_prune_min_logp: -5.0 - prune_history: True - alpha: 0.8 - beta: 1.2 - # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM - # It can either be a .bin or .arpa ; note: .arpa is much slower at loading - # If you don't want to use an LM, comment it out or set it to null - kenlm_model_path: null - # # Functions and classes # epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] @@ -227,6 +164,58 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.7 patient: 0 +############################## Decoding ######################################## + +test_beam_search: + beam_size: 200 + topk: 1 + blank_index: !ref <blank_index> + space_token: ' ' # make sure this is the same as the one used in the tokenizer + beam_prune_logp: -10.0 + token_prune_min_logp: -5.0 + prune_history: True + alpha: 0.8 + beta: 1.2 + # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM + # It can either be a .bin or .arpa ; note: .arpa is much slower at loading + # If you don't want to use an LM, comment it out or set it to null + kenlm_model_path: null + +############################## Augmentations ################################### + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py b/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py index f2b6373b2ae5d476dc1c372c0513564b94c2191c..1f4ccdd2c6bdcdf62563989fb2c4d2e5de916fd6 100644 --- a/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py +++ b/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py @@ -101,10 +101,15 @@ class ASR(sb.Brain): ids = batch.id tokens, tokens_lens = batch.tokens - # Label Augmentation + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens) + ( + tokens, + tokens_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens + ) loss_ctc = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens) loss = loss_ctc diff --git a/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py b/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py index e3db36334d2ec34cab95d858ea427cd00993b9e1..d575265e86f0749f1cec8f30e26241b08c07d281 100644 --- a/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py +++ b/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py @@ -72,10 +72,15 @@ class ASR(sb.Brain): ids = batch.id tokens, tokens_lens = batch.tokens - # Label Augmentation + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens) + ( + tokens, + tokens_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens + ) loss_ctc = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens) loss = loss_ctc diff --git a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml index e38b545fb3901046869bfaf96f7e3cb2a96422f4..3d0aaa200486f70767bec1c3e20c9058a0c978f1 100644 --- a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml +++ b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml @@ -44,7 +44,8 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augmentation NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 15 number_of_ctc_epochs: 5 batch_size: 8 @@ -89,7 +90,8 @@ valid_dataloader_opts: 
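The `replicate_multiple_labels` change in the two training scripts above is the counterpart of `concat_original: True` in these YAML files: the Augmenter returns the clean batch followed by its augmented copies, so B waveforms come back as 2B rows (the batch axis grows, not the time axis), and the label tensors must be replicated to stay aligned. A minimal sketch, shortened to a single augmentation and dummy CTC targets:

```python
# Minimal sketch of why compute_objectives must replicate labels when
# concat_original=True. One augmentation is used for brevity; the random
# token tensor stands in for real CTC targets.
import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.time_domain import DropChunk

wav_augment = Augmenter(
    concat_original=True,        # output = clean batch + augmented batch
    min_augmentations=1,
    max_augmentations=1,
    augment_prob=1.0,
    augmentations=[DropChunk(drop_length_low=1000, drop_length_high=2000)],
)

wavs, lens = torch.randn(4, 16000), torch.ones(4)
tokens = torch.randint(1, 29, (4, 12))
tokens_lens = torch.ones(4)

aug_wavs, aug_lens = wav_augment(wavs, lens)   # batch grows from 4 to 8
tokens, tokens_lens = wav_augment.replicate_multiple_labels(tokens, tokens_lens)
assert aug_wavs.shape[0] == tokens.shape[0] == 8  # rows stay aligned
```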
test_dataloader_opts: batch_size: 1 -# Model parameters +####################### Model Parameters ####################################### + activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -128,74 +130,6 @@ coverage_penalty: 1.5 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Download and prepare the dataset of noisy sequences for augmentation -prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL - URL: !ref <NOISE_DATASET_URL> - dest_folder: !ref <data_folder_noise> - ext: wav - csv_file: !ref <noise_annotation> - - -# Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - -add_noise: !new:speechbrain.augment.time_domain.AddNoise - csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> - noise_sample_rate: !ref <sample_rate> - clean_sample_rate: !ref <sample_rate> - num_workers: !ref <num_workers> - -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <add_noise>, - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global @@ -288,7 +222,8 @@ modules: model: !new:torch.nn.ModuleList - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <ctc_lin>, !ref <seq_lin>] -# Scorer +############################## Decoding & optimiser ############################ + coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer vocab_size: !ref <output_neurons> @@ -339,6 +274,57 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.8 patient: 0 +############################## Augmentations ################################### + +prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL + URL: !ref <NOISE_DATASET_URL> + dest_folder: !ref <data_folder_noise> + ext: wav + csv_file: !ref <noise_annotation> + +# Add noise to input signal +add_noise: !new:speechbrain.augment.time_domain.AddNoise + csv_file: !ref <noise_annotation> + snr_low: 0 + snr_high: 15 + noise_sample_rate: !ref <sample_rate> + clean_sample_rate: !ref <sample_rate> + num_workers: !ref <num_workers> + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
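The `add_noise` stage configured above has one extra moving part compared to the other augmentations: a one-off download that materializes the noise manifest consumed at training time. End to end, and assuming the URL is reachable, the pipeline looks roughly like this; the local paths below are placeholders for the `!ref` keys in the recipe.

```python
# Sketch of the noise-augmentation path: fetch the noise archive once, write
# its CSV manifest, then mix noise into a batch at a random SNR in [0, 15] dB.
# Local paths are placeholders; the URL is the one used by these recipes.
import torch
from speechbrain.augment.preparation import prepare_dataset_from_URL
from speechbrain.augment.time_domain import AddNoise

NOISE_DATASET_URL = "https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1"
prepare_dataset_from_URL(
    URL=NOISE_DATASET_URL,
    dest_folder="./noise",       # placeholder for <data_folder_noise>
    ext="wav",
    csv_file="./noise.csv",      # placeholder for <noise_annotation>
)

add_noise = AddNoise(
    csv_file="./noise.csv",
    snr_low=0, snr_high=15,
    noise_sample_rate=16000, clean_sample_rate=16000,
)
wavs, lens = torch.randn(2, 32000), torch.ones(2)
noisy = add_noise(wavs, lens)    # same shape, noise mixed in at a random SNR
```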
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <add_noise>, + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml index 164f1ffe7431875b3c4ae5485b21be4c643beb34..355c49d36be26aeed18eac40ef91ea9249d13905 100644 --- a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml +++ b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml @@ -44,7 +44,8 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augmentation NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 15 number_of_ctc_epochs: 15 batch_size: 24 @@ -89,7 +90,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -# Model parameters +####################### Model Parameters ####################################### + activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -126,75 +128,6 @@ temperature_lm: 1.25 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Download and prepare the dataset of noisy sequences for augmentation -prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL - URL: !ref <NOISE_DATASET_URL> - dest_folder: !ref <data_folder_noise> - ext: wav - csv_file: !ref <noise_annotation> - - -# Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - -add_noise: !new:speechbrain.augment.time_domain.AddNoise - csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> - noise_sample_rate: !ref <sample_rate> - clean_sample_rate: !ref <sample_rate> - num_workers: !ref <num_workers> - -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <add_noise>, - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - - normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global @@ -286,7 +219,8 @@ modules: model: !new:torch.nn.ModuleList - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <ctc_lin>, !ref <seq_lin>] -# Scorer +############################## Decoding & optimiser ############################ + coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer vocab_size: !ref <output_neurons> @@ -337,6 +271,57 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.8 patient: 0 +############################## Augmentations ################################### + +prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL + URL: !ref <NOISE_DATASET_URL> + dest_folder: !ref <data_folder_noise> + ext: wav + csv_file: !ref <noise_annotation> + +# Add noise to input signal +add_noise: !new:speechbrain.augment.time_domain.AddNoise + csv_file: !ref <noise_annotation> + snr_low: 0 + snr_high: 15 + noise_sample_rate: !ref <sample_rate> + clean_sample_rate: !ref <sample_rate> + num_workers: !ref <num_workers> + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <add_noise>, + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml index cc0647562067be9cd70dfb69a03d40f2c68287ab..3046dfea80643d7c90025e21e33a830f6b615613 100644 --- a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml +++ b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml @@ -45,7 +45,8 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequencies for data augm NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1 noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### + number_of_epochs: 25 number_of_ctc_epochs: 25 batch_size: 8 @@ -90,7 +91,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -# Model parameters +####################### Model Parameters ####################################### + activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -132,75 +134,6 @@ coverage_penalty: 1.5 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Download and prepare the dataset of noisy sequences for augmentation -prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL - URL: !ref <NOISE_DATASET_URL> - dest_folder: !ref <data_folder_noise> - ext: wav - csv_file: !ref <noise_annotation> - - -# Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - -add_noise: !new:speechbrain.augment.time_domain.AddNoise - csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> - noise_sample_rate: !ref <sample_rate> - clean_sample_rate: !ref <sample_rate> - num_workers: !ref <num_workers> - -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> - -# Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - -drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> - -# Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - -drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> - -# Augmenter: Combines previously defined augmentations to perform data augmentation -wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: True - repeat_augment: 1 - shuffle_augmentations: False - min_augmentations: 4 - max_augmentations: 4 - augment_prob: 1.0 - augmentations: [ - !ref <add_noise>, - !ref <speed_perturb>, - !ref <drop_freq>, - !ref <drop_chunk>] - - normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global @@ -294,7 +227,8 @@ modules: model: !new:torch.nn.ModuleList - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <ctc_lin>, !ref <seq_lin>] -# Scorer +############################## Decoding & optimiser ############################ + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -360,6 +294,57 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.8 patient: 0 +############################## Augmentations ################################### + +prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL + URL: !ref <NOISE_DATASET_URL> + dest_folder: !ref <data_folder_noise> + ext: wav + csv_file: !ref <noise_annotation> + +# Add noise to input signal +add_noise: !new:speechbrain.augment.time_domain.AddNoise + csv_file: !ref <noise_annotation> + snr_low: 0 + snr_high: 15 + noise_sample_rate: !ref <sample_rate> + clean_sample_rate: !ref <sample_rate> + num_workers: !ref <num_workers> + +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Frequency drop: randomly drops a number of frequency bands to zero. +drop_freq: !new:speechbrain.augment.time_domain.DropFreq + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 + +# Time drop: randomly drops a number of temporal chunks. 
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 + +# Augmenter: Combines previously defined augmentations to perform data augmentation +wav_augment: !new:speechbrain.augment.augmenter.Augmenter + concat_original: True + min_augmentations: 4 + max_augmentations: 4 + augment_prob: 1.0 + augmentations: [ + !ref <add_noise>, + !ref <speed_perturb>, + !ref <drop_freq>, + !ref <drop_chunk>] + +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/seq2seq/train.py b/recipes/LibriSpeech/ASR/seq2seq/train.py index b3adaa67ab1d799211ebda84c01653a2a59d825f..7f535100876f9b05a935b30af641bc66f47dc168 100644 --- a/recipes/LibriSpeech/ASR/seq2seq/train.py +++ b/recipes/LibriSpeech/ASR/seq2seq/train.py @@ -97,12 +97,16 @@ class ASR(sb.Brain): tokens_eos, tokens_eos_lens = batch.tokens_eos tokens, tokens_lens = batch.tokens + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens) - tokens_eos = self.hparams.wav_augment.replicate_labels(tokens_eos) - tokens_eos_lens = self.hparams.wav_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost( diff --git a/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml b/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml index c7ad99c6386a4626a4c5a29e75920818d197ebf0..e9757e2080cc29edbd96c459f2b5552d0d17655c 100644 --- a/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml +++ b/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml @@ -40,7 +40,8 @@ test_csv: skip_prep: False ckpt_interval_minutes: 5 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128.
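The recipe comments above encode a simple constraint; a quick sanity check with illustrative numbers (hypothetical values, not the defaults of any of these recipes):

# Hypothetical configuration: any combination whose product reaches 128
# satisfies the empirical global-batch-size threshold quoted above.
batch_size = 16
n_gpus = 4
grad_accumulation_factor = 2
global_batch_size = batch_size * n_gpus * grad_accumulation_factor
assert global_batch_size >= 128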
@@ -120,7 +121,8 @@ dynamic_batch_sampler: batch_ordering: random max_batch_ex: 256 -# Model parameters +####################### Model Parameters ####################################### + # Transformer d_model: 512 joint_dim: 640 @@ -164,18 +166,15 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_mels: !ref <n_mels> win_length: !ref <win_length> +############################## Augmentations ################################### + # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 1 max_augmentations: 1 augment_prob: 1.0 @@ -183,43 +182,24 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 + replace: "zeros" # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 + replace: "zeros" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter parallel_augment: False @@ -234,6 +214,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <freq_drop>, !ref <time_warp>] +############################## Models ########################################## + CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) num_blocks: 2 @@ -355,6 +337,8 @@ modules: model: !new:torch.nn.ModuleList - [!ref <CNN>, !ref <enc>, !ref <emb>, !ref <dec>, !ref <proj_enc>, !ref <proj_dec>, !ref <proj_ctc>, !ref <transducer_lin>] 
+############################## Decoding & optimiser ############################ + # Tokenizer initialization tokenizer: !new:sentencepiece.SentencePieceProcessor @@ -388,6 +372,8 @@ noam_annealing: !new:speechbrain.nnet.schedulers.NoamScheduler lr_initial: !ref <lr> n_warmup_steps: !ref <warmup_steps> +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/transducer/train.py b/recipes/LibriSpeech/ASR/transducer/train.py index 497912c83b97e10cfea19ec78974195e065123a2..84d7e05ffa3701c172be2c79765a59bade434d30 100644 --- a/recipes/LibriSpeech/ASR/transducer/train.py +++ b/recipes/LibriSpeech/ASR/transducer/train.py @@ -155,27 +155,16 @@ class ASR(sb.Brain): logits_transducer, wav_lens, predicted_tokens = predictions if stage == sb.Stage.TRAIN: - if hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - token_lens = self.hparams.wav_augment.replicate_labels( - token_lens - ) - tokens_eos = self.hparams.wav_augment.replicate_labels( - tokens_eos - ) - token_eos_lens = self.hparams.wav_augment.replicate_labels( - token_eos_lens - ) + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if hasattr(self.hparams, "fea_augment"): - tokens = self.hparams.fea_augment.replicate_labels(tokens) - token_lens = self.hparams.fea_augment.replicate_labels( - token_lens - ) - tokens_eos = self.hparams.fea_augment.replicate_labels( - tokens_eos - ) - token_eos_lens = self.hparams.fea_augment.replicate_labels( - token_eos_lens + ( + tokens, + token_lens, + tokens_eos, + token_eos_lens, + ) = self.hparams.fea_augment.replicate_multiple_labels( + tokens, token_lens, tokens_eos, token_eos_lens ) if stage == sb.Stage.TRAIN: diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml index 7772517e905a8482527334a7f923dbefa6cc85b0..2eee3646ecc87e806ae067ba4417178a17dbb021 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml @@ -42,7 +42,8 @@ test_csv: ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128.
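The replicate_multiple_labels call introduced in the train.py hunks above replaces four separate replicate_labels calls. A minimal sketch of the assumed semantics (not the SpeechBrain implementation; replicate_multiple_labels_sketch and its factor argument are hypothetical) shows why every target tensor must be tiled along the batch axis when augmentation enlarges the input batch:

import torch

def replicate_multiple_labels_sketch(factor, *labels):
    # Tile each label tensor along the batch dimension by `factor`, the
    # same factor by which concat_original/parallel_augment grew the batch.
    return tuple(
        lab.repeat(factor, *([1] * (lab.dim() - 1))) for lab in labels
    )

tokens = torch.zeros(8, 20, dtype=torch.long)  # (batch, length)
tokens_lens = torch.ones(8)
tokens, tokens_lens = replicate_multiple_labels_sketch(2, tokens, tokens_lens)
assert tokens.shape == (16, 20) and tokens_lens.shape == (16,)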
@@ -116,7 +117,7 @@ test_dataloader_opts: padding_kwargs: value: !ref <pad_index> -####################### Model parameters ########################### +####################### Model Parameters ####################################### # Transformer d_model: 512 nhead: 4 @@ -148,7 +149,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -216,7 +217,8 @@ Adam: !name:torch.optim.Adam eps: 0.000000001 -# Scorer +############################## Decoding & optimiser ############################ + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -290,57 +292,34 @@ normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global update_until_epoch: 4 -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" # Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -354,6 +333,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> 
n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml index c3f66ec9f3faa95c53320d5dec12e3ceba1d9a61..02fc2eac46b99dfa8801fb986f0fbf0a507ffde9 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml @@ -41,9 +41,11 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. -# The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. +# The global batch size is computed as batch_size * n_gpus * +# grad_accumulation_factor. # Empirically, we found that this value should be >= 128. # Please, set your parameters accordingly. number_of_epochs: 120 @@ -103,7 +105,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ####################################### + # Transformer d_model: 512 nhead: 8 @@ -131,7 +134,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -204,7 +207,8 @@ Adam: !name:torch.optim.AdamW eps: 0.000000001 weight_decay: !ref <weight_decay> -# Scorer +####################### Decoding & optimiser ################################### + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -275,57 +279,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +####################### Augmentations ########################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop 
in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -340,6 +321,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank win_length: !ref <win_length> n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml index 5d252a4b59f3d6603bd6b4a7ef95cd79df19d21a..7cdd4c06f91cac155d273ba9a9f9edd7dcc52885 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml @@ -41,7 +41,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. 
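A rough usage sketch of the inlined fea_augment pipeline above (the classes and argument names are the ones referenced in the YAML; the shapes, values, and the call convention feats, lens = fea_augment(feats, lens) are assumptions based on how these recipes use it):

import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.freq_domain import SpectrogramDrop, Warping

time_drop = SpectrogramDrop(drop_length_low=15, drop_length_high=25,
                            drop_count_low=4, drop_count_high=4,
                            replace="mean", dim=1)
freq_drop = SpectrogramDrop(drop_length_low=10, drop_length_high=20,
                            drop_count_low=4, drop_count_high=4,
                            replace="mean", dim=2)
time_warp = Warping()  # relies on defaults matching the values removed above

fea_augment = Augmenter(min_augmentations=3, max_augmentations=3,
                        augment_prob=1.0,
                        augmentations=[time_drop, freq_drop, time_warp])

feats = torch.randn(8, 200, 80)  # (batch, time, n_mels), hypothetical batch
lens = torch.ones(8)
feats, lens = fea_augment(feats, lens)  # all three transforms, every batch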
@@ -102,7 +103,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ####################################### + # Transformer d_model: 512 nhead: 8 @@ -129,7 +131,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -200,7 +202,8 @@ Adam: !name:torch.optim.AdamW model: !new:torch.nn.ModuleList - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>] -# Scorer +####################### Decoding & optimiser ########################### + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -271,57 +274,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 
max_augmentations: 3 augment_prob: 1.0 @@ -336,6 +316,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_mels: !ref <n_mels> win_length: !ref <win_length> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml index eddc967800ccabb834d3c3751feba187b0262ad0..a24e6649a28488cf2d3cdf4a29242717134d6ae8 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml @@ -40,7 +40,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. @@ -102,7 +103,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ####################################### + # Transformer d_model: 144 nhead: 4 @@ -129,7 +131,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -200,12 +202,8 @@ Adam: !name:torch.optim.Adam betas: (0.9, 0.98) eps: 0.000000001 -#SGD: !name:torch.optim.SGD -# lr: !ref <lr_sgd> -# momentum: 0.99 -# nesterov: True +############################## Decoding & optimiser ############################ -# Scorer ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -277,57 +275,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the 
spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -341,6 +316,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml index 7b2912ec33a8d0b6efe84a29048654ee1004442b..4b6ca718f306a9d81a406d4c92c6d8536aa9721a 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml @@ -40,7 +40,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +############################## Training Parameters ############################# + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. 
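The refactoring pattern repeated across these files swaps a top-level hyperparameter plus !ref indirection for the literal value. In HyperPyYAML the two spellings construct the same object, which is what makes the change purely cosmetic; a quick equivalence check (assuming, as is typical for SpeechBrain modules, that init arguments are stored as attributes of the constructed object):

from hyperpyyaml import load_hyperpyyaml

yaml_with_ref = (
    "time_drop_length_low: 15\n"
    "time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop\n"
    "    drop_length_low: !ref <time_drop_length_low>\n"
)
yaml_inline = (
    "time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop\n"
    "    drop_length_low: 15\n"
)
a = load_hyperpyyaml(yaml_with_ref)
b = load_hyperpyyaml(yaml_inline)
assert a["time_drop"].drop_length_low == b["time_drop"].drop_length_low == 15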
@@ -99,7 +100,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ####################################### + # Transformer d_model: 144 nhead: 8 @@ -133,7 +135,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -206,7 +208,8 @@ Adam: !name:torch.optim.Adam betas: (0.9, 0.98) eps: 0.000000001 -# Scorer +############################## Decoding & optimiser ############################ + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -276,57 +279,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -340,6 +320,8 @@ 
compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml index 71d97cdb9f2920b8cb6a86a554e7d59b06148370..2e0242e311442c5ece15bdf30b2287f054a1475d 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml @@ -40,7 +40,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +############################## Training Parameters ############################# + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. @@ -100,7 +101,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ####################################### # Transformer d_model: 256 nhead: 8 @@ -134,7 +135,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -207,7 +208,8 @@ Adam: !name:torch.optim.Adam betas: (0.9, 0.98) eps: 0.000000001 -# Scorer +############################## Decoding & optimiser ############################ + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -277,57 +279,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of 
chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -335,11 +314,14 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <time_drop>, !ref <freq_drop>, !ref <time_warp>] + compute_features: !new:speechbrain.lobes.features.Fbank sample_rate: !ref <sample_rate> n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml index fdf65fde380ce806eb6216bf2fb5cb308c30a011..6e165ed5c65424d1432c3c7c39f6615175830ed5 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml @@ -40,7 +40,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. 
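Several of the hunks above also delete parallel_augment: False, repeat_augment: 1, and shuffle_augmentations: False, relying on those being the Augmenter defaults. Under that assumption, the per-batch selection logic reduces to roughly the following (a simplified sketch, not the library code; each transform is assumed to map (x, lengths) to (x, lengths)):

import random

def apply_augmentations(x, lengths, augmentations,
                        min_augmentations, max_augmentations, augment_prob):
    # With probability 1 - augment_prob the batch passes through untouched.
    if random.random() > augment_prob:
        return x, lengths
    # Choose how many transforms to apply; without shuffling they run in
    # declaration order, once each (repeat_augment == 1).
    n = random.randint(min_augmentations, max_augmentations)
    for aug in augmentations[:n]:
        x, lengths = aug(x, lengths)
    return x, lengths

With min_augmentations = max_augmentations = 3 and augment_prob = 1.0, all three spectrogram transforms run on every training batch.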
@@ -87,6 +88,7 @@ dynamic_batch_sampler_valid: batch_ordering: !ref <batch_ordering> max_batch_ex: !ref <max_batch_ex> + # Dataloader options train_dataloader_opts: batch_size: !ref <batch_size> @@ -99,7 +101,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ####################################### + # Transformer d_model: 256 nhead: 8 @@ -132,7 +135,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -203,7 +206,8 @@ Adam: !name:torch.optim.Adam betas: (0.9, 0.98) eps: 0.000000001 -# Scorer +####################### Decoding & optimiser ################################### + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -273,57 +277,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: 
!new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -337,6 +318,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml index 47053b5fff683fd287339d20a5b5cd86e1488a65..fe3bd599c7712a95d2d55300d0073329a068a7e5 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml @@ -40,7 +40,8 @@ test_csv: - !ref <output_folder>/test-clean.csv - !ref <output_folder>/test-other.csv -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. @@ -100,7 +101,8 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ####################################### + # Transformer d_model: 144 nhead: 8 @@ -133,7 +135,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -204,7 +206,8 @@ Adam: !name:torch.optim.Adam betas: (0.9, 0.98) eps: 0.000000001 -# Scorer +####################### Decoding & optimiser ########################### + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -274,57 +277,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram 
-freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -338,6 +318,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml index 4805d7c6c543ab7369ef6c8b66751c2e90a1ec7d..4891ca61746a15f7af937e5e6bf53ef7ed1c372f 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml @@ -35,7 +35,8 @@ test_csv: ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +############################## Training Parameters ############################# + number_of_epochs: 1 lr_whisper: 0.00003 sorting: ascending @@ -61,7 +62,7 @@ min_decode_ratio: 0.0 max_decode_ratio: 1.0 test_beam_size: 8 -# Model parameters +####################### Model Parameters ####################################### freeze_whisper: False @@ -74,52 +75,34 @@ valid_loader_kwargs: test_loader_kwargs: batch_size: !ref <test_batch_size> - -# -# Functions and classes -# epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 # Min frequency band dropout probability + drop_freq_high: 1 # Max frequency band dropout probability + drop_freq_count_low: 1 # Min number of frequency bands to drop + drop_freq_count_high: 3 # Max number of frequency bands to drop + drop_freq_width: 0.05 # Width of frequency bands to drop # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -128,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper source: !ref <whisper_hub> freeze: !ref <freeze_whisper> @@ -142,6 +127,8 @@ nll_loss: !name:speechbrain.nnet.losses.nll_loss modules: whisper: !ref <whisper> +############################## Decoding & optimiser ############################ + whisper_opt_class: !name:torch.optim.AdamW lr: !ref <lr_whisper> weight_decay: 0.01 @@ -167,6 +154,8 @@ lr_annealing_whisper: !new:speechbrain.nnet.schedulers.NewBobScheduler annealing_factor: 0.9 patient: 0 +############################## Logging and Pretrainer ########################## + checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer checkpoints_dir: !ref <save_folder> recoverables: diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml index 36626f0d9e3c958ac7f25afbfe69a66e3a42f143..173453e9d4b992e109f5314261aeb129e14ddc79 100644 --- a/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml +++ b/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml @@ -42,7 +42,8 @@ test_csv: ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### + # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
# Empirically, we found that this value should be >= 128. @@ -116,7 +117,8 @@ test_dataloader_opts: padding_kwargs: value: !ref <pad_index> -####################### Model parameters ########################### +####################### Model Parameters ####################################### + # Transformer d_model: 512 nhead: 4 @@ -142,7 +144,7 @@ test_beam_size: 66 lm_weight: 0.60 ctc_weight_decode: 0.40 -############################## models ################################ +############################## Models ########################################## CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -210,7 +212,8 @@ Adam: !name:torch.optim.Adam eps: 0.000000001 -# Scorer +####################### Decoding & optimiser ################################### + ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer eos_index: !ref <eos_index> blank_index: !ref <blank_index> @@ -284,57 +287,34 @@ normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global update_until_epoch: 4 -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 4 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 4 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "mean" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 - -# Frequency Drop -freq_drop_length_low: 10 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 20 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 4 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 4 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "mean" # Method of dropping chunks + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" +# Freq Drop freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 10 + drop_length_high: 20 + drop_count_low: 4 + drop_count_high: 4 + replace: "mean" dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -348,6 +328,8 @@ 
compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Logging and Pretrainer ########################## + train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger save_file: !ref <train_log> diff --git a/recipes/LibriSpeech/ASR/transformer/train.py b/recipes/LibriSpeech/ASR/transformer/train.py index b69763e26fc4b21809c90d3c2c4f46039f75d8f4..292d7cc4249f6108f0cfba6f732073260ab80d9c 100644 --- a/recipes/LibriSpeech/ASR/transformer/train.py +++ b/recipes/LibriSpeech/ASR/transformer/train.py @@ -114,16 +114,16 @@ class ASR(sb.core.Brain): tokens, tokens_lens = batch.tokens if stage == sb.Stage.TRAIN: + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if hasattr(self.hparams, "fea_augment"): - tokens = self.hparams.fea_augment.replicate_labels(tokens) - tokens_lens = self.hparams.fea_augment.replicate_labels( - tokens_lens - ) - tokens_eos = self.hparams.fea_augment.replicate_labels( - tokens_eos - ) - tokens_eos_lens = self.hparams.fea_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.fea_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost( diff --git a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml index a565905c84d8c912f93657ba4515374b087fad3e..f487ffbe1ce7aa22c06cfd7fd11e4c6568371a68 100644 --- a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml +++ b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml @@ -95,7 +95,7 @@ homograph_loss_weight: 2.0 lr: 0.002 save_for_pretrained: True -# Model parameters +####################### Model Parameters ####################################### output_neurons: !apply:speechbrain.utils.hparams.choice value: !ref <phn_tokenize> choices: diff --git a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml index c75cd97bfb2cc5bdbfd847fe6c1a185a3d4ca033..e1c0f44c79ad48982beb34c64f81cff2ae326deb 100644 --- a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml +++ b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml @@ -95,7 +95,7 @@ lr_dont_halve_until_epoch: 1 lr_patience: 1 save_for_pretrained: True -# Model parameters +####################### Model Parameters ####################################### output_neurons: !apply:speechbrain.utils.hparams.choice value: !ref <phn_tokenize> choices: diff --git a/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml index dcb76825915b84f5b2f253bd4f40e37b481b617a..7e1b7bc4af0d06d9258a2c5b85be9d7074e3ce38 100644 --- a/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml +++ b/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml @@ -50,7 +50,7 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger tokenizer_file: <output_folder>/save/phoneme_tokenizer.model -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 80 lr: 0.001 @@ -68,7 +68,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -# Model parameters +####################### Model Parameters ####################################### model_dim: !apply:speechbrain.utils.hparams.choice value: !ref <phn_tokenize> choices: diff --git
a/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml index 2a9a434d68bc2c6db3e299cdc3f37d3d7733f416..5e319e3d861df3c7a832f6219aaf142d35832a80 100644 --- a/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml +++ b/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml @@ -39,7 +39,7 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger # Tokenizer model (you must use the same tokenizer for LM and ASR training) tokenizer_file: <output_folder>/save/phoneme_tokenizer.model -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 80 lr: 0.001 @@ -57,7 +57,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -# Model parameters +####################### Model Parameters ####################################### emb_dim: 256 # dimension of the embeddings transformer_num_heads: 4 diff --git a/recipes/LibriSpeech/LM/hparams/RNNLM.yaml b/recipes/LibriSpeech/LM/hparams/RNNLM.yaml index b061b4fccad7572ba5ddc125e2586b527c7733e8..0896de96032620c15d8b0e0cf19960aed1b953c8 100644 --- a/recipes/LibriSpeech/LM/hparams/RNNLM.yaml +++ b/recipes/LibriSpeech/LM/hparams/RNNLM.yaml @@ -29,7 +29,7 @@ test_transcripts_pattern: "test*/**/*.trans.txt" # Tokenizer model tokenizer_file: https://www.dropbox.com/s/o7gnouwdoqchotj/1000_unigram.model?dl=1 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 80 lr: 0.001 @@ -47,7 +47,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -# Model parameters +####################### Model Parameters ####################################### emb_size: 128 activation: !name:torch.nn.LeakyReLU dropout: 0.0 diff --git a/recipes/LibriSpeech/LM/hparams/transformer.yaml b/recipes/LibriSpeech/LM/hparams/transformer.yaml index c79ef576963eb3550fbfdbe56843fcc0302c6e5f..50123a4c3cdbfabbc53f006aa6b132fa3392e9e0 100644 --- a/recipes/LibriSpeech/LM/hparams/transformer.yaml +++ b/recipes/LibriSpeech/LM/hparams/transformer.yaml @@ -29,7 +29,7 @@ test_transcripts_pattern: "test*/**/*.trans.txt" # Tokenizer model tokenizer_file: speechbrain/asr-transformer-transformerlm-librispeech/tokenizer.ckpt -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 16 lr: 10 diff --git a/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml b/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml index b5a9fa60e66b29ad6104311a54917ab0b7ad0421..9dda21f82781ccbcd5bda6fa686a8c5f93eb2cfc 100644 --- a/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml +++ b/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml @@ -16,7 +16,7 @@ skip_prep: False train_csv: !ref <output_folder>/train.csv valid_csv: !ref <output_folder>/dev-clean.csv -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 1000 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml b/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml index c312ce5bbc3c8bc2843966c77d7fc5e97e0a5831..1f328c6f1682dcf2f25b34632b96ecdb96e5b45a 100644 --- a/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml +++ 
b/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml @@ -16,7 +16,7 @@ skip_prep: False train_csv: !ref <output_folder>/train.csv valid_csv: !ref <output_folder>/dev-clean.csv -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 5000 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml b/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml index 4806f3e064abaf88de5dc1879f898f3e63b6af77..13ce0d2203fd62dfc84afa29a9ddd7c04cfd66d3 100644 --- a/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml +++ b/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml @@ -48,7 +48,7 @@ test_dataloader_options: batch_size: 8 # DynamicBatching not used at testing time num_workers: 4 -# Training parameters +####################### Training Parameters #################################### lr: 0.0005 warmup: 30000 # This is equivalent to optimizer_step_limit - warmup @@ -63,7 +63,7 @@ mask_prob: 0.65 mask_length: 10 num_negatives: 100 -# Model parameters +####################### Model Parameters ####################################### embedding_dim: 768 extractor_dim: 512 final_dim: 256 diff --git a/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml b/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml index 924579a9c3f122db9fea7d150c6e5ef13c897c2d..70ef38de7090847faa4b5ead4d5bf721ccd399ee 100644 --- a/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml +++ b/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml @@ -55,7 +55,7 @@ test_dataloader_options: sample_rate: 16000 feats_dim: 1024 -# Training parameters: +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1 lr_wav2vec: 0.0001 @@ -67,7 +67,7 @@ patient: 0 patient_wav2vec: 0 sorting: ascending -# Model parameters: +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dnn_blocks: 3 dnn_neurons: 512 diff --git a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml index 7be6f6b94757115d810c6cc07f7c9907ca0e15ed..4f9bad2e7ebead66974cd6f164c3a92f2b6504b9 100644 --- a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml +++ b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml @@ -57,7 +57,7 @@ test_dataloader_options: sample_rate: 16000 feats_dim: 1024 -# Training parameters: +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1 lr_wav2vec: 0.0001 @@ -69,7 +69,7 @@ patient: 0 patient_wav2vec: 0 sorting: ascending -# Model parameters: +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dnn_blocks: 3 dnn_neurons: 512 diff --git a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml index d631a6da8add16c74ffb98b8b5a20b95075173c9..8631e6e885f0e31dfb9448b51a86acf7b40d918a 100644 --- a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml +++ b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml @@ -57,7 +57,7 @@ test_dataloader_options: sample_rate: 16000 feats_dim: 1024 -# Training parameters: +####################### Training Parameters ####################################
number_of_epochs: 30 lr: 1 lr_wav2vec: 0.0001 @@ -69,7 +69,7 @@ patient: 0 patient_wav2vec: 0 sorting: ascending -# Model parameters: +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dnn_blocks: 3 dnn_neurons: 512 diff --git a/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml b/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml index 05b16bea3f4af7afee0afd4aafe1d375f61c74f0..5bb3b8ed8fa30c797cd1ad1a3f62bdaac535a0a6 100644 --- a/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml +++ b/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml @@ -58,7 +58,7 @@ max_history: 5 ignore_index: -100 label_smoothing: 0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 4 batch_size: 8 test_batch_size: 4 diff --git a/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml b/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml index f7fd2b087b2b66ade39816c3245e7b67eb62fcc2..507115e832948e06abc8a8501d66606df252525b 100644 --- a/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml +++ b/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml @@ -40,7 +40,7 @@ max_history: 2 ignore_index: -100 label_smoothing: 0 -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 4 batch_size: 1 test_batch_size: 1 diff --git a/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml b/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml index 3f1da09190314e5c8a956eba60f2c94d1be75833..c23c11c53524db10e8e8110202af01e00cfa611e 100644 --- a/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml +++ b/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml @@ -67,7 +67,7 @@ num_spks: 2 noprogressbar: False sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.0001 diff --git a/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml b/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml index 302dabe57530db6df8f33ec147450e4f75b10fef..10e8e58e4a05a72f94d4c2f9d63aada185b3e236 100644 --- a/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml +++ b/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml @@ -41,7 +41,7 @@ skip_prep: False # longer sentences certainly correspond to "open microphones". 
avoid_if_longer_than: 10.0 -## Model parameters- Enhance model +## Model Parameters - Enhance model dereverberate: False save_audio: True sample_rate: 16000 @@ -54,7 +54,7 @@ use_rand_shift: False min_shift: -8000 max_shift: 8000 -## Training parameters- ASR +####################### Training Parameters - ASR ############################## number_of_epochs: 10 lr_whisper: 0.00003 sorting: ascending diff --git a/recipes/SLURP/NLU/hparams/train.yaml b/recipes/SLURP/NLU/hparams/train.yaml index e2201d96b05c4e94846555b39d65c4b4a1f6c517..7d88d62a9baa3b307c7a9c1c497c67f606146521 100644 --- a/recipes/SLURP/NLU/hparams/train.yaml +++ b/recipes/SLURP/NLU/hparams/train.yaml @@ -28,14 +28,14 @@ asr_tokenizer_file: https://www.dropbox.com/s/o7gnouwdoqchotj/1000_unigram.model slu_tokenizer_file: https://www.dropbox.com/s/tmwq12r5vgcsif9/58_unigram.model?dl=1 skip_prep: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 16 lr: 0.0003 # token_type: unigram # ["unigram", "bpe", "char"] sorting: random -# Model parameters +####################### Model Parameters ####################################### # sample_rate: 1600 emb_size: 128 dec_neurons: 512 diff --git a/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml b/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml index 51f805b078449a7594c1a205be73af6c6778e147..bf935024a739e665dad65a5260a57f393f36aa48 100644 --- a/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml +++ b/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml @@ -14,7 +14,7 @@ train_csv: !ref <output_folder>/train-type=direct.csv valid_csv: !ref <output_folder>/devel-type=direct.csv skip_prep: False -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 58 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/SLURP/direct/hparams/train.yaml b/recipes/SLURP/direct/hparams/train.yaml index 5a42c738c2c823c8d9eac5d6cd1c1c59abb6950f..038d2e59ea32bf9322087d0df6c4fe4cb3380878 100644 --- a/recipes/SLURP/direct/hparams/train.yaml +++ b/recipes/SLURP/direct/hparams/train.yaml @@ -34,7 +34,7 @@ rir_annotation: !ref <save_folder>/rir.csv tokenizer_file: https://www.dropbox.com/s/tmwq12r5vgcsif9/58_unigram.model?dl=1 skip_prep: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 16 lr: 0.0003 @@ -42,7 +42,7 @@ lr: 0.0003 sorting: random ckpt_interval_minutes: 15 # save checkpoint every N min -# Model parameters +####################### Model Parameters ####################################### sample_rate: 16000 emb_size: 128 dec_neurons: 512 @@ -100,41 +100,31 @@ add_noise: !new:speechbrain.augment.time_domain.AddNoise clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> +############################## Augmentations ################################### + # Speed perturbation -speed_changes: [90, 95, 105, 110] # List of speed changes for time-stretching speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [90, 95, 105, 110] # Frequency drop: randomly drops a number of frequency bands to zero.
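# A hedged sketch of the time-drop module defined a few lines below (assuming
# the speechbrain.augment API used throughout this diff). DropChunk also
# expects relative utterance lengths, so padded regions are not selected:
#
#   import torch
#   from speechbrain.augment.time_domain import DropChunk
#
#   dropper = DropChunk(drop_length_low=1000, drop_length_high=2000,
#                       drop_count_low=1, drop_count_high=3)
#   wavs = torch.rand(4, 16000)                  # [batch, time]
#   lens = torch.tensor([1.0, 0.8, 0.5, 1.0])    # relative lengths
#   dropped = dropper(wavs, lens)                # 1-3 chunks zeroed per utterance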
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 3 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 3 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 shuffle_augmentations: True min_augmentations: 1 max_augmentations: 4 @@ -146,7 +136,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] -# Models +############################## Models ########################################## + asr_model_source: speechbrain/asr-crdnn-rnnlm-librispeech slu_enc: !new:speechbrain.nnet.containers.Sequential diff --git a/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml b/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml index b383da5cbededb020a662caf4784ad7663761eaa..84222db5f430c3c4df1e5446279b48cd5ceb3eb5 100644 --- a/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml +++ b/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml @@ -32,7 +32,7 @@ skip_prep: False # URL for the wav2vec2 model, you can change to benchmark different models wav2vec2_hub: "facebook/hubert-base-ls960" -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 35 batch_size: 6 lr: 0.0003 @@ -47,7 +47,7 @@ freeze_wav2vec2: False #set to true to freeze the CONV part of the wav2vec2 model freeze_wav2vec2_conv: True -# Model parameters +####################### Model Parameters ####################################### sample_rate: 16000 emb_size: 128 dec_neurons: 512 @@ -96,45 +96,31 @@ seq_lin: !new:speechbrain.nnet.linear.Linear input_size: !ref <dec_neurons> n_neurons: !ref <output_neurons> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero.
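# The wav_augment block below keeps concat_original: True, so the clean batch
# is concatenated with its augmented copy and the effective batch size grows.
# A sketch of that behavior (assuming the Augmenter API in this diff; names
# and shapes are illustrative):
#
#   import torch
#   from speechbrain.augment.augmenter import Augmenter
#   from speechbrain.augment.time_domain import DropFreq
#
#   aug = Augmenter(concat_original=True, min_augmentations=1,
#                   max_augmentations=1, augmentations=[DropFreq()])
#   wavs, lens = aug(torch.rand(2, 16000), lengths=torch.ones(2))
#   assert wavs.shape[0] == 4   # originals followed by augmented copies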
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 diff --git a/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml b/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml index 2933e0fd4878dc17ec3dde112c13f73d067d65b6..7741680bdbb25e81dd9c1f0216e8d275d9af44aa 100644 --- a/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml +++ b/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml @@ -49,7 +49,7 @@ test_csv: - !ref <output_folder>/test_callhome.csv - !ref <output_folder>/test.csv -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 lr: 1.0 lr_wav2vec: 0.0001 @@ -74,7 +74,7 @@ test_dataloader_options: token_type: unigram # ["unigram", "bpe", "char"] character_coverage: 1.0 -# Model parameters +####################### Model Parameters ####################################### wav2vec_output_dim: 1024 dnn_neurons: 1024 freeze_wav2vec: False @@ -109,45 +109,31 @@ kenlm_model_path: null epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
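# The speeds list is in percent; judging from the SpeedPerturb resampler, a
# sampled speed scales the waveform length by roughly speed/100, so outputs
# here vary by about +/- 5 % in duration. A minimal sketch (assumptions as
# above):
#
#   import torch
#   from speechbrain.augment.time_domain import SpeedPerturb
#
#   perturb = SpeedPerturb(orig_freq=16000, speeds=[95, 100, 105])
#   wavs = torch.rand(2, 16000)   # [batch, time]
#   out = perturb(wavs)           # out.shape[1] ~= 16000 * speed / 100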
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -156,6 +142,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.nnet.containers.Sequential input_shape: [null, null, !ref <wav2vec_output_dim>] linear1: !name:speechbrain.nnet.linear.Linear diff --git a/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml b/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml index d20001f1cf5b550c1eb6638c86198b2fb06f9394..743467bcf2ffd6b0ceacb7fc873259b8ddb8f2c1 100644 --- a/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml +++ b/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml @@ -57,7 +57,7 @@ test_csv: - !ref <save_folder>/test_callhome.csv - !ref <save_folder>/test.csv -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 number_of_ctc_epochs: 5 batch_size: 10 @@ -103,7 +103,7 @@ test_dataloader_opts: num_workers: !ref <num_workers> batch_size: !ref <batch_size> -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -144,57 +144,40 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U ext: wav csv_file: !ref <noise_annotation> -# Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation +############################## Augmentations ################################### +# Add noise to input signal add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref 
<num_workers> # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -215,6 +198,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/Switchboard/ASR/seq2seq/train.py b/recipes/Switchboard/ASR/seq2seq/train.py index 57ce5c9667156577040358760c65794d58f968f2..d0cd3ce91e3248345c76a7f662fe010ad1d92e02 100644 --- a/recipes/Switchboard/ASR/seq2seq/train.py +++ b/recipes/Switchboard/ASR/seq2seq/train.py @@ -127,12 +127,16 @@ class ASR(sb.Brain): tokens_eos, tokens_eos_lens = batch.tokens_eos tokens, tokens_lens = batch.tokens + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"): - tokens = self.hparams.wav_augment.replicate_labels(tokens) - tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens) - tokens_eos = self.hparams.wav_augment.replicate_labels(tokens_eos) - tokens_eos_lens = self.hparams.wav_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.wav_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost( diff --git a/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml
b/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml index bd84a8a19ec109f5c4f331a53fb2aecbf3dee43c..674c037199f6d654512df99d8c32f08aa82cf977 100644 --- a/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml +++ b/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml @@ -51,7 +51,7 @@ test_csv: ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### # To make Transformers converge, the global batch size should be large enough. # The global batch size is computed as: # batch_size * n_gpus * grad_accumulation_factor. @@ -96,7 +96,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer transformer_input_size: 1280 d_model: 256 @@ -271,50 +271,32 @@ normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global update_until_epoch: 4 -# Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks +############################## Augmentations ################################### +# Speed perturbation +speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb + orig_freq: !ref <sample_rate> + speeds: [95, 100, 105] + +# Time Drop time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref <freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -323,14 +305,7 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <freq_drop>, !ref <time_warp>] - -# Speed perturbation do_speed_perturb: True -speed_changes: [95, 100, 105] 
# List of speed changes for time-stretching - -speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb - orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> compute_features: !new:speechbrain.lobes.features.Fbank sample_rate: !ref <sample_rate> diff --git a/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml b/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml index 45a765cd1238f0aa2c882dfb46731b3b3c4e3adb..8dd221ca407d4e68e34479cbce9e87f49e6ca542 100644 --- a/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml +++ b/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml @@ -51,7 +51,7 @@ test_csv: ckpt_interval_minutes: 30 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### # To make Transformers converge, the global batch size should be large enough. # The global batch size is computed as: # batch_size * n_gpus * grad_accumulation_factor. @@ -96,7 +96,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer d_model: 512 nhead: 4 @@ -126,7 +126,7 @@ lm_weight: 0.60 ctc_weight_decode: 0.40 temperature: 1.15 temperature_lm: 1.15 -############################## models ################################ +############################## Models ################################ CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd input_shape: (8, 10, 80) @@ -258,57 +258,32 @@ normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global update_until_epoch: 4 -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Time Drop -time_drop_length_low: 15 # Min length for temporal chunk to drop in spectrogram -time_drop_length_high: 25 # Max length for temporal chunk to drop in spectrogram -time_drop_count_low: 5 # Min number of chunks to drop in time in the spectrogram -time_drop_count_high: 5 # Max number of chunks to drop in time in the spectrogram -time_drop_replace: "zeros" # Method of dropping chunks - time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <time_drop_length_low> - drop_length_high: !ref <time_drop_length_high> - drop_count_low: !ref <time_drop_count_low> - drop_count_high: !ref <time_drop_count_high> - replace: !ref <time_drop_replace> - dim: 1 + drop_length_low: 15 + drop_length_high: 25 + drop_count_low: 5 + drop_count_high: 5 # Frequency Drop -freq_drop_length_low: 25 # Min length for chunks to drop in frequency in the spectrogram -freq_drop_length_high: 35 # Max length for chunks to drop in frequency in the spectrogram -freq_drop_count_low: 2 # Min number of chunks to drop in frequency in the spectrogram -freq_drop_count_high: 2 # Max number of chunks to drop in frequency in the spectrogram -freq_drop_replace: "zeros" # Method of dropping chunks - freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop - drop_length_low: !ref <freq_drop_length_low> - drop_length_high: !ref <freq_drop_length_high> - drop_count_low: !ref <freq_drop_count_low> - drop_count_high: !ref 
<freq_drop_count_high> - replace: !ref <freq_drop_replace> + drop_length_low: 25 + drop_length_high: 35 + drop_count_low: 2 + drop_count_high: 2 dim: 2 # Time warp -time_warp_window: 5 # Length of time warping window -time_warp_mode: "bicubic" # Time warping method - time_warp: !new:speechbrain.augment.freq_domain.Warping - warp_window: !ref <time_warp_window> - warp_mode: !ref <time_warp_mode> - dim: 1 fea_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 diff --git a/recipes/Switchboard/ASR/transformer/train.py b/recipes/Switchboard/ASR/transformer/train.py index 5fb6ebc47dbe64b3ddadd0ee7506282aa10138e6..dcc5279523a545aaa30d958d18b5962e85e03460 100644 --- a/recipes/Switchboard/ASR/transformer/train.py +++ b/recipes/Switchboard/ASR/transformer/train.py @@ -133,16 +133,16 @@ class ASR(sb.core.Brain): tokens, tokens_lens = batch.tokens if stage == sb.Stage.TRAIN: + # Labels must be extended if parallel augmentation or concatenated + # augmentation was performed on the input (increasing the batch dimension) if hasattr(self.hparams, "fea_augment"): - tokens = self.hparams.fea_augment.replicate_labels(tokens) - tokens_lens = self.hparams.fea_augment.replicate_labels( - tokens_lens - ) - tokens_eos = self.hparams.fea_augment.replicate_labels( - tokens_eos - ) - tokens_eos_lens = self.hparams.fea_augment.replicate_labels( - tokens_eos_lens + ( + tokens, + tokens_lens, + tokens_eos, + tokens_eos_lens, + ) = self.hparams.fea_augment.replicate_multiple_labels( + tokens, tokens_lens, tokens_eos, tokens_eos_lens ) loss_seq = self.hparams.seq_cost( diff --git a/recipes/Switchboard/LM/hparams/transformer.yaml b/recipes/Switchboard/LM/hparams/transformer.yaml index 2f27463afa4214e0c31893e57f595e5bab1fbdf8..b501faf55cf7d8830f06abafa2145b02a9d6a465 100644 --- a/recipes/Switchboard/LM/hparams/transformer.yaml +++ b/recipes/Switchboard/LM/hparams/transformer.yaml @@ -36,7 +36,7 @@ test_csv: !ref <save_folder>/test.csv # (e.g. /path/to/2000_unigram.model) tokenizer_file: !PLACEHOLDER -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 100 batch_size: 164 lr: 1 diff --git a/recipes/Switchboard/LM/hparams/transformer_finetune.yaml b/recipes/Switchboard/LM/hparams/transformer_finetune.yaml index f5657c76c2625fe76fac251d9b2e94086b4db23e..5b0860e41a432252b513ccb23afe81508d42ad23 100644 --- a/recipes/Switchboard/LM/hparams/transformer_finetune.yaml +++ b/recipes/Switchboard/LM/hparams/transformer_finetune.yaml @@ -39,7 +39,7 @@ test_csv: !ref <save_folder>/test.csv # instead. E.g. if you want to use your own LM / tokenizer.
pretrained_lm_tokenizer_path: speechbrain/asr-transformer-transformerlm-librispeech -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 5 batch_size: 128 lr: 2 diff --git a/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml b/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml index e6c546bdf9deec07bffa0c0546401f1990c8b33a..d07d83e707b0ae05a49e3c2813a79a1a0baec88e 100644 --- a/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml +++ b/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml @@ -20,7 +20,7 @@ train_csv: !ref <output_folder>/train_lm.csv valid_csv: !ref <output_folder>/dev.csv skip_prep: False -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 2000 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/TIMIT/ASR/CTC/hparams/train.yaml b/recipes/TIMIT/ASR/CTC/hparams/train.yaml index dce350b7e2f2edc7621b7f223c0e83b70a41a301..145fa1a3e362af75bd80e3d435bf0c4ce0ac0d7d 100644 --- a/recipes/TIMIT/ASR/CTC/hparams/train.yaml +++ b/recipes/TIMIT/ASR/CTC/hparams/train.yaml @@ -25,7 +25,7 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augmentation NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 8 lr: 1.0 @@ -36,7 +36,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 40 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -69,6 +69,8 @@ test_dataloader_opts: normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -76,58 +78,38 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U ext: wav csv_file: !ref <noise_annotation> - # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero.
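# A note on the trimmed Augmenter blocks throughout this PR: the deleted keys
# (parallel_augment: False, repeat_augment: 1, shuffle_augmentations: False
# and, where removed, concat_original: False) appear to coincide with the
# Augmenter constructor defaults, so the shorter configs should be
# behavior-preserving. Sketch of the intended equivalence (defaults assumed,
# not verified against every SpeechBrain version):
#
#   from speechbrain.augment.augmenter import Augmenter
#   from speechbrain.augment.time_domain import DropFreq
#
#   explicit = Augmenter(parallel_augment=False, repeat_augment=1,
#                        shuffle_augmentations=False, concat_original=True,
#                        min_augmentations=4, max_augmentations=4,
#                        augment_prob=1.0, augmentations=[DropFreq()])
#   trimmed = Augmenter(concat_original=True, min_augmentations=4,
#                       max_augmentations=4, augment_prob=1.0,
#                       augmentations=[DropFreq()])   # expected: same behavior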
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -137,6 +119,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> diff --git a/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml b/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml index cc3276506d547eb6b02fd2f830d902730164a38f..d61179fa9787d06cdfe128ff4942485725648033 100644 --- a/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml +++ b/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml @@ -22,7 +22,7 @@ test_annotation: !ref <save_folder>/test.json skip_prep: False # Skip data preparation uppercase: False # Must be True when the TIMIT dataset is in the upper-case version -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 8 # Used if dynamic_batching is False lr: 0.0003 @@ -34,7 +34,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 40 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -88,45 +88,30 @@ dynamic_batch_sampler: shuffle: !ref <shuffle> batch_ordering: !ref <batch_ordering> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
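# min_augmentations / max_augmentations bound how many of the listed
# augmentations are sampled per batch; with min = max = 3 and exactly three
# transforms listed, all of them are applied every time (in list order, since
# shuffle_augmentations stays at its default). A sketch with illustrative
# modules:
#
#   import torch
#   from speechbrain.augment.augmenter import Augmenter
#   from speechbrain.augment.time_domain import SpeedPerturb, DropFreq, DropChunk
#
#   aug = Augmenter(min_augmentations=3, max_augmentations=3,
#                   augmentations=[SpeedPerturb(16000, speeds=[95, 100, 105]),
#                                  DropFreq(), DropChunk()])
#   wavs, lens = aug(torch.rand(2, 16000), lengths=torch.ones(2))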
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -135,6 +120,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global diff --git a/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml b/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml index 9756e9356b2676c1d3f32d4b0f9cf6362e2c013b..705f79e9a8cefc25b3138070b6fcd10682381c03 100644 --- a/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml +++ b/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml @@ -23,7 +23,7 @@ test_annotation: !ref <save_folder>/test.json skip_prep: False # Skip data preparation uppercase: False # Must be True when the TIMIT dataset is in the upper-case version -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 8 lr: 0.0003 @@ -33,7 +33,7 @@ sorting: ascending precision: fp32 # bf16, fp16 or fp32 sample_rate: 16000 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dnn_layers: 2 dnn_neurons: 1024 @@ -66,45 +66,30 @@ test_dataloader_opts: batch_size: !ref <batch_size> num_workers: !ref <batch_size> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
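# Inside the recipes these modules are not called directly: compute_forward
# applies the combined augmenter, and, as the train.py hunks in this diff
# show, the targets are then replicated to match the enlarged batch. A hedged
# sketch of that training-time pattern (variable names are illustrative):
#
#   if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"):
#       wavs, wav_lens = self.hparams.wav_augment(wavs, wav_lens)
#       tokens = self.hparams.wav_augment.replicate_labels(tokens)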
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -113,6 +98,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> diff --git a/recipes/TIMIT/ASR/transducer/hparams/train.yaml b/recipes/TIMIT/ASR/transducer/hparams/train.yaml index 5b8e53809d679c0829bba2c8662ef2491db5acae..204297dc68591031a53ac15128d3194a551071c4 100644 --- a/recipes/TIMIT/ASR/transducer/hparams/train.yaml +++ b/recipes/TIMIT/ASR/transducer/hparams/train.yaml @@ -28,7 +28,7 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augmentation NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 8 lr: 1.0 @@ -40,7 +40,7 @@ n_fft: 400 n_mels: 40 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -92,6 +92,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank normalize: !new:speechbrain.processing.features.InputNormalization norm_type: global +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -99,58 +101,38 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U ext: wav csv_file: !ref <noise_annotation> - # Add noise to input
signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -160,6 +142,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + enc: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] diff --git a/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml b/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml index 90f899d26bfb469234b4856c8e5af7a9f81b38f9..9ead09f56ecc8d959455233397cc28ab11d875b6 100644 --- a/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml +++ b/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml @@ -28,7 +28,7 @@ test_annotation: !ref <save_folder>/test.json skip_prep: False # Skip data preparation uppercase: False # Must be True when the TIMIT dataset is in the upper-case version -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 batch_size: 8 lr: 0.0003 @@ -41,7 +41,7 @@ sample_rate: 16000 # n_fft: 400 # n_mels: 40 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU # dropout: 0.15 
dnn_blocks: 1 @@ -74,45 +74,30 @@ test_dataloader_opts: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False - concat_original: False - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -121,6 +106,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2 source: !ref <wav2vec2_hub> output_norm: True diff --git a/recipes/TIMIT/Alignment/hparams/train.yaml b/recipes/TIMIT/Alignment/hparams/train.yaml index 7a2a581d9719c29a3bce62925feebf21630c3b0d..aaf06b7ffc0f61291d5de751c9fe3094876fdef7 100644 --- a/recipes/TIMIT/Alignment/hparams/train.yaml +++ b/recipes/TIMIT/Alignment/hparams/train.yaml @@ -20,7 +20,7 @@ valid_annotation: !ref <data_folder>/dev.json test_annotation: !ref <data_folder>/test.json skip_prep: False # Skip data prep -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 10 batch_size: 256 lr: 0.0003 @@ -40,7 +40,7 @@ phn_set: 60 # {60, 48, 39} output_neurons: 183 blank_index: 182 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dnn_blocks: 1 dnn_neurons: 2000 diff --git a/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml b/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml 
index 6ffad0b0041d9de2ff5768dd303c29f7b2d81855..2f9f924c71069e6dadffcd638d25692efd1597c5 100644 --- a/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml +++ b/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml @@ -34,7 +34,7 @@ valid_csv: !ref <output_folder>/dev/dev.csv test_csv: - !ref <output_folder>/test/test.csv -# Training parameters +####################### Training Parameters #################################### # To make Transformers converge, the global bath size should be large enough. # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor. # Empirically, we found that this value should be >= 128. @@ -97,7 +97,7 @@ valid_dataloader_opts: test_dataloader_opts: batch_size: 1 -####################### Model parameters ########################### +####################### Model Parameters ########################### # Transformer d_model: 512 nhead: 8 diff --git a/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml b/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml index a97290f13f49d2cb09271a388529d4ee652f3015..03c91b12692c4415e1c3cdb71b0802467b2505f6 100644 --- a/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml +++ b/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml @@ -14,7 +14,7 @@ skip_prep: False train_csv: !ref <output_folder>/train/train.csv valid_csv: !ref <output_folder>/dev/dev.csv -# Training parameters +####################### Training Parameters #################################### token_type: bpe # ["unigram", "bpe", "char"] token_output: 500 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml b/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml index 1d9613a165bb25336112a8c15f38343d9edb52ae..3ecb1119b460be4c313dd61fc7d6a2a0321bc61f 100644 --- a/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml +++ b/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml @@ -48,7 +48,7 @@ skip_manifest_creation: False ckpt_interval_minutes: 15 # save checkpoint every N min -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 25 batch_size: 32 lr: 0.001 diff --git a/recipes/Voicebank/ASR/CTC/hparams/train.yaml b/recipes/Voicebank/ASR/CTC/hparams/train.yaml index 65b833a0bcc33915a60fbe278de1fe2c814c0384..a49bae5fa4caebfd13f87b4fc4a8f24db47af522 100644 --- a/recipes/Voicebank/ASR/CTC/hparams/train.yaml +++ b/recipes/Voicebank/ASR/CTC/hparams/train.yaml @@ -20,7 +20,7 @@ valid_annotation: !ref <output_folder>/valid.json test_annotation: !ref <output_folder>/test.json skip_prep: False # Skip data preparation -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 8 sorting: ascending @@ -37,7 +37,7 @@ sample_rate: 16000 n_fft: 400 n_mels: 40 -# Model parameters +####################### Model Parameters ####################################### activation: !name:torch.nn.LeakyReLU dropout: 0.15 cnn_blocks: 2 @@ -61,45 +61,31 @@ compute_features: !new:speechbrain.lobes.features.Fbank n_fft: !ref <n_fft> n_mels: !ref <n_mels> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - 
speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 3 max_augmentations: 3 augment_prob: 1.0 @@ -108,6 +94,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## + model: !new:speechbrain.lobes.models.CRDNN.CRDNN input_shape: [null, null, !ref <n_mels>] activation: !ref <activation> diff --git a/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml b/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml index 2a96e25db7460d473460889dc326f6589fd2eceb..c3391498a7ad7792331fce172dd820cd15ce9cb1 100644 --- a/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml +++ b/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml @@ -18,7 +18,7 @@ valid_annotation: !ref <data_folder>/valid.json test_annotation: !ref <data_folder>/test.json skip_prep: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 8 lr: 0.0001 diff --git a/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml b/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml index a86fbc4ccf1cec27c6c4979fe386b146b7418e65..d384d026ae5bb20de42f48f9beb572e93bd65a00 100644 --- a/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml +++ b/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml @@ -18,7 +18,7 @@ valid_annotation: !ref <data_folder>/valid.json test_annotation: !ref <data_folder>/test.json skip_prep: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 20 ctc_epochs: 4 batch_size: 8 diff --git a/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml b/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml 
index 1bf087d8558d2c788c5829f9f0d5b0b0b68f5cbc..1835342c30d97c7335927fd2f6c17f7e8fe2a18d 100644 --- a/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml +++ b/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml @@ -24,7 +24,7 @@ test_annotation: !ref <data_folder>/test.json noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script skip_prep: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 30 ctc_epochs: 0 batch_size: 8 @@ -141,6 +141,8 @@ compute_stft: !new:speechbrain.processing.features.STFT spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude power: 0.5 +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -148,58 +150,38 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U ext: wav csv_file: !ref <noise_annotation> - # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
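A note on the inlining pattern above: HyperPyYAML's !new: tag maps the indented keys directly onto the class constructor, so folding the standalone hyperparameters into the block changes nothing at runtime. A minimal Python sketch of what the inlined DropFreq/DropChunk entries construct follows; the values mirror the YAML, while the dummy batch and the millisecond conversions are my own additions (DropChunk lengths are counted in samples).

# Equivalent Python for the inlined DropFreq/DropChunk hyperparameters (sketch).
import torch
from speechbrain.augment.time_domain import DropChunk, DropFreq

drop_freq = DropFreq(
    drop_freq_low=0,         # band centers span the whole spectrum...
    drop_freq_high=1,        # ...expressed as fractions of Nyquist
    drop_freq_count_low=1,   # drop at least 1 band
    drop_freq_count_high=3,  # and at most 3
    drop_freq_width=0.05,    # width of each dropped band
)
drop_chunk = DropChunk(
    drop_length_low=1000,    # in samples: roughly 62 ms at 16 kHz
    drop_length_high=2000,   # roughly 125 ms at 16 kHz
    drop_count_low=1,
    drop_count_high=5,
)

wavs = torch.randn(4, 16000)   # (batch, time) dummy waveforms
lens = torch.ones(4)           # relative lengths, as SpeechBrain expects
wavs = drop_freq(wavs)
wavs = drop_chunk(wavs, lens)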
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -209,7 +191,6 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] - fbank: !new:speechbrain.lobes.features.Fbank n_mels: !ref <n_mels> sample_rate: !ref <sample_rate> diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml index d7d2be081e1f4f6997b2bcdb6b0ebbbf6953ec40..c93ba21ecb1109e9cf67ce5c02a4255fff16192a 100644 --- a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml +++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml @@ -85,6 +85,7 @@ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL @@ -95,18 +96,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> - # Download and prepare the dataset of room impulse responses for augmentation prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <RIR_DATASET_URL> @@ -122,37 +119,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb num_workers: !ref <num_workers> # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter parallel_augment: True concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml index 6cb17c6bc7b92a5eaf1592b4d66584f01438ae15..becd8e4d41511e073e5f35f1ba258cff8e8466a4 100644 --- a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml +++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml @@ -104,6 +104,7 @@ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL @@ -114,18 +115,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> - # Download and prepare the dataset of room impulse responses for augmentation prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <RIR_DATASET_URL> @@ -141,37 +138,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb num_workers: !ref <num_workers> # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter parallel_augment: True concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml index 617457f1a4bf8bd7432f63c71f94c1b3a1d5b2cb..a20786574601e7d61b37848db29cf66493d2255e 100644 --- a/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml +++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml @@ -55,7 +55,7 @@ right_frames: 0 deltas: False # Number of speakers -# 1211 for vox1, 5994 for vox2, 7205 for vox1+vox2 +# 1211 for vox1, 5994 for vox2, 7205 for vox1+vox2 out_n_neurons: 7205 num_workers: 4 @@ -85,6 +85,7 @@ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL @@ -95,18 +96,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> - # Download and prepare the dataset of room impulse responses for augmentation prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <RIR_DATASET_URL> @@ -122,37 +119,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb num_workers: !ref <num_workers> # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter parallel_augment: True concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml index 8a70462c5ac9ccc35900016a3f810631e0b38819..ab628c681b9729ea2cf431593f524c151c599570 100644 --- a/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml +++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml @@ -88,6 +88,7 @@ classifier: !new:speechbrain.lobes.models.Xvector.Classifier epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL @@ -98,18 +99,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> - # Download and prepare the dataset of room impulse responses for augmentation prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <RIR_DATASET_URL> @@ -125,37 +122,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb num_workers: !ref <num_workers> # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. 
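Across all of these wav_augment hunks, only parallel_augment: False, concat_original: False, repeat_augment: 1, and shuffle_augmentations: False are deleted without replacement, which is consistent with those being the Augmenter constructor defaults; non-default values such as parallel_augment: True above are kept. A short sketch of how the composed augmenter is used; the DropFreq/DropChunk defaults and the dummy batch are placeholders of my own, and min/max are shrunk from the recipes' 4 to keep the sketch small.

# Composing augmentations, mirroring the wav_augment entries above (sketch).
import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.time_domain import DropChunk, DropFreq

wav_augment = Augmenter(
    parallel_augment=True,   # each chosen augmentation runs on the clean input
    concat_original=True,    # keep the clean batch in the output as well
    min_augmentations=2,
    max_augmentations=2,
    augment_prob=1.0,        # always augment
    augmentations=[DropFreq(), DropChunk()],
)

wavs = torch.randn(4, 16000)
lens = torch.ones(4)
wavs_aug, lens_aug = wav_augment(wavs, lens)
# With parallel_augment + concat_original, the output batch holds the clean
# examples plus one augmented copy per applied augmentation (here 4+4+4=12).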
-drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter parallel_augment: True concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 diff --git a/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml b/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml index a2d87248629eba9f9d985ca49962eef04fd32cf4..db29a301b80588ce8b2f0d82b0f4d4da16afa24b 100644 --- a/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml +++ b/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml @@ -58,6 +58,8 @@ val_dataloader_options: num_workers: 1 batch_size: !ref <batch_size_val> +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -72,7 +74,6 @@ prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL ext: wav csv_file: !ref <rir_annotation> - # Add reverberation to input signal add_reverb: !new:speechbrain.augment.time_domain.AddReverb csv_file: !ref <rir_annotation> @@ -81,27 +82,21 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb num_workers: !ref <num_workers> # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [90, 100, 110] # List of speed changes for time-stretching speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 shuffle_augmentations: True min_augmentations: 1 max_augmentations: 3 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml index a38bb8c885792abce888d34e5f0ab4138c3c1625..843c9fb0917fcb5a1aed07668a0426be2d59d558 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml @@ -45,7 +45,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 8000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 8 lr: 0.0001 diff --git 
a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml index af3fcb0d79f942c528b591cbf46d4f2cec0c8e88..b55f05a1169c344207f7fee4a885a425be200b5a 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml @@ -45,7 +45,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 8000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 8 lr: 0.0001 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml index 7c2990442e0d631d0ce2b7a2fbb6e0f2edf33103..545428d5ac31f32ee81dd2af7a616040cfed231e 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml @@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 8000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 10 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml index f3158a6252e95e9186f4cea07cdbacf78f14f3a1..d974b03c11edf0c0db0698f3b7f30d81a4c9a5a0 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml @@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 8000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml index 75c90a0f108c62040ee1f01228d49a6505ab1210..df1935306b47512947cd04962744c0217192811f 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml @@ -44,7 +44,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml index 625801e5178f4383f7b4c65bc7f2c44ec7730e43..dc27834919fca3b9cff38b3a12e65fb56b18c653 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml @@ -46,7 +46,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 16000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml index d42060642fd291292f01f862c8d337e180a68a98..d11332c7e0702ea5cae1c778a4f8b7f90c13ed58 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml +++ 
b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml @@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 16000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml index 2acd34370d777850006ac1d5c45830675c59c18f..3721698bced2416d6f97521978eb18934a0f3a6c 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml @@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 8000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml index 536a46f4960c264449a682b748c7c350b0847187..14a38a06b6b442e3d1b9086e6889a0125a884ef1 100644 --- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml +++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml @@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk sample_rate: 8000 n_audio_to_save: 20 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml b/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml index 1661d68f271740a2af8c1160a137ca7823e6331b..db920e7fab8d969cbebe61a15b4884ce6f843da1 100644 --- a/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml +++ b/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml @@ -42,7 +42,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml b/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml index d4bc250d0c4bcddc8385717970e265729c993b3c..8538529a629e4ed99176930e46d778e9c8a12438 100644 --- a/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml +++ b/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml @@ -40,7 +40,7 @@ num_spks: 2 # set to 3 for wsj0-3mix save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml b/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml index 03833050419273e7888f56e49ba34868c371a6fc..0305c6236d5cb7faa07739321428e4ad97e1a5a4 100644 --- a/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml +++ b/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml @@ -36,7 +36,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WSJ0Mix/separation/hparams/dprnn.yaml b/recipes/WSJ0Mix/separation/hparams/dprnn.yaml index 
a78a782666f0cd93c8f696174948df1cc25cdef8..df1952d8c93d034db2f97749865a02a40c45ee22 100644 --- a/recipes/WSJ0Mix/separation/hparams/dprnn.yaml +++ b/recipes/WSJ0Mix/separation/hparams/dprnn.yaml @@ -36,7 +36,7 @@ noprogressbar: False save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WSJ0Mix/separation/hparams/resepformer.yaml b/recipes/WSJ0Mix/separation/hparams/resepformer.yaml index 2b2711f52c0f5fdb9c1f0961d63561b6704ac289..406f2aa76510ddce75fed5fc8e40e2cee3aea08b 100644 --- a/recipes/WSJ0Mix/separation/hparams/resepformer.yaml +++ b/recipes/WSJ0Mix/separation/hparams/resepformer.yaml @@ -37,7 +37,7 @@ num_spks: 2 # set to 3 for wsj0-3mix save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml b/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml index 15550c9a24f77362c392cf637b0b13cb37ea21b3..2cf2b7ac551547a88df59a1f2f658cc895f0b4be 100644 --- a/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml +++ b/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml @@ -37,7 +37,7 @@ num_spks: 2 # set to 3 for wsj0-3mix save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml b/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml index 82a2d3009029882b4aa6726519cc08197d83fe00..c896f2dfd844ee1dbac3625f3eea38358fcc8d0d 100644 --- a/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml +++ b/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml @@ -39,7 +39,7 @@ noprogressbar: False save_audio: True # Save estimated sources on disk sample_rate: 16000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WSJ0Mix/separation/hparams/sepformer.yaml b/recipes/WSJ0Mix/separation/hparams/sepformer.yaml index 4787fb3aa93a7854fc287774b28097b3b6cdbbb7..77319604d02669a6823e79acdb6aac75f70f88aa 100644 --- a/recipes/WSJ0Mix/separation/hparams/sepformer.yaml +++ b/recipes/WSJ0Mix/separation/hparams/sepformer.yaml @@ -40,7 +40,7 @@ save_audio: True # Save estimated sources on disk n_audio_to_save: 20 sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git a/recipes/WSJ0Mix/separation/hparams/skim.yaml b/recipes/WSJ0Mix/separation/hparams/skim.yaml index 53b312efd5b1e076b6e7e77fc57879ac41e74110..606c7060a5115b863805c835575bd6a6db182cf7 100644 --- a/recipes/WSJ0Mix/separation/hparams/skim.yaml +++ b/recipes/WSJ0Mix/separation/hparams/skim.yaml @@ -37,7 +37,7 @@ num_spks: 2 # set to 3 for wsj0-3mix save_audio: False # Save estimated sources on disk sample_rate: 8000 -# Training parameters +####################### Training Parameters #################################### N_epochs: 200 batch_size: 1 lr: 0.00015 diff --git 
a/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml b/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml index 644ca89a27255f1f37a10a15004294d010b78737..0d9601d2068e91db1819a2647a8628657bc0a47c 100644 --- a/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml +++ b/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml @@ -30,7 +30,7 @@ train_annotation: !ref <output_folder>/train.json valid_annotation: !ref <output_folder>/valid.json test_annotation: !ref <output_folder>/test.json -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 15 lr: 0.0001 lr_wav2vec: 0.00001 diff --git a/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml b/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml index db7c1ddb7287627e1f59ac1374bc774e94f4e302..eff38c7bf42af12f0bb5945cc54209c12ba36551 100644 --- a/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml +++ b/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml @@ -13,7 +13,7 @@ train_csv: !ref <output_folder>/train.csv valid_csv: !ref <output_folder>/valid.csv skip_prep: False -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 51 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/fluent-speech-commands/direct/hparams/train.yaml b/recipes/fluent-speech-commands/direct/hparams/train.yaml index a7c072343f0a44a18910ef972ba3d63a34af4bf3..428faf144a9fb1e371bde9c3a6816bfb1e95138d 100644 --- a/recipes/fluent-speech-commands/direct/hparams/train.yaml +++ b/recipes/fluent-speech-commands/direct/hparams/train.yaml @@ -32,14 +32,14 @@ rir_annotation: !ref <save_folder>/rir.csv tokenizer_file: https://www.dropbox.com/s/hvf2huofnq0sjbn/51_unigram.model?dl=1 skip_prep: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 6 batch_size: 16 lr: 0.0003 # token_type: unigram # ["unigram", "bpe", "char"] sorting: random -# Model parameters +####################### Model Parameters ####################################### sample_rate: 16000 emb_size: 128 dec_neurons: 512 @@ -65,6 +65,8 @@ dataloader_opts: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -87,45 +89,32 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb num_workers: !ref <num_workers> # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Frequency drop: randomly drops a number of frequency bands to zero.
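For readers new to these knobs: AddNoise draws a signal-to-noise ratio uniformly from [snr_low, snr_high] dB for each example and scales the noise to hit it. A back-of-envelope helper below shows what the dB range means; mix_at_snr is a hypothetical function of my own, not the library's internal code, which additionally handles resampling (noise_sample_rate/clean_sample_rate) and variable lengths.

# What a target SNR in dB means for the noise scale (sketch).
import torch

def mix_at_snr(clean: torch.Tensor, noise: torch.Tensor, snr_db: float) -> torch.Tensor:
    p_clean = clean.pow(2).mean()
    p_noise = noise.pow(2).mean().clamp_min(1e-12)
    # SNR_dB = 10 * log10(P_clean / P_noise_scaled), solved for the scale factor.
    scale = torch.sqrt(p_clean / (p_noise * 10 ** (snr_db / 10)))
    return clean + scale * noise

clean, noise = torch.randn(16000), torch.randn(16000)
hard = mix_at_snr(clean, noise, snr_db=0.0)    # snr_low: noise as strong as the speech
easy = mix_at_snr(clean, noise, snr_db=15.0)   # snr_high: noise ~32x weaker in power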
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 3 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 3 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 shuffle_augmentations: True min_augmentations: 1 max_augmentations: 4 @@ -136,7 +125,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] -# Models +############################## Models ########################################## + asr_model_source: speechbrain/asr-crdnn-rnnlm-librispeech slu_enc: !new:speechbrain.nnet.containers.Sequential diff --git a/recipes/timers-and-such/LM/hparams/train.yaml b/recipes/timers-and-such/LM/hparams/train.yaml index 485dd54265a57612f862b180d223c79149d3aa3b..f3ba652edbc6effb882d697e13ee3747d7badf9f 100644 --- a/recipes/timers-and-such/LM/hparams/train.yaml +++ b/recipes/timers-and-such/LM/hparams/train.yaml @@ -23,7 +23,7 @@ csv_test_synth: !ref <output_folder>/test-synth-type=decoupled.csv csv_test_real: !ref <output_folder>/test-real-type=decoupled.csv skip_prep: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 10 batch_size: 128 lr: 0.0003 diff --git a/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml b/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml index 7554a03421cfa18627b4809af2296f6c54d4ea13..2a9f39161d2237e228ee0a9d82def76728a52324 100644 --- a/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml +++ b/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml @@ -15,7 +15,7 @@ train_csv: !ref <output_folder>/train-type=direct.csv valid_csv: !ref <output_folder>/dev-real-type=direct.csv -# Training parameters +####################### Training Parameters #################################### token_type: unigram # ["unigram", "bpe", "char"] token_output: 51 # index(blank/eos/bos/unk) = 0 character_coverage: 1.0 diff --git a/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml b/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml index 212752668aba1ecdabcfffab3831f906c34165b8..1ee56d5612f0a7c0cf18b4afbb14f56bb64523c3 100644 ---
a/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml +++ b/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml @@ -34,7 +34,7 @@ skip_prep: False ckpt_interval_minutes: 15 # save checkpoint every N min test_on_all_real: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 batch_size: 16 lr: 0.0003 diff --git a/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml b/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml index fcf6393e68868e7f9b83283bcde5c0a107ab4882..5f0d93d09a22d524731862d3c83823911aea5ef8 100644 --- a/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml +++ b/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml @@ -34,7 +34,7 @@ skip_prep: False ckpt_interval_minutes: 15 # save checkpoint every N min test_on_all_real: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 batch_size: 16 lr: 0.0003 diff --git a/recipes/timers-and-such/direct/hparams/train.yaml b/recipes/timers-and-such/direct/hparams/train.yaml index 4fb574fc3d82c32575aadef338e63f8b757f1ea4..01909eb5b02891a6bc91b66ed5a360f6e24df5bd 100644 --- a/recipes/timers-and-such/direct/hparams/train.yaml +++ b/recipes/timers-and-such/direct/hparams/train.yaml @@ -38,14 +38,14 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequencies for data augm NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1 noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 batch_size: 16 lr: 0.0003 # token_type: unigram # ["unigram", "bpe", "char"] sorting: random -# Model parameters +####################### Model Parameters ####################################### sample_rate: 16000 emb_size: 128 dec_neurons: 512 @@ -71,6 +71,7 @@ dataloader_opts: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL @@ -80,56 +81,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U csv_file: !ref <noise_annotation> # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
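One practical consequence of this inlining, worth noting once: values that used to be standalone keys (snr_low, speed_changes, drop_chunk_*, ...) are now constructor arguments inside a !new: block, so they are no longer top-level keys that the usual --key value command-line style can override by name; the keys that remain at the top level still can be. A sketch of loading one of these files with HyperPyYAML; the path and override values are placeholders of my own, and whether data_folder is a !PLACEHOLDER varies by recipe.

# Loading a recipe file; overrides apply to top-level keys (sketch).
from hyperpyyaml import load_hyperpyyaml

with open("hparams/train.yaml") as fin:
    hparams = load_hyperpyyaml(
        fin,
        overrides={"data_folder": "/path/to/data", "number_of_epochs": 2},
    )

# Loading a full recipe file instantiates every object declared in it,
# including models, so the result is ready to use:
wav_augment = hparams["wav_augment"]  # an already-constructed Augmenter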
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -139,8 +121,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] +############################## Models ########################################## -# Models asr_model_source: speechbrain/asr-crdnn-rnnlm-librispeech slu_enc: !new:speechbrain.nnet.containers.Sequential diff --git a/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml b/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml index b9b451e910b9bda29dfd21491c48cf5a317b1619..b9ad3cfc2490fab5fc8f91930e6bac77fbd10bf6 100644 --- a/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml +++ b/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml @@ -37,7 +37,7 @@ ckpt_interval_minutes: 15 # save checkpoint every N min test_on_all_real: False -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 50 batch_size: 8 lr: 0.0004 @@ -49,7 +49,7 @@ freeze_wav2vec: False # token_type: unigram # ["unigram", "bpe", "char"] sorting: ascending -# Model parameters +####################### Model Parameters ####################################### sample_rate: 16000 emb_size: 128 dec_neurons: 512 @@ -171,45 +171,31 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer lr_annealing_wav2vec2: !ref <lr_annealing_wav2vec2> counter: !ref <epoch_counter> -# Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching +############################## Augmentations ################################### +# Speed perturbation speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. 
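The speeds list is given in percent of the original rate, so [95, 100, 105] stretches or squeezes an input by up to 5% (100 leaves it untouched). A minimal sketch of the perturbation on its own, outside the Augmenter; the dummy input is my own, and the output length changes with the sampled speed.

# Speed perturbation as configured in these recipes (sketch).
import torch
from speechbrain.augment.time_domain import SpeedPerturb

speed_perturb = SpeedPerturb(orig_freq=16000, speeds=[95, 100, 105])
wavs = torch.randn(4, 16000)   # (batch, time)
wavs_sp = speed_perturb(wavs)  # one speed is sampled at random per call
print(wavs_sp.shape)           # time dimension grows or shrinks by ~5%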
-drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 diff --git a/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml b/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml index 3d1c4e1566187ebe293242ac064c833fdb4fbce1..b804df9f952a3fb90accb9c9a9db0cb5201d2662 100644 --- a/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml +++ b/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml @@ -39,7 +39,7 @@ NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.z noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 batch_size: 16 lr: 0.0003 @@ -73,6 +73,8 @@ dataloader_opts: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -81,57 +83,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U csv_file: !ref <noise_annotation> # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # Max SNR for noise augmentation - add_noise: !new:speechbrain.augment.time_domain.AddNoise csv_file: !ref <noise_annotation> - snr_low: !ref <snr_low> - snr_high: !ref <snr_high> + snr_low: 0 + snr_high: 15 noise_sample_rate: !ref <sample_rate> clean_sample_rate: !ref <sample_rate> num_workers: !ref <num_workers> # Speed perturbation -speed_changes: [95, 100, 105] # List of speed changes for time-stretching - speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb orig_freq: !ref <sample_rate> - speeds: !ref <speed_changes> + 
speeds: [95, 100, 105] # Frequency drop: randomly drops a number of frequency bands to zero. -drop_freq_low: 0 # Min frequency band dropout probability -drop_freq_high: 1 # Max frequency band dropout probability -drop_freq_count_low: 1 # Min number of frequency bands to drop -drop_freq_count_high: 3 # Max number of frequency bands to drop -drop_freq_width: 0.05 # Width of frequency bands to drop - drop_freq: !new:speechbrain.augment.time_domain.DropFreq - drop_freq_low: !ref <drop_freq_low> - drop_freq_high: !ref <drop_freq_high> - drop_freq_count_low: !ref <drop_freq_count_low> - drop_freq_count_high: !ref <drop_freq_count_high> - drop_freq_width: !ref <drop_freq_width> + drop_freq_low: 0 + drop_freq_high: 1 + drop_freq_count_low: 1 + drop_freq_count_high: 3 + drop_freq_width: 0.05 # Time drop: randomly drops a number of temporal chunks. -drop_chunk_count_low: 1 # Min number of audio chunks to drop -drop_chunk_count_high: 5 # Max number of audio chunks to drop -drop_chunk_length_low: 1000 # Min length of audio chunks to drop -drop_chunk_length_high: 2000 # Max length of audio chunks to drop - drop_chunk: !new:speechbrain.augment.time_domain.DropChunk - drop_length_low: !ref <drop_chunk_length_low> - drop_length_high: !ref <drop_chunk_length_high> - drop_count_low: !ref <drop_chunk_count_low> - drop_count_high: !ref <drop_chunk_count_high> + drop_length_low: 1000 + drop_length_high: 2000 + drop_count_low: 1 + drop_count_high: 5 -# Augmenter: Combines previously defined augmentations to perform data augmentation # Augmenter: Combines previously defined augmentations to perform data augmentation wav_augment: !new:speechbrain.augment.augmenter.Augmenter - parallel_augment: False concat_original: True - repeat_augment: 1 - shuffle_augmentations: False min_augmentations: 4 max_augmentations: 4 augment_prob: 1.0 @@ -141,7 +123,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter !ref <drop_freq>, !ref <drop_chunk>] -# Models +############################## Models ########################################## + asr_model: !apply:speechbrain.inference.ASR.EncoderDecoderASR.from_hparams source: speechbrain/asr-crdnn-rnnlm-librispeech run_opts: {"device":"cuda:0"} diff --git a/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml b/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml index d21f309c596a8a4bf6e8e1d09e1ab8434a535795..56eb59d20e532432e11da09f8f938762abd3d6fd 100644 --- a/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml +++ b/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml @@ -39,7 +39,7 @@ NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.z noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script -# Training parameters +####################### Training Parameters #################################### number_of_epochs: 1 batch_size: 16 lr: 0.0003 @@ -73,6 +73,8 @@ dataloader_opts: epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter limit: !ref <number_of_epochs> +############################## Augmentations ################################### + # Download and prepare the dataset of noisy sequences for augmentation prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL URL: !ref <NOISE_DATASET_URL> @@ -81,57 +83,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U csv_file: !ref <noise_annotation> # Add noise to input signal -snr_low: 0 # Min SNR for noise augmentation -snr_high: 15 # 
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>

 # Speed perturbation
-speed_changes: [95, 100, 105] # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]

 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0 # Min frequency band dropout probability
-drop_freq_high: 1 # Max frequency band dropout probability
-drop_freq_count_low: 1 # Min number of frequency bands to drop
-drop_freq_count_high: 3 # Max number of frequency bands to drop
-drop_freq_width: 0.05 # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05

 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1 # Min number of audio chunks to drop
-drop_chunk_count_high: 5 # Max number of audio chunks to drop
-drop_chunk_length_low: 1000 # Min length of audio chunks to drop
-drop_chunk_length_high: 2000 # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5

-# Augmenter: Combines previously defined augmentations to perform data augmentation
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -141,6 +123,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]

+############################## Models ##########################################
+
 # Models
 asr_model: !apply:speechbrain.inference.ASR.EncoderDecoderASR.from_hparams
     source: speechbrain/asr-crdnn-rnnlm-librispeech
diff --git a/speechbrain/augment/augmenter.py b/speechbrain/augment/augmenter.py
index 25ea174809a8086c56a5ad929854df3bc2b97e9c..55aee785cbc94f7ff996d5c350f2bbd1090029f7 100644
--- a/speechbrain/augment/augmenter.py
+++ b/speechbrain/augment/augmenter.py
@@ -438,20 +438,52 @@ class Augmenter(torch.nn.Module):

         return output, output_lengths

+    def replicate_multiple_labels(self, *args):
+        """
+        Replicates each given label tensor along the batch axis a number of
+        times that corresponds to the number of augmentations. This is needed
+        because parallel and concatenated augmentations enlarge the batch
+        dimension.
+
+        Arguments
+        ---------
+        *args : torch.Tensor
+            One or more label tensors to be replicated.
+
+        Returns
+        -------
+        augmented_labels : list of torch.Tensor
+            The replicated labels, one tensor per input tensor.
+        """
+
+        # Determine whether to apply data augmentation
+        if not self.do_augment:
+            return args
+
+        list_of_augmented_labels = []
+
+        for labels in args:
+            list_of_augmented_labels.append(self.replicate_labels(labels))
+
+        return list_of_augmented_labels
+
     def replicate_labels(self, labels):
         """
         Replicates the labels along the batch axis a number of times that
-        corresponds to the number of augmentations.
+        corresponds to the number of augmentations. This is needed because
+        parallel and concatenated augmentations enlarge the batch dimension.

         Arguments
         ---------
         labels : torch.Tensor
-            Input label tensor to be replicated.
+            The label tensor to be replicated along the batch axis.

         Returns
         -------
         augmented_labels: torch.Tensor
-            Labels corresponding to the augmented input.
+            Labels replicated to match the batch size of the augmented input.
         """

         # Determine whether to apply data augmentation
@@ -477,6 +509,7 @@ class Augmenter(torch.nn.Module):
             )

             augmented_labels = torch.cat(augmented_labels, dim=0)
+
         return augmented_labels

     def check_min_max_augmentations(self):
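
Reviewer note (not part of the patch): the sketch below shows how the label-replication API touched by this diff is meant to be used end to end. It builds an Augmenter the way the updated YAML recipes do, runs it, and then replicates the targets so they stay aligned with the enlarged batch. Tensor shapes, the 16 kHz rate, and the three-augmentation pipeline are illustrative assumptions, not values taken from the recipes.

# Minimal usage sketch of Augmenter.replicate_labels / replicate_multiple_labels.
# All hyperparameters here are illustrative; only the APIs come from the diff.
import torch

from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.time_domain import DropChunk, DropFreq, SpeedPerturb

augmenter = Augmenter(
    concat_original=True,  # keep the clean batch and append the augmented copy
    min_augmentations=3,
    max_augmentations=3,
    augment_prob=1.0,
    augmentations=[
        SpeedPerturb(orig_freq=16000, speeds=[95, 100, 105]),
        DropFreq(drop_freq_count_low=1, drop_freq_count_high=3),
        DropChunk(drop_length_low=1000, drop_length_high=2000),
    ],
)

wavs = torch.randn(4, 16000)  # [batch, time]: 1 s of fake audio at 16 kHz
lens = torch.ones(4)          # relative lengths
# With concat_original=True the batch doubles: original + one augmented copy.
aug_wavs, aug_lens = augmenter(wavs, lens)

# Labels must grow with the batch; replicate_labels mirrors the Augmenter's
# own concatenation logic so the ordering of clean/augmented rows matches.
tokens = torch.zeros(4, 10, dtype=torch.long)
tokens_rep = augmenter.replicate_labels(tokens)  # -> shape [8, 10]

# The new helper replicates several label tensors in a single call.
tok_lens = torch.ones(4)
tokens_rep, tok_lens_rep = augmenter.replicate_multiple_labels(tokens, tok_lens)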