From ceefc8ddaa3af6ed21e65b9769f8831af92176c6 Mon Sep 17 00:00:00 2001
From: Parcollet Titouan <parcollet.titouan@gmail.com>
Date: Sun, 11 Feb 2024 21:52:39 +0000
Subject: [PATCH] Shorten the data augmentation in YAML + cleaning (code from
 Samsung AI Cambridge) (#2399)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* shorter augmentations in yaml

* layout to 80 char

* listed label replication

* Refactor CTC

* Refactor transducer

* Refactor seq2seq

* call replicate_labels instead of duplicating code

* refactor aishell

* CommonLanguage

* fix error + CV CTC

* Giga OOF

* Finishing OOF

* final touch LULZ

* fix tests

* Tests???

* fix augment in some recipes

---------

Co-authored-by: Titouan Parcollet/Embedded AI /SRUK/Engineer/Samsung Electronics <t.parcollet@sruk-ccn4.eu.corp.samsungelectronics.net>
Co-authored-by: Mirco Ravanelli <mirco.ravanelli@gmail.com>
---
 .../ASR/CTC/hparams/train_with_wav2vec.yaml   |  57 ++----
 .../AISHELL-1/ASR/CTC/train_with_wav2vec.py   |   2 +
 .../AISHELL-1/ASR/seq2seq/hparams/train.yaml  |  59 +++---
 recipes/AISHELL-1/ASR/seq2seq/train.py        |  18 +-
 .../hparams/train_ASR_transformer.yaml        |  67 ++-----
 .../train_ASR_transformer_with_wav2vect.yaml  |  47 ++---
 recipes/AISHELL-1/ASR/transformer/train.py    |  34 ++--
 .../ASR/transformer/train_with_wav2vect.py    |  18 +-
 .../Tokenizer/hparams/tokenizer_bpe5000.yaml  |   2 +-
 .../train_transformer_tokenizer_bpe5000.yaml  |   2 +-
 .../hparams/sepformer-aishell1mix2-wham.yaml  |   2 +-
 .../hparams/sepformer-aishell1mix2.yaml       |   2 +-
 .../hparams/sepformer-aishell1mix3-wham.yaml  |   2 +-
 .../hparams/sepformer-aishell1mix3.yaml       |   2 +-
 .../separation/hparams/convtasnet-cross.yaml  |   2 +-
 .../hparams/convtasnet-independent.yaml       |   2 +-
 .../hparams/convtasnet-parallel-noise.yaml    |   2 +-
 .../hparams/convtasnet-parallel-reverb.yaml   |   2 +-
 .../hparams/convtasnet-parallel.yaml          |   2 +-
 recipes/CVSS/S2ST/hparams/train_fr-en.yaml    |   2 +-
 .../lang_id/hparams/train_ecapa_tdnn.yaml     |  21 +--
 .../CTC/hparams/train_ar_with_wav2vec.yaml    |  46 ++---
 .../CTC/hparams/train_de_with_wav2vec.yaml    |  46 ++---
 .../CTC/hparams/train_en_with_wav2vec.yaml    |  46 ++---
 .../CTC/hparams/train_es_with_wav2vec.yaml    |  46 ++---
 .../CTC/hparams/train_fr_with_wav2vec.yaml    |  46 ++---
 .../CTC/hparams/train_it_with_wav2vec.yaml    |  47 ++---
 .../CTC/hparams/train_pt_with_wav2vec.yaml    |  46 ++---
 .../CTC/hparams/train_rw_with_wav2vec.yaml    |  46 ++---
 .../CTC/hparams/train_zh-CN_with_wav2vec.yaml |  46 ++---
 .../ASR/seq2seq/hparams/train_de.yaml         |  49 ++---
 .../ASR/seq2seq/hparams/train_en.yaml         |  50 ++---
 .../ASR/seq2seq/hparams/train_es.yaml         |  50 ++---
 .../ASR/seq2seq/hparams/train_fr.yaml         |  50 ++---
 .../ASR/seq2seq/hparams/train_it.yaml         |  50 ++---
 .../ASR/seq2seq/hparams/train_rw.yaml         |  50 ++---
 .../ASR/transducer/hparams/train_de.yaml      |  48 ++---
 .../ASR/transducer/hparams/train_fr.yaml      |  48 ++---
 .../ASR/transducer/hparams/train_it.yaml      |  48 ++---
 recipes/CommonVoice/ASR/transducer/train.py   |  32 ++--
 .../hparams/train_ar_hf_whisper.yaml          |  44 ++---
 .../ASR/transformer/hparams/train_de.yaml     |  47 ++---
 .../hparams/train_fa_hf_whisper.yaml          |  45 ++---
 .../ASR/transformer/hparams/train_fr.yaml     |  47 ++---
 .../hparams/train_fr_hf_whisper.yaml          |  45 ++---
 .../hparams/train_hi_hf_whisper.yaml          |  45 ++---
 .../ASR/transformer/hparams/train_it.yaml     |  47 ++---
 .../hparams/train_it_hf_whisper.yaml          |  45 ++---
 .../hparams/train_mn_hf_whisper.yaml          |  45 ++---
 .../hparams/train_sr_hf_whisper.yaml          |  45 ++---
 recipes/CommonVoice/ASR/transformer/train.py  |  34 ++--
 .../wav2vec2/hparams/wav2vec2_base.yaml       |  10 +-
 .../hparams/sepformer-dns-16k.yaml            |   2 +-
 .../CTC/hparams/train_amh_with_wav2vec.yaml   |  43 ++---
 .../CTC/hparams/train_dar_with_wav2vec.yaml   |  44 ++---
 .../CTC/hparams/train_fon_with_wav2vec.yaml   |  44 ++---
 .../CTC/hparams/train_multi_with_wav2vec.yaml |  44 ++---
 .../CTC/hparams/train_sw_with_wav2vec.yaml    |  44 ++---
 .../CTC/hparams/train_wol_with_wav2vec.yaml   |  44 ++---
 .../hparams/cnn14_classifier.yaml             |   2 +-
 .../hparams/conv2d_classifier.yaml            |   2 +-
 .../ESC50/interpret/hparams/l2i_cnn14.yaml    |   2 +-
 .../hparams/l2i_conv2dclassifier.yaml         |   2 +-
 recipes/ESC50/interpret/hparams/nmf.yaml      |   2 +-
 recipes/ESC50/interpret/hparams/piq.yaml      |   2 +-
 .../ST/transformer/hparams/conformer.yaml     |   2 +-
 .../ST/transformer/hparams/transformer.yaml   |   2 +-
 .../Google-speech-commands/hparams/xvect.yaml |   2 +-
 .../hparams/xvect_leaf.yaml                   |   2 +-
 .../hparams/train_with_wav2vec2.yaml          |   4 +-
 .../AST/transformer/hparams/train_samu.yaml   |   2 +-
 .../hparams/train_samu_mbart_st.yaml          |   2 +-
 .../hparams/train_samu_nllb_st.yaml           |   2 +-
 .../hparams/train_w2v2_mbart_st.yaml          |   2 +-
 .../hparams/train_w2v2_nllb_st.yaml           |   2 +-
 .../transformer/hparams/train_w2v2_st.yaml    |   2 +-
 .../transformer/hparams/conformer_medium.yaml |   4 +-
 .../KsponSpeech/LM/hparams/transformer.yaml   |   2 +-
 .../hparams/5K_unigram_subword_bpe.yaml       |   2 +-
 .../hparams/sepformer-libri2mix.yaml          |   2 +-
 .../hparams/sepformer-libri3mix.yaml          |   2 +-
 recipes/LibriParty/VAD/hparams/train.yaml     |   4 +-
 .../train_hf_wavlm_average_downsampling.yaml  | 122 ++++++------
 .../train_hf_wavlm_conv_downsampling.yaml     | 122 ++++++------
 .../train_hf_wavlm_signal_downsampling.yaml   | 119 ++++++------
 .../ASR/CTC/hparams/train_hf_wav2vec.yaml     | 125 ++++++-------
 .../train_hf_wav2vec_rnn_rescoring.yaml       | 174 +++++++++---------
 ...rain_hf_wav2vec_transformer_rescoring.yaml | 136 +++++++-------
 .../CTC/hparams/train_hf_whisper_encoder.yaml | 118 ++++++------
 .../ASR/CTC/hparams/train_sb_wav2vec.yaml     | 119 ++++++------
 .../LibriSpeech/ASR/CTC/train_with_wav2vec.py |  11 +-
 .../LibriSpeech/ASR/CTC/train_with_whisper.py |  11 +-
 .../ASR/seq2seq/hparams/train_BPE_1000.yaml   | 128 ++++++-------
 .../hparams/train_BPE_1000_sligru.yaml        | 129 ++++++-------
 .../ASR/seq2seq/hparams/train_BPE_5000.yaml   | 129 ++++++-------
 recipes/LibriSpeech/ASR/seq2seq/train.py      |  14 +-
 .../hparams/conformer_transducer.yaml         |  60 +++---
 recipes/LibriSpeech/ASR/transducer/train.py   |  29 +--
 .../ASR/transformer/hparams/bayesspeech.yaml  |  61 +++---
 .../hparams/branchformer_large.yaml           |  67 +++----
 .../transformer/hparams/conformer_large.yaml  |  64 +++----
 .../transformer/hparams/conformer_small.yaml  |  67 +++----
 .../hparams/hyperbranchformer_13M.yaml        |  64 +++----
 .../hparams/hyperbranchformer_25M.yaml        |  64 +++----
 .../hparams/hyperconformer_22M.yaml           |  65 +++----
 .../hparams/hyperconformer_8M.yaml            |  64 +++----
 .../transformer/hparams/train_hf_whisper.yaml |  53 +++---
 .../ASR/transformer/hparams/transformer.yaml  |  64 +++----
 recipes/LibriSpeech/ASR/transformer/train.py  |  18 +-
 .../G2P/hparams/hparams_g2p_rnn.yaml          |   2 +-
 .../G2P/hparams/hparams_g2p_transformer.yaml  |   2 +-
 .../G2P/hparams/hparams_lm_rnn.yaml           |   4 +-
 .../G2P/hparams/hparams_lm_transformer.yaml   |   4 +-
 recipes/LibriSpeech/LM/hparams/RNNLM.yaml     |   4 +-
 .../LibriSpeech/LM/hparams/transformer.yaml   |   2 +-
 .../hparams/1K_unigram_subword_bpe.yaml       |   2 +-
 .../hparams/5K_unigram_subword_bpe.yaml       |   2 +-
 .../wav2vec2/hparams/wav2vec2_base.yaml       |   4 +-
 .../ASR/CTC/hparams/train_hf_wav2vec.yaml     |   4 +-
 .../CTC/hparams/train_hf_wav2vec_full.yaml    |   4 +-
 .../CTC/hparams/train_hf_wav2vec_relax.yaml   |   4 +-
 .../gpt/hparams/train_gpt.yaml                |   2 +-
 .../llama2/hparams/train_llama2.yaml          |   2 +-
 .../hparams/pool_sisnrestimator.yaml          |   2 +-
 .../noise-robust/hparams/robust_asr_16k.yaml  |   4 +-
 recipes/SLURP/NLU/hparams/train.yaml          |   4 +-
 .../Tokenizer/hparams/tokenizer_bpe58.yaml    |   2 +-
 recipes/SLURP/direct/hparams/train.yaml       |  41 ++---
 .../direct/hparams/train_with_wav2vec2.yaml   |  42 ++---
 .../ASR/CTC/hparams/train_with_wav2vec.yaml   |  44 ++---
 .../ASR/seq2seq/hparams/train_BPE_2000.yaml   |  51 ++---
 recipes/Switchboard/ASR/seq2seq/train.py      |  14 +-
 .../ASR/transformer/hparams/transformer.yaml  |  59 ++----
 .../hparams/transformer_finetuned_LM.yaml     |  53 ++----
 recipes/Switchboard/ASR/transformer/train.py  |  18 +-
 .../Switchboard/LM/hparams/transformer.yaml   |   2 +-
 .../LM/hparams/transformer_finetune.yaml      |   2 +-
 .../hparams/2K_unigram_subword_bpe.yaml       |   2 +-
 recipes/TIMIT/ASR/CTC/hparams/train.yaml      |  52 ++----
 recipes/TIMIT/ASR/seq2seq/hparams/train.yaml  |  45 ++---
 .../seq2seq/hparams/train_with_wav2vec2.yaml  |  44 ++---
 .../TIMIT/ASR/transducer/hparams/train.yaml   |  52 ++----
 .../ASR/transducer/hparams/train_wav2vec.yaml |  45 ++---
 recipes/TIMIT/Alignment/hparams/train.yaml    |   4 +-
 .../hparams/branchformer_large.yaml           |   4 +-
 .../Tokenizer/hparams/tedlium2_500_bpe.yaml   |   2 +-
 .../hparams/train_ecapa_tdnn.yaml             |   2 +-
 recipes/Voicebank/ASR/CTC/hparams/train.yaml  |  44 ++---
 .../ASR_enhance/hparams/enhance_mimic.yaml    |   2 +-
 .../hparams/pretrain_perceptual.yaml          |   2 +-
 .../MTL/ASR_enhance/hparams/robust_asr.yaml   |  49 ++---
 .../SpeakerRec/hparams/train_ecapa_tdnn.yaml  |  40 ++--
 .../hparams/train_ecapa_tdnn_mel_spec.yaml    |  40 ++--
 .../SpeakerRec/hparams/train_resnet.yaml      |  42 ++---
 .../SpeakerRec/hparams/train_x_vectors.yaml   |  40 ++--
 .../lang_id/hparams/train_ecapa.yaml          |  13 +-
 .../hparams/cnntransformer-wham-DM.yaml       |   2 +-
 .../hparams/cnntransformer-whamr-DM.yaml      |   2 +-
 .../hparams/convtasnet-whamr-DM.yaml          |   2 +-
 .../enhancement/hparams/dprnn-whamr-DM.yaml   |   2 +-
 .../enhancement/hparams/sepformer-wham.yaml   |   2 +-
 .../hparams/sepformer-whamr-16k-DM.yaml       |   2 +-
 .../hparams/sepformer-whamr-16k.yaml          |   2 +-
 .../hparams/sepformer-whamr-DM.yaml           |   2 +-
 .../enhancement/hparams/sepformer-whamr.yaml  |   2 +-
 .../separation/hparams/sepformer-wham.yaml    |   2 +-
 .../separation/hparams/sepformer-whamr.yaml   |   2 +-
 .../separation/hparams/convtasnet.yaml        |   2 +-
 recipes/WSJ0Mix/separation/hparams/dprnn.yaml |   2 +-
 .../separation/hparams/resepformer.yaml       |   2 +-
 .../hparams/sepformer-conformerintra.yaml     |   2 +-
 .../hparams/sepformer-customdataset.yaml      |   2 +-
 .../WSJ0Mix/separation/hparams/sepformer.yaml |   2 +-
 recipes/WSJ0Mix/separation/hparams/skim.yaml  |   2 +-
 .../emotion_diarization/hparams/train.yaml    |   2 +-
 .../Tokenizer/hparams/tokenizer_bpe51.yaml    |   2 +-
 .../direct/hparams/train.yaml                 |  44 ++---
 recipes/timers-and-such/LM/hparams/train.yaml |   2 +-
 .../Tokenizer/hparams/tokenizer_bpe51.yaml    |   2 +-
 .../decoupled/hparams/train_LS_LM.yaml        |   2 +-
 .../decoupled/hparams/train_TAS_LM.yaml       |   2 +-
 .../timers-and-such/direct/hparams/train.yaml |  50 ++---
 .../direct/hparams/train_with_wav2vec2.yaml   |  42 ++---
 .../multistage/hparams/train_LS_LM.yaml       |  51 ++---
 .../multistage/hparams/train_TAS_LM.yaml      |  50 ++---
 speechbrain/augment/augmenter.py              |  39 +++-
 186 files changed, 2124 insertions(+), 3354 deletions(-)

diff --git a/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml b/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml
index 9bd0b52d2..486685f25 100644
--- a/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml
+++ b/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml
@@ -28,7 +28,8 @@ test_data: !ref <output_folder>/test.csv
 wav2vec2_hub: TencentGameMate/chinese-wav2vec2-large
 wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 80
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -76,7 +77,8 @@ tokenizer: !apply:transformers.BertTokenizer.from_pretrained
 # bert-base-chinese tokens length
 output_neurons: 21128
 
-# Decoding parameters
+############################## Decoding ########################################
+
 # Be sure that the bos and eos index match with the BPEs ones
 # Decoding parameters
 test_searcher: !name:speechbrain.decoders.CTCBeamSearcher
@@ -98,64 +100,37 @@ beta: 0.5
 # which Chinese writing normally does not do.
 # If remove_spaces, spaces are removed
 # from the transcript before computing CER.
-# (e.g., 祝 可爱 的 你 —> 祝可爱的你)
 remove_spaces: True
 split_tokens: !apply:operator.not_ [!ref <remove_spaces>]
 
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [90, 100, 110]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
    orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
 
 # Time Drop
-time_drop_length_low: 35  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 45  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 2  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 2  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <time_drop_length_low>
-   drop_length_high: !ref <time_drop_length_high>
-   drop_count_low: !ref <time_drop_count_low>
-   drop_count_high: !ref <time_drop_count_high>
-   replace: !ref <time_drop_replace>
-   dim: 1
+   drop_length_low: 35
+   drop_length_high: 45
+   drop_count_low: 2
+   drop_count_high: 2
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <freq_drop_length_low>
-   drop_length_high: !ref <freq_drop_length_high>
-   drop_count_low: !ref <freq_drop_count_low>
-   drop_count_high: !ref <freq_drop_count_high>
-   replace: !ref <freq_drop_replace>
+   drop_length_low: 25
+   drop_length_high: 35
+   drop_count_low: 2
+   drop_count_high: 2
    dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-   warp_window: !ref <time_warp_window>
-   warp_mode: !ref <time_warp_mode>
-   dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: False
-   repeat_augment: 1
-   shuffle_augmentations: False
    min_augmentations: 3
    max_augmentations: 3
    augment_prob: 1.0
@@ -164,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
       !ref <freq_drop>,
       !ref <time_warp>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
    input_shape: [null, null, !ref <wav2vec_output_dim>]
    linear1: !name:speechbrain.nnet.linear.Linear
@@ -230,6 +207,8 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.9
    patient: 0
 
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
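
Note on the YAML diffs: the pattern above repeats across most recipes in this
patch. Hyperparameters that merely restated a constructor default (replace:
"zeros", dim: 1, parallel_augment: False, repeat_augment: 1, and so on) are
deleted, so each !new: object is built from the essential arguments plus the
class defaults. A minimal sketch of why the shortened YAML is equivalent,
assuming speechbrain and hyperpyyaml are installed (illustration, not part of
the patch):

    from hyperpyyaml import load_hyperpyyaml

    yaml_snippet = """
    sample_rate: 16000
    speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
       orig_freq: !ref <sample_rate>
    """

    # `speeds` was dropped from the recipe YAML, so SpeedPerturb falls back to
    # its constructor default (the deleted hparam spelled out the same values).
    hparams = load_hyperpyyaml(yaml_snippet)
    print(hparams["speed_perturb"])
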
diff --git a/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py b/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py
index 227204f44..43783eed7 100644
--- a/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py
+++ b/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py
@@ -56,6 +56,8 @@ class ASR(sb.Brain):
         ids = batch.id
         tokens, tokens_lens = batch.tokens
 
+        # Labels must be extended if parallel augmentation or concatenated
+        # augmentation was performed on the input (increasing the batch dimension)
         if stage == sb.Stage.TRAIN:
             if hasattr(self.hparams, "fea_augment"):
                 tokens = self.hparams.fea_augment.replicate_labels(tokens)
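
The comment added above states the contract behind all the label-handling
changes in this patch: whenever the augmenter enlarges the batch (e.g. with
concat_original or parallel augmentation), every label tensor must grow by the
same factor so the loss terms stay aligned. A rough torch-only sketch of the
idea (an illustration, not SpeechBrain's implementation):

    import torch

    def replicate_labels_sketch(labels: torch.Tensor, n_copies: int) -> torch.Tensor:
        # Tile the labels along the batch dimension so each augmented copy of
        # the waveforms keeps an aligned target row.
        return torch.cat([labels] * n_copies, dim=0)

    tokens = torch.tensor([[5, 9, 2], [7, 1, 0]])  # batch of two label rows
    extended = replicate_labels_sketch(tokens, 2)  # e.g. concat_original doubled the batch
    assert extended.shape == (4, 3)
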
diff --git a/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml b/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml
index 75b303f66..e6fda7de2 100644
--- a/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml
+++ b/recipes/AISHELL-1/ASR/seq2seq/hparams/train.yaml
@@ -29,7 +29,8 @@ test_data: !ref <output_folder>/test.csv
 noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script
 tokenizer_file: speechbrain/asr-transformer-aishell/tokenizer.ckpt
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 40
 number_of_ctc_epochs: 10
 batch_size: 16
@@ -71,7 +72,7 @@ test_dataloader_opts:
    batch_size: !ref <batch_size>
    num_workers: !ref <num_workers>
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -108,7 +109,6 @@ scorer_beam_scale: 0.5
 # which Chinese writing normally does not do.
 # If remove_spaces, spaces are removed
 # from the transcript before computing CER.
-# (e.g., 祝 可爱 的 你 —> 祝可爱的你)
 remove_spaces: True
 split_tokens: !apply:operator.not_ [!ref <remove_spaces>]
 
@@ -118,6 +118,8 @@ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
 normalize: !new:speechbrain.processing.features.InputNormalization
    norm_type: global
 
+############################## Augmentations ###################################
+
 compute_features: !new:speechbrain.lobes.features.Fbank
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
@@ -132,57 +134,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
 
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
    csv_file: !ref <noise_annotation>
-   snr_low: !ref <snr_low>
-   snr_high: !ref <snr_high>
+   snr_low: 0
+   snr_high: 15
    noise_sample_rate: !ref <sample_rate>
    clean_sample_rate: !ref <sample_rate>
    num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
    orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
+   speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
 
-# Augmenter: Combines previously defined augmentations to perform data augmentation
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
    concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
    min_augmentations: 4
    max_augmentations: 4
    augment_prob: 1.0
@@ -192,6 +174,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
       !ref <drop_freq>,
       !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
    input_shape: [null, null, !ref <n_mels>]
    activation: !ref <activation>
@@ -268,7 +252,8 @@ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    paths:
       tokenizer: !ref <tokenizer_file>
 
-# Scorer
+############################## Decoding ########################################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
    eos_index: !ref <eos_index>
    blank_index: !ref <blank_index>
@@ -305,6 +290,8 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.8
    patient: 0
 
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
diff --git a/recipes/AISHELL-1/ASR/seq2seq/train.py b/recipes/AISHELL-1/ASR/seq2seq/train.py
index 69d2e75d6..bc2c49b88 100644
--- a/recipes/AISHELL-1/ASR/seq2seq/train.py
+++ b/recipes/AISHELL-1/ASR/seq2seq/train.py
@@ -29,10 +29,6 @@ class ASR(sb.Brain):
 
         # Forward pass
         feats = self.hparams.compute_features(wavs)
-
-        if stage == sb.Stage.TRAIN and hasattr(self.hparams, "fea_augment"):
-            feats, fea_lens = self.hparams.fea_augment(feats, wav_lens)
-
         feats = self.modules.normalize(feats, wav_lens)
         x = self.modules.enc(feats.detach())
         e_in = self.modules.emb(tokens_bos)  # y_in bos + tokens
@@ -65,12 +61,16 @@ class ASR(sb.Brain):
         tokens_eos, tokens_eos_lens = batch.tokens_eos
         tokens, tokens_lens = batch.tokens
 
+        # Labels must be extended if parallel augmentation or concatenated
+        # augmentation was performed on the input (increasing the batch dimension)
         if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"):
-            tokens = self.hparams.wav_augment.replicate_labels(tokens)
-            tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens)
-            tokens_eos = self.hparams.wav_augment.replicate_labels(tokens_eos)
-            tokens_eos_lens = self.hparams.wav_augment.replicate_labels(
-                tokens_eos_lens
+            (
+                tokens,
+                tokens_lens,
+                tokens_eos,
+                tokens_eos_lens,
+            ) = self.hparams.wav_augment.replicate_multiple_labels(
+                tokens, tokens_lens, tokens_eos, tokens_eos_lens
             )
 
         loss_seq = self.hparams.seq_cost(
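
replicate_multiple_labels is the new Augmenter helper introduced by this patch
(defined in speechbrain/augment/augmenter.py, which appears in the diffstat but
not in this excerpt). It collapses the four back-to-back replicate_labels calls
into one, and in the transformer recipe below it also replaces a copy-paste
slip where the fea_augment branch called wav_augment's replication. A
hypothetical sketch of the helper's shape, assuming it simply maps the existing
per-tensor replication over its arguments:

    def replicate_multiple_labels_sketch(augmenter, *label_tensors):
        # Apply the augmenter's single-tensor label replication to each tensor
        # in turn and return the results as a tuple (assumed behaviour).
        return tuple(augmenter.replicate_labels(t) for t in label_tensors)

    # Usage mirroring the call sites in this patch:
    # tokens, tokens_lens, tokens_eos, tokens_eos_lens = \
    #     replicate_multiple_labels_sketch(aug, tokens, tokens_lens, tokens_eos, tokens_eos_lens)
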
diff --git a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml
index b98e371b2..408c9e680 100644
--- a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml
+++ b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer.yaml
@@ -30,7 +30,8 @@ test_data: !ref <save_folder>/test.csv
 noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script
 tokenizer_file: speechbrain/asr-transformer-aishell/tokenizer.ckpt
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 50
 batch_size: 8
 ctc_weight: 0.3
@@ -77,7 +78,7 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: !ref <num_workers>
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
 # Transformer
 d_model: 256
 nhead: 4
@@ -103,7 +104,7 @@ valid_beam_size: 10
 test_beam_size: 10
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -157,7 +158,8 @@ SGD: !name:torch.optim.SGD
     momentum: 0.99
     nesterov: True
 
-# Scorer
+############################## Decoding & Optimiser ############################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -227,7 +229,7 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
-# ----- WAVEFORM AUGMENTATION ----- #
+############################## Augmentation ####################################
 
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -238,75 +240,43 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
 
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 1
     max_augmentations: 1
     augment_prob: 1.0
     augmentations: [
         !ref <add_noise>]
 
-
- # ----- FEATURE AUGMENTATION ----- #
-time_drop_length_low: 0  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 100  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 2  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 2  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
-
 # Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-freq_drop_length_low: 30  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 40  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
+    drop_length_low: 0
+    drop_length_high: 100
+    drop_count_low: 2
+    drop_count_high: 2
 
 # Frequency Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 30
+    drop_length_high: 40
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 # Time warp
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 1
     max_augmentations: 1
     augment_start_index: !ref <batch_size> # This leaves original inputs unchanged
@@ -317,6 +287,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <freq_drop>,
         !ref <time_warp>]
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
@@ -324,7 +296,6 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
 # which Chinese writing normally does not do.
 # If remove_spaces, spaces are removed
 # from the transcript before computing CER.
-# (e.g., 祝 可爱 的 你 —> 祝可爱的你)
 remove_spaces: True
 split_tokens: !apply:operator.not_ [!ref <remove_spaces>]
 
diff --git a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml
index 13a7826ad..a196afc58 100644
--- a/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml
+++ b/recipes/AISHELL-1/ASR/transformer/hparams/train_ASR_transformer_with_wav2vect.yaml
@@ -30,7 +30,8 @@ wav2vec2_hub: facebook/wav2vec2-large-100k-voxpopuli
 wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
 freeze_wav2vec: False
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 80
 batch_size: 2
 grad_accumulation_factor: 16
@@ -72,7 +73,7 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: !ref <num_workers>
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
 # Transformer
 d_model: 256
 nhead: 4
@@ -98,7 +99,7 @@ valid_beam_size: 10
 test_beam_size: 10
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
     source: !ref <wav2vec2_hub>
@@ -140,44 +141,27 @@ model: !new:torch.nn.ModuleList
     - [!ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -186,6 +170,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Decoding & Optimiser ############################
 
 # define two optimizers here for two-stage training
 Adam: !name:torch.optim.Adam
@@ -257,6 +242,7 @@ noam_annealing_wav2vect: !new:speechbrain.nnet.schedulers.NoamScheduler
     n_warmup_steps: 25000
     model_size: !ref <d_model>
 
+############################## Logging and Pretrainer ##########################
 
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
     checkpoints_dir: !ref <save_folder>
@@ -278,7 +264,6 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
 # which Chinese writing normally does not do.
 # If remove_spaces, spaces are removed
 # from the transcript before computing CER.
-# (e.g., 祝 可爱 的 你 —> 祝可爱的你)
 remove_spaces: True
 split_tokens: !apply:operator.not_ [!ref <remove_spaces>]
 
diff --git a/recipes/AISHELL-1/ASR/transformer/train.py b/recipes/AISHELL-1/ASR/transformer/train.py
index 977563ac8..63361bf0d 100644
--- a/recipes/AISHELL-1/ASR/transformer/train.py
+++ b/recipes/AISHELL-1/ASR/transformer/train.py
@@ -82,28 +82,26 @@ class ASR(sb.core.Brain):
         tokens, tokens_lens = batch.tokens
 
         if stage == sb.Stage.TRAIN:
+            # Labels must be extended if parallel augmentation or concatenated
+            # augmentation was performed on the input (increasing the batch dimension)
             if hasattr(self.hparams, "wav_augment"):
-                tokens = self.hparams.wav_augment.replicate_labels(tokens)
-                tokens_lens = self.hparams.wav_augment.replicate_labels(
-                    tokens_lens
-                )
-                tokens_eos = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos
-                )
-                tokens_eos_lens = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos_lens
+                (
+                    tokens,
+                    tokens_lens,
+                    tokens_eos,
+                    tokens_eos_lens,
+                ) = self.hparams.wav_augment.replicate_multiple_labels(
+                    tokens, tokens_lens, tokens_eos, tokens_eos_lens
                 )
 
             if hasattr(self.hparams, "fea_augment"):
-                tokens = self.hparams.fea_augment.replicate_labels(tokens)
-                tokens_lens = self.hparams.fea_augment.replicate_labels(
-                    tokens_lens
-                )
-                tokens_eos = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos
-                )
-                tokens_eos_lens = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos_lens
+                (
+                    tokens,
+                    tokens_lens,
+                    tokens_eos,
+                    tokens_eos_lens,
+                ) = self.hparams.fea_augment.replicate_multiple_labels(
+                    tokens, tokens_lens, tokens_eos, tokens_eos_lens
                 )
 
         loss_seq = self.hparams.seq_cost(
diff --git a/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py b/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py
index 94196ea7b..53aa47375 100644
--- a/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py
+++ b/recipes/AISHELL-1/ASR/transformer/train_with_wav2vect.py
@@ -74,16 +74,16 @@ class ASR(sb.core.Brain):
         tokens, tokens_lens = batch.tokens
 
         if stage == sb.Stage.TRAIN:
+            # Labels must be extended if parallel augmentation or concatenated
+            # augmentation was performed on the input (increasing the batch dimension)
             if hasattr(self.hparams, "wav_augment"):
-                tokens = self.hparams.wav_augment.replicate_labels(tokens)
-                tokens_lens = self.hparams.wav_augment.replicate_labels(
-                    tokens_lens
-                )
-                tokens_eos = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos
-                )
-                tokens_eos_lens = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos_lens
+                (
+                    tokens,
+                    tokens_lens,
+                    tokens_eos,
+                    tokens_eos_lens,
+                ) = self.hparams.wav_augment.replicate_multiple_labels(
+                    tokens, tokens_lens, tokens_eos, tokens_eos_lens
                 )
 
         loss_seq = self.hparams.seq_cost(
diff --git a/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml b/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml
index 886d22bda..d2cb23018 100644
--- a/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml
+++ b/recipes/AISHELL-1/Tokenizer/hparams/tokenizer_bpe5000.yaml
@@ -15,7 +15,7 @@ train_csv: !ref <output_folder>/train.csv
 valid_csv: !ref <output_folder>/dev.csv
 
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 5000  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml b/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml
index bc286156b..973df9a11 100644
--- a/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml
+++ b/recipes/AISHELL-1/Tokenizer/hparams/train_transformer_tokenizer_bpe5000.yaml
@@ -15,7 +15,7 @@ train_csv: !ref <output_folder>/train.csv
 valid_csv: !ref <output_folder>/dev.csv
 
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 5000  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml
index 24571404f..d3cb9493e 100644
--- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml
+++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2-wham.yaml
@@ -40,7 +40,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml
index d4689378a..168471dbb 100644
--- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml
+++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix2.yaml
@@ -40,7 +40,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml
index 65add025a..834857ed7 100644
--- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml
+++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3-wham.yaml
@@ -40,7 +40,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml
index 3b27b796b..d48fdecb2 100644
--- a/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml
+++ b/recipes/Aishell1Mix/separation/hparams/sepformer-aishell1mix3.yaml
@@ -40,7 +40,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml
index f609c746e..043845aeb 100644
--- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml
+++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-cross.yaml
@@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk
 n_audio_to_save: 10
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml
index c3483f5f2..164ccc45b 100644
--- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml
+++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-independent.yaml
@@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk
 n_audio_to_save: 10
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml
index 74941dfea..fef85267f 100644
--- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml
+++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-noise.yaml
@@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk
 n_audio_to_save: 10
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml
index 6b1518d39..4ec5054f9 100644
--- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml
+++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel-reverb.yaml
@@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk
 n_audio_to_save: 10
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml
index a5d615679..adb31ddc6 100644
--- a/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml
+++ b/recipes/BinauralWSJ0Mix/separation/hparams/convtasnet-parallel.yaml
@@ -43,7 +43,7 @@ save_audio: True # Save estimated sources on disk
 n_audio_to_save: 10
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/CVSS/S2ST/hparams/train_fr-en.yaml b/recipes/CVSS/S2ST/hparams/train_fr-en.yaml
index 8ff7d59be..678dd1c17 100644
--- a/recipes/CVSS/S2ST/hparams/train_fr-en.yaml
+++ b/recipes/CVSS/S2ST/hparams/train_fr-en.yaml
@@ -59,7 +59,7 @@ wav2vec2_download_path: !ref <save_folder>/pretrained_models
 wav2vec2_frozen: False
 wav2vec2_freeze_steps: 10000
 
-# Training parameters
+####################### Training Parameters ####################################
 lr: 0.0005
 lr_wav2vec: 0.00001
 loss_reduction: batchmean
diff --git a/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml b/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml
index 0c4d91aa9..d4722f45c 100644
--- a/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml
+++ b/recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml
@@ -38,10 +38,10 @@ error_stats: !name:speechbrain.utils.metric_stats.MetricStats
     metric: !name:speechbrain.nnet.losses.classification_error
         reduction: batch
 
+####################### Training Parameters ####################################
+
 # Feature parameters: n_mels between 40 and 80
 n_mels: 80
-
-# Training Parameters
 sample_rate: 16000
 number_of_epochs: 30
 batch_size: 4
@@ -64,6 +64,8 @@ test_dataloader_options:
     batch_size: !ref <batch_size>
     shuffle: True
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -78,7 +80,6 @@ prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     ext: wav
     csv_file: !ref <rir_annotation>
 
-
 # Add reverberation to input signal
 add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     csv_file: !ref <rir_annotation>
@@ -87,27 +88,21 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     num_workers: !ref <num_workers>
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [90, 100, 110]  # List of speed changes for time-stretching
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
     shuffle_augmentations: True
     min_augmentations: 1
     max_augmentations: 3
@@ -125,6 +120,8 @@ mean_var_norm_input: !new:speechbrain.processing.features.InputNormalization
     norm_type: sentence
     std_norm: False
 
+############################## Models ##########################################
+
 # To design a custom model, either just edit the simple CustomModel
 # class that's listed here, or replace this `!new` call with a line
 # pointing to a different file you've defined.
@@ -182,6 +179,8 @@ lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
     final_value: !ref <lr_final>
     epoch_count: !ref <number_of_epochs>
 
+############################## Logging and Pretrainer ##########################
+
 # This object is used for saving the state of training both so that it
 # can be resumed if it gets interrupted, and also so that the best checkpoint
 # can be later loaded for evaluation or inference.
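
Unlike the ASR recipes, this language-ID config keeps shuffle_augmentations:
True with a 1-to-3 range, so each training batch receives a random subset of
reverb, noise, and speed perturbation, in random order. A torch-free sketch of
that assumed sampling behaviour (illustrative only):

    import random

    def pick_augmentations(augmentations, min_n=1, max_n=3):
        # Choose how many transforms to apply, then which ones; random.sample
        # also returns them in shuffled order (assumed Augmenter behaviour).
        k = random.randint(min_n, min(max_n, len(augmentations)))
        return random.sample(augmentations, k)

    chain = pick_augmentations(["add_reverb", "add_noise", "speed_perturb"])
    print(chain)  # e.g. ['add_noise', 'speed_perturb']
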
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml
index caf7f2d3b..643df0994 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_ar_with_wav2vec.yaml
@@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +59,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -97,45 +98,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -144,6 +130,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml
index dc4ae34b2..adb8e5bb5 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_de_with_wav2vec.yaml
@@ -33,7 +33,8 @@ skip_prep: False
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 45
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -61,7 +62,7 @@ test_dataloader_options:
 token_type: char # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 # activation: !name:torch.nn.LeakyReLU
 dnn_neurons: 1024
 wav2vec_output_dim: !ref <dnn_neurons>
@@ -95,45 +96,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml
index f3c68ee9b..d8aaea36e 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_en_with_wav2vec.yaml
@@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +59,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 # activation: !name:torch.nn.LeakyReLU
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
@@ -95,45 +96,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml
index 8e2056f83..e32a242d1 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_es_with_wav2vec.yaml
@@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +59,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -96,45 +97,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -143,6 +129,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml
index 44f1523f3..079cfe73f 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_fr_with_wav2vec.yaml
@@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +59,7 @@ test_dataloader_options:
 token_type: char  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 # activation: !name:torch.nn.LeakyReLU
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
@@ -94,45 +95,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -141,6 +127,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml
index 4f39ad2a0..033299752 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_it_with_wav2vec.yaml
@@ -33,7 +33,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 8.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 45
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -59,7 +60,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 # activation: !name:torch.nn.LeakyReLU
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
@@ -95,45 +96,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -141,6 +127,9 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <speed_perturb>,
         !ref <drop_freq>,
         !ref <drop_chunk>]
+
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml
index da8a28de5..d4b703eb4 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_pt_with_wav2vec.yaml
@@ -31,7 +31,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -57,7 +58,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -95,45 +96,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml
index f92d8ad13..ed15a8aad 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_rw_with_wav2vec.yaml
@@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -59,7 +60,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 # activation: !name:torch.nn.LeakyReLU
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
@@ -95,45 +96,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -142,6 +128,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml b/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml
index 513d8d324..a1709931a 100644
--- a/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml
+++ b/recipes/CommonVoice/ASR/CTC/hparams/train_zh-CN_with_wav2vec.yaml
@@ -32,7 +32,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -59,7 +60,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -97,45 +98,30 @@ test_beam_search:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -144,6 +130,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml
index b66374147..cb6f2b3be 100644
--- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml
+++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_de.yaml
@@ -30,7 +30,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 25
 number_of_ctc_epochs: 20
 lr: 1.0
@@ -62,7 +63,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 80
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -104,51 +105,27 @@ temperature: 1.50
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
 
  # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -165,6 +142,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
     activation: !ref <activation>
diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml
index c74d25c66..49f9a0d2b 100644
--- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml
+++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_en.yaml
@@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 25
 number_of_ctc_epochs: 10
 lr: 1.0
@@ -60,7 +61,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 80
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -102,50 +103,27 @@ temperature: 1.50
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -162,6 +140,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
     activation: !ref <activation>
diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml
index b56a75b69..b94373e9b 100644
--- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml
+++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_es.yaml
@@ -30,7 +30,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 25
 number_of_ctc_epochs: 20
 lr: 1.0
@@ -61,7 +62,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 80
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -103,50 +104,27 @@ temperature: 1.50
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -163,6 +141,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
     activation: !ref <activation>
diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml
index 1c2a85ec8..cc9b0aa99 100644
--- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml
+++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_fr.yaml
@@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 25
 number_of_ctc_epochs: 20
 lr: 1.0
@@ -60,7 +61,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 80
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -102,50 +103,27 @@ temperature: 1.50
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -162,6 +140,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
     activation: !ref <activation>
diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml
index bf9211caf..2c0355ae5 100644
--- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml
+++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_it.yaml
@@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 8.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 50
 number_of_ctc_epochs: 40
 lr: 1.0
@@ -59,7 +60,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 80
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -101,50 +102,27 @@ temperature: 1.50
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -161,6 +139,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
     activation: !ref <activation>
diff --git a/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml b/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml
index dd665ab24..8bc89c1c4 100644
--- a/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml
+++ b/recipes/CommonVoice/ASR/seq2seq/hparams/train_rw.yaml
@@ -29,7 +29,8 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 8.0
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 25
 number_of_ctc_epochs: 20
 lr: 1.0
@@ -59,7 +60,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 80
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -102,50 +103,27 @@ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -162,6 +140,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
     activation: !ref <activation>
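
The seq2seq recipes (and the transducer ones below) trim the SpecAugment-style feature pipeline the same way: the deleted replace: "zeros" and dim: 1 for the time drop, and warp_window: 5 / warp_mode: "bicubic" / dim: 1 for the warp, are assumed to be the SpectrogramDrop and Warping defaults. A hedged sketch of the resulting fea_augment on dummy filterbank features:

    # Sketch only: same values as the YAML above; class defaults assumed
    # as noted in the comments.
    import torch
    from speechbrain.augment.freq_domain import SpectrogramDrop, Warping
    from speechbrain.augment.augmenter import Augmenter

    time_drop = SpectrogramDrop(      # dim=1 (time axis) assumed default
        drop_length_low=15, drop_length_high=25,
        drop_count_low=5, drop_count_high=5,
    )
    freq_drop = SpectrogramDrop(
        drop_length_low=25, drop_length_high=35,
        drop_count_low=2, drop_count_high=2,
        dim=2,                        # drop along the frequency axis
    )
    time_warp = Warping()             # assumed defaults: window 5, bicubic, dim 1

    fea_augment = Augmenter(
        min_augmentations=3, max_augmentations=3, augment_prob=1.0,
        augmentations=[time_drop, freq_drop, time_warp],
    )

    feats = torch.randn(8, 200, 80)   # dummy fbanks: [batch, frames, n_mels]
    feats_aug, lens_aug = fea_augment(feats, torch.ones(8))
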
diff --git a/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml b/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml
index 8faaa805f..9bbab1669 100644
--- a/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml
+++ b/recipes/CommonVoice/ASR/transducer/hparams/train_de.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 batch_size: 6
 batch_size_valid: 1
@@ -71,7 +71,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <batch_size_valid>
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -109,50 +109,28 @@ compute_features: !new:speechbrain.lobes.features.Fbank
    n_fft: !ref <n_fft>
    n_mels: !ref <n_mels>
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <time_drop_length_low>
-   drop_length_high: !ref <time_drop_length_high>
-   drop_count_low: !ref <time_drop_count_low>
-   drop_count_high: !ref <time_drop_count_high>
-   replace: !ref <time_drop_replace>
-   dim: 1
+   drop_length_low: 15
+   drop_length_high: 25
+   drop_count_low: 5
+   drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <freq_drop_length_low>
-   drop_length_high: !ref <freq_drop_length_high>
-   drop_count_low: !ref <freq_drop_count_low>
-   drop_count_high: !ref <freq_drop_count_high>
-   replace: !ref <freq_drop_replace>
+   drop_length_low: 25
+   drop_length_high: 35
+   drop_count_low: 3
+   drop_count_high: 3
    dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
 
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-   warp_window: !ref <time_warp_window>
-   warp_mode: !ref <time_warp_mode>
-   dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: False
-   repeat_augment: 1
-   shuffle_augmentations: False
    min_augmentations: 3
    max_augmentations: 3
    augment_prob: 1.0
@@ -161,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
       !ref <freq_drop>,
       !ref <time_warp>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
    input_shape: [null, null, !ref <n_mels>]
    activation: !ref <activation>
diff --git a/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml b/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml
index 6c3f0bc7d..c96a09394 100644
--- a/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml
+++ b/recipes/CommonVoice/ASR/transducer/hparams/train_fr.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 batch_size: 6
 batch_size_valid: 1
@@ -71,7 +71,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <batch_size_valid>
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -109,50 +109,28 @@ compute_features: !new:speechbrain.lobes.features.Fbank
    n_fft: !ref <n_fft>
    n_mels: !ref <n_mels>
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <time_drop_length_low>
-   drop_length_high: !ref <time_drop_length_high>
-   drop_count_low: !ref <time_drop_count_low>
-   drop_count_high: !ref <time_drop_count_high>
-   replace: !ref <time_drop_replace>
-   dim: 1
+   drop_length_low: 15
+   drop_length_high: 25
+   drop_count_low: 5
+   drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <freq_drop_length_low>
-   drop_length_high: !ref <freq_drop_length_high>
-   drop_count_low: !ref <freq_drop_count_low>
-   drop_count_high: !ref <freq_drop_count_high>
-   replace: !ref <freq_drop_replace>
+   drop_length_low: 25
+   drop_length_high: 35
+   drop_count_low: 3
+   drop_count_high: 3
    dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
 
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-   warp_window: !ref <time_warp_window>
-   warp_mode: !ref <time_warp_mode>
-   dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: False
-   repeat_augment: 1
-   shuffle_augmentations: False
    min_augmentations: 3
    max_augmentations: 3
    augment_prob: 1.0
@@ -161,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
       !ref <freq_drop>,
       !ref <time_warp>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
    input_shape: [null, null, !ref <n_mels>]
    activation: !ref <activation>
diff --git a/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml b/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml
index a645f9815..cf366205e 100644
--- a/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml
+++ b/recipes/CommonVoice/ASR/transducer/hparams/train_it.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 batch_size: 6
 batch_size_valid: 1
@@ -71,7 +71,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <batch_size_valid>
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 3
@@ -109,50 +109,28 @@ compute_features: !new:speechbrain.lobes.features.Fbank
    n_fft: !ref <n_fft>
    n_mels: !ref <n_mels>
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <time_drop_length_low>
-   drop_length_high: !ref <time_drop_length_high>
-   drop_count_low: !ref <time_drop_count_low>
-   drop_count_high: !ref <time_drop_count_high>
-   replace: !ref <time_drop_replace>
-   dim: 1
+   drop_length_low: 15
+   drop_length_high: 25
+   drop_count_low: 5
+   drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <freq_drop_length_low>
-   drop_length_high: !ref <freq_drop_length_high>
-   drop_count_low: !ref <freq_drop_count_low>
-   drop_count_high: !ref <freq_drop_count_high>
-   replace: !ref <freq_drop_replace>
+   drop_length_low: 25
+   drop_length_high: 35
+   drop_count_low: 3
+   drop_count_high: 3
    dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
 
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-   warp_window: !ref <time_warp_window>
-   warp_mode: !ref <time_warp_mode>
-   dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: False
-   repeat_augment: 1
-   shuffle_augmentations: False
    min_augmentations: 3
    max_augmentations: 3
    augment_prob: 1.0
@@ -161,6 +139,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
       !ref <freq_drop>,
       !ref <time_warp>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
    input_shape: [null, null, !ref <n_mels>]
    activation: !ref <activation>
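
The shortened YAML above relies on HyperPyYAML resolving inlined literals exactly as it resolves !ref indirection through top-level keys, so the refactor builds the same objects with fewer lines. A minimal sketch of that equivalence, assuming load_hyperpyyaml accepts YAML strings and that SpectrogramDrop stores its constructor arguments as attributes:

    from hyperpyyaml import load_hyperpyyaml

    verbose = """
    time_drop_length_low: 15
    time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
        drop_length_low: !ref <time_drop_length_low>
    """
    inlined = """
    time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
        drop_length_low: 15
    """

    # Both styles construct the same module with the same hyperparameter
    # (attribute access assumed for illustration).
    a = load_hyperpyyaml(verbose)["time_drop"]
    b = load_hyperpyyaml(inlined)["time_drop"]
    assert a.drop_length_low == b.drop_length_low
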
diff --git a/recipes/CommonVoice/ASR/transducer/train.py b/recipes/CommonVoice/ASR/transducer/train.py
index 1782408b8..0304aabc8 100644
--- a/recipes/CommonVoice/ASR/transducer/train.py
+++ b/recipes/CommonVoice/ASR/transducer/train.py
@@ -134,26 +134,22 @@ class ASR(sb.Brain):
 
         if stage == sb.Stage.TRAIN:
             if hasattr(self.hparams, "wav_augment"):
-                tokens = self.hparams.wav_augment.replicate_labels(tokens)
-                token_lens = self.hparams.wav_augment.replicate_labels(
-                    token_lens
-                )
-                tokens_eos = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos
-                )
-                token_eos_lens = self.hparams.wav_augment.replicate_labels(
-                    token_eos_lens
+                (
+                    tokens,
+                    token_lens,
+                    tokens_eos,
+                    token_eos_lens,
+                ) = self.hparams.wav_augment.replicate_multiple_labels(
+                    tokens, token_lens, tokens_eos, token_eos_lens
                 )
             if hasattr(self.hparams, "fea_augment"):
-                tokens = self.hparams.fea_augment.replicate_labels(tokens)
-                token_lens = self.hparams.fea_augment.replicate_labels(
-                    token_lens
-                )
-                tokens_eos = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos
-                )
-                token_eos_lens = self.hparams.fea_augment.replicate_labels(
-                    token_eos_lens
+                (
+                    tokens,
+                    token_lens,
+                    tokens_eos,
+                    token_eos_lens,
+                ) = self.hparams.fea_augment.replicate_multiple_labels(
+                    tokens, token_lens, tokens_eos, token_eos_lens
                 )
 
         if stage == sb.Stage.TRAIN:
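
Here replicate_multiple_labels collapses four per-tensor replicate_labels calls into one. A hedged sketch of the behaviour the refactor relies on, using a hypothetical helper (replicate_all is not SpeechBrain API) to make the equivalence explicit:

    # Hypothetical helper: replicate every label tensor once per
    # augmented copy in the batch, exactly as four separate
    # replicate_labels calls would.
    def replicate_all(augmenter, *tensors):
        return tuple(augmenter.replicate_labels(t) for t in tensors)

    # tokens, token_lens, tokens_eos, token_eos_lens = replicate_all(
    #     self.hparams.wav_augment, tokens, token_lens, tokens_eos, token_eos_lens)
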
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml
index 2b358e6d4..d33c50c2b 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_ar_hf_whisper.yaml
@@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -63,7 +63,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 freeze_encoder: True
 
@@ -82,45 +82,30 @@ test_loader_kwargs:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -129,6 +114,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
 
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
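
The Augmenter keys removed throughout this patch (parallel_augment: False, concat_original: False, repeat_augment: 1, shuffle_augmentations: False) appear to match the class defaults, so the shorter definition should build an identical pipeline. A minimal usage sketch, assuming the Augmenter is called on a padded waveform batch with relative lengths and returns both:

    import torch
    from speechbrain.augment.augmenter import Augmenter
    from speechbrain.augment.time_domain import SpeedPerturb, DropFreq, DropChunk

    # Mirrors the wav_augment definition above: three augmentations,
    # all applied on every call.
    wav_augment = Augmenter(
        min_augmentations=3,
        max_augmentations=3,
        augment_prob=1.0,
        augmentations=[
            SpeedPerturb(orig_freq=16000, speeds=[95, 100, 105]),
            DropFreq(),
            DropChunk(),
        ],
    )

    wavs = torch.randn(4, 16000)  # (batch, time)
    lens = torch.ones(4)          # relative lengths in [0, 1]
    aug_wavs, aug_lens = wav_augment(wavs, lens)
    # With the default flags nothing is concatenated, so the batch size
    # is unchanged and no label replication is needed.
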
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml
index dbeb56be5..c5533e9bb 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_de.yaml
@@ -33,7 +33,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 32 # This works with a 32GB GPU! (bs * nb_gpu * accum) > 128!
 ctc_weight: 0.3
@@ -70,7 +70,7 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: 6
 
-####################### Model parameters ###########################
+####################### Model Parameters ###########################
 # Transformer
 d_model: 768
 nhead: 8
@@ -213,50 +213,27 @@ normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 3
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
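
The shortened SpectrogramDrop blocks above drop replace: "zeros" and, for the time drop, dim: 1, leaning on the constructor defaults instead. A quick, hedged check that the omitted keys match those defaults:

    import inspect
    from speechbrain.augment.freq_domain import SpectrogramDrop

    sig = inspect.signature(SpectrogramDrop.__init__)
    print(sig.parameters["dim"].default)      # expected: 1
    print(sig.parameters["replace"].default)  # expected: "zeros"
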
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml
index bc0181044..bb23c98a6 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_fa_hf_whisper.yaml
@@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -63,7 +63,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 freeze_encoder: True
 
@@ -82,45 +82,30 @@ test_loader_kwargs:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml
index 120305e73..e62d9c390 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_fr.yaml
@@ -33,7 +33,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 32 # This works with a 32GB GPU! (bs * nb_gpu * accum) > 128!
 ctc_weight: 0.3
@@ -70,7 +70,7 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: 6
 
-####################### Model parameters ###########################
+####################### Model Parameters ###########################
 # Transformer
 d_model: 768
 nhead: 8
@@ -213,50 +213,27 @@ normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 3
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml
index da5cbd28f..62363bdad 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_fr_hf_whisper.yaml
@@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -63,7 +63,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 freeze_encoder: True
 
@@ -82,45 +82,30 @@ test_loader_kwargs:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml
index 8b130a83d..e21852639 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_hi_hf_whisper.yaml
@@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -63,7 +63,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 freeze_encoder: True
 
@@ -82,45 +82,30 @@ test_loader_kwargs:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml
index d937359b5..d95fbaffa 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_it.yaml
@@ -33,7 +33,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 32 # This works with a 32GB GPU! (bs * nb_gpu * accum) > 128!
 ctc_weight: 0.3
@@ -70,7 +70,7 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: 6
 
-####################### Model parameters ###########################
+####################### Model Parameters ###########################
 # Transformer
 d_model: 768
 nhead: 8
@@ -213,50 +213,27 @@ normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 3
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 5
+    drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 25
+    drop_length_high: 35
+    drop_count_low: 2
+    drop_count_high: 2
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml
index 5670f4fe8..e1fc08263 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_it_hf_whisper.yaml
@@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -63,7 +63,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 freeze_encoder: True
 
@@ -82,45 +82,30 @@ test_loader_kwargs:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml
index 9ffdc95fd..fe4fd6f17 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_mn_hf_whisper.yaml
@@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -63,7 +63,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 freeze_encoder: True
 
@@ -82,45 +82,30 @@ test_loader_kwargs:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -129,6 +114,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
diff --git a/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml b/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml
index 4f257094d..d7390d9a5 100644
--- a/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml
+++ b/recipes/CommonVoice/ASR/transformer/hparams/train_sr_hf_whisper.yaml
@@ -37,7 +37,7 @@ avoid_if_longer_than: 10.0
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -63,7 +63,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 freeze_encoder: True
 
@@ -83,45 +83,30 @@ test_loader_kwargs:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -130,6 +115,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
diff --git a/recipes/CommonVoice/ASR/transformer/train.py b/recipes/CommonVoice/ASR/transformer/train.py
index 0aee6735e..89847d352 100644
--- a/recipes/CommonVoice/ASR/transformer/train.py
+++ b/recipes/CommonVoice/ASR/transformer/train.py
@@ -107,27 +107,25 @@ class ASR(sb.core.Brain):
 
         # Augment Labels
         if stage == sb.Stage.TRAIN:
+            # Labels must be extended if parallel augmentation or concatenated
+            # augmentation was performed on the input (increasing the batch dimension)
             if hasattr(self.hparams, "wav_augment"):
-                tokens = self.hparams.wav_augment.replicate_labels(tokens)
-                tokens_lens = self.hparams.wav_augment.replicate_labels(
-                    tokens_lens
-                )
-                tokens_eos = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos
-                )
-                tokens_eos_lens = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos_lens
+                (
+                    tokens,
+                    tokens_lens,
+                    tokens_eos,
+                    tokens_eos_lens,
+                ) = self.hparams.wav_augment.replicate_multiple_labels(
+                    tokens, tokens_lens, tokens_eos, tokens_eos_lens
                 )
             if hasattr(self.hparams, "fea_augment"):
-                tokens = self.hparams.fea_augment.replicate_labels(tokens)
-                tokens_lens = self.hparams.fea_augment.replicate_labels(
-                    tokens_lens
-                )
-                tokens_eos = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos
-                )
-                tokens_eos_lens = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos_lens
+                (
+                    tokens,
+                    tokens_lens,
+                    tokens_eos,
+                    tokens_eos_lens,
+                ) = self.hparams.fea_augment.replicate_multiple_labels(
+                    tokens, tokens_lens, tokens_eos, tokens_eos_lens
                 )
 
         loss_seq = self.hparams.seq_cost(
diff --git a/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml b/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml
index 5ead6dbc0..e7ceed4f5 100644
--- a/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml
+++ b/recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml
@@ -27,11 +27,11 @@ skip_prep: False
 
 
 # We remove utterances longer than 10s in the train/dev/test sets as
-# longer sentences certainly correspond to "open microphones".
+# longer sentences certainly correspond to open microphones.
 avoid_if_longer_than: 10.0
 avoid_if_shorter_than: 1.0
 
-# Training parameters
+####################### Training Parameters ####################################
 # Parameters correspond to the ones reported in the official wav2vec2
 # paper (for the masking).
 mask_length: 10
@@ -52,8 +52,8 @@ ckpt_interval_minutes: 30 # save checkpoint every N min
 # IMPORTANT: To train the w2v2 model, we recommend keeping the effective batch_size
 # higher than 100 (batch_size * nb_gpu * grad_accumulation_factor)
 # Examples are:
-# 32 Tesla V100 32GB — 12 * 32 * 1
-# 4 Tesla V100 32GB — 12 * 4 * {6-8}
+# 32 Tesla V100 32GB = 12 * 32 * 1
+# 4 Tesla V100 32GB = 12 * 4 * (6-8)
 batch_size: 12
 test_batch_size: 8
 grad_accumulation_factor: 8
@@ -104,7 +104,7 @@ modules:
     wav2vec2: !ref <wav2vec2>
 
 opt_class: !name:torch.optim.AdamW
-    lr: 0 # Will be changed by the scheduler, but we start at 0!
+    lr: 0 # Will be changed by the scheduler, but we start at 0
     betas: (0.9, 0.98)
     eps: 0.000000001
     weight_decay: !ref <weight_decay>
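
Spelling out the effective batch size rule from the comment above (batch_size * nb_gpu * grad_accumulation_factor, targeting more than 100) for the documented configurations:

    batch_size = 12
    for nb_gpu, accumulation in [(32, 1), (4, 6), (4, 8)]:
        effective = batch_size * nb_gpu * accumulation
        print(f"{nb_gpu} GPUs, accum {accumulation}: {effective}")  # 384, 288, 384
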
diff --git a/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml b/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml
index d43a2f0a9..87a07c97a 100644
--- a/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml
+++ b/recipes/DNS/enhancement/hparams/sepformer-dns-16k.yaml
@@ -39,7 +39,7 @@ sample_rate: 16000
 audio_length: 4 # seconds
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 100
 batch_size: 4
 batch_size_test: 1
diff --git a/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml
index 551ba2c19..e9e1f4310 100644
--- a/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml
+++ b/recipes/DVoice/ASR/CTC/hparams/train_amh_with_wav2vec.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 15.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +58,7 @@ test_dataloader_options:
 token_type: char  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -78,45 +78,31 @@ eos_index: 2
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -125,6 +111,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
 
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
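
Unlike the CommonVoice recipes above, the DVoice recipes keep concat_original: True, so the clean batch is retained and the augmented copies are appended along the batch dimension; the label replication in the training script must then grow the labels by the same factor. A hedged sketch of that batch growth, under the same call-signature assumptions as the earlier sketch:

    import torch
    from speechbrain.augment.augmenter import Augmenter
    from speechbrain.augment.time_domain import DropFreq

    wav_augment = Augmenter(
        concat_original=True,  # keep the clean copies too
        min_augmentations=1,
        max_augmentations=1,
        augment_prob=1.0,
        augmentations=[DropFreq()],
    )

    wavs, lens = torch.randn(4, 16000), torch.ones(4)
    aug_wavs, aug_lens = wav_augment(wavs, lens)
    print(aug_wavs.shape[0])  # expected: 8 = 4 original + 4 augmented
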
diff --git a/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml
index 0b3647705..d1e2c6684 100644
--- a/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml
+++ b/recipes/DVoice/ASR/CTC/hparams/train_dar_with_wav2vec.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 15.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +58,7 @@ test_dataloader_options:
 token_type: char  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -78,45 +78,31 @@ eos_index: 2
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
diff --git a/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml
index 946ca0b6f..fca0230de 100644
--- a/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml
+++ b/recipes/DVoice/ASR/CTC/hparams/train_fon_with_wav2vec.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 15.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +58,7 @@ test_dataloader_options:
 token_type: char  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -78,45 +78,31 @@ eos_index: 2
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
diff --git a/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml
index 14aef36c9..89fedade8 100644
--- a/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml
+++ b/recipes/DVoice/ASR/CTC/hparams/train_multi_with_wav2vec.yaml
@@ -31,7 +31,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 15.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -57,7 +57,7 @@ test_dataloader_options:
 token_type: char  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -77,45 +77,31 @@ eos_index: 2
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -124,6 +110,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
diff --git a/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml
index f00e330a4..0194fd877 100644
--- a/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml
+++ b/recipes/DVoice/ASR/CTC/hparams/train_sw_with_wav2vec.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 15.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +58,7 @@ test_dataloader_options:
 token_type: char  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -78,45 +78,31 @@ eos_index: 2
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
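
In `SpeedPerturb`, each entry of `speeds` is a percentage of the original rate, so `[95, 100, 105]` plays an utterance back at 0.95x, 1.0x, or 1.05x speed. A hypothetical wider-range variant, for illustration only (these values come from no recipe here):

speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
    orig_freq: !ref <sample_rate>
    speeds: [90, 100, 110]  # 90 = 10% slower, 110 = 10% faster
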
diff --git a/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml b/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml
index b1188dcb9..8470ce3a1 100644
--- a/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml
+++ b/recipes/DVoice/ASR/CTC/hparams/train_wol_with_wav2vec.yaml
@@ -32,7 +32,7 @@ skip_prep: False # Skip data preparation
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 15.0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -58,7 +58,7 @@ test_dataloader_options:
 token_type: char  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -78,45 +78,31 @@ eos_index: 2
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -125,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
     input_shape: [null, null, !ref <wav2vec_output_dim>]
     linear1: !name:speechbrain.nnet.linear.Linear
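
Two unit conventions matter when tuning these blocks: `DropChunk` lengths are counted in samples (1000-2000 samples is roughly 62-125 ms, assuming the 16 kHz rate typical of these wav2vec 2.0 recipes), while the `DropFreq` bounds are normalized frequencies in [0, 1], not probabilities. The same sketch with the units spelled out in comments:

drop_freq: !new:speechbrain.augment.time_domain.DropFreq
    drop_freq_low: 0       # lowest droppable band (normalized frequency)
    drop_freq_high: 1      # highest droppable band (normalized frequency)
    drop_freq_count_low: 1
    drop_freq_count_high: 3
    drop_freq_width: 0.05  # band width, also normalized

drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
    drop_length_low: 1000   # samples (~62 ms at 16 kHz)
    drop_length_high: 2000  # samples (~125 ms at 16 kHz)
    drop_count_low: 1
    drop_count_high: 5
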
diff --git a/recipes/ESC50/classification/hparams/cnn14_classifier.yaml b/recipes/ESC50/classification/hparams/cnn14_classifier.yaml
index e8034bfdd..bc0a83bbd 100644
--- a/recipes/ESC50/classification/hparams/cnn14_classifier.yaml
+++ b/recipes/ESC50/classification/hparams/cnn14_classifier.yaml
@@ -41,7 +41,7 @@ skip_manifest_creation: False
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 200
 batch_size: 32
 lr: 0.0002
diff --git a/recipes/ESC50/classification/hparams/conv2d_classifier.yaml b/recipes/ESC50/classification/hparams/conv2d_classifier.yaml
index 2b0a49bcd..284d5681f 100644
--- a/recipes/ESC50/classification/hparams/conv2d_classifier.yaml
+++ b/recipes/ESC50/classification/hparams/conv2d_classifier.yaml
@@ -41,7 +41,7 @@ skip_manifest_creation: False
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 200
 batch_size: 32
 lr: 0.00002
diff --git a/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml b/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml
index 6f57a843b..00acd1ff3 100644
--- a/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml
+++ b/recipes/ESC50/interpret/hparams/l2i_cnn14.yaml
@@ -39,7 +39,7 @@ skip_manifest_creation: False
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 200
 batch_size: 2
 lr: 0.0001
diff --git a/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml b/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml
index 7292f89af..4f6cb9b90 100644
--- a/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml
+++ b/recipes/ESC50/interpret/hparams/l2i_conv2dclassifier.yaml
@@ -39,7 +39,7 @@ skip_manifest_creation: False
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 200
 batch_size: 16
 lr: 0.0002
diff --git a/recipes/ESC50/interpret/hparams/nmf.yaml b/recipes/ESC50/interpret/hparams/nmf.yaml
index 7b6c9905d..e4da313ba 100644
--- a/recipes/ESC50/interpret/hparams/nmf.yaml
+++ b/recipes/ESC50/interpret/hparams/nmf.yaml
@@ -40,7 +40,7 @@ skip_manifest_creation: False
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 100
 batch_size: 2
 lr: 0.0002
diff --git a/recipes/ESC50/interpret/hparams/piq.yaml b/recipes/ESC50/interpret/hparams/piq.yaml
index c45f50a20..68f8c06de 100644
--- a/recipes/ESC50/interpret/hparams/piq.yaml
+++ b/recipes/ESC50/interpret/hparams/piq.yaml
@@ -42,7 +42,7 @@ skip_manifest_creation: False
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 200
 batch_size: 16
 lr: 0.0002
diff --git a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml
index ec8653ade..49a7321f7 100644
--- a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml
+++ b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/conformer.yaml
@@ -81,7 +81,7 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: !ref <num_workers>
 
-####################### Model parameters ###########################
+####################### Model Parameters ###########################
 # Transformer
 d_model: 256
 nhead: 4
diff --git a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml
index 59c3782e1..4310e2d6b 100644
--- a/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml
+++ b/recipes/Fisher-Callhome-Spanish/ST/transformer/hparams/transformer.yaml
@@ -91,7 +91,7 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: !ref <num_workers>
 
-####################### Model parameters ###########################
+####################### Model Parameters ###########################
 # Transformer
 d_model: 256
 nhead: 4
diff --git a/recipes/Google-speech-commands/hparams/xvect.yaml b/recipes/Google-speech-commands/hparams/xvect.yaml
index 8eb842ba9..417cecfdf 100644
--- a/recipes/Google-speech-commands/hparams/xvect.yaml
+++ b/recipes/Google-speech-commands/hparams/xvect.yaml
@@ -40,7 +40,7 @@ percentage_silence: 10 # Set this to 0 for the V2 35 task
 skip_prep: False
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 100
 batch_size: 32
 lr: 0.001
diff --git a/recipes/Google-speech-commands/hparams/xvect_leaf.yaml b/recipes/Google-speech-commands/hparams/xvect_leaf.yaml
index e06101850..f2897af22 100644
--- a/recipes/Google-speech-commands/hparams/xvect_leaf.yaml
+++ b/recipes/Google-speech-commands/hparams/xvect_leaf.yaml
@@ -42,7 +42,7 @@ percentage_silence: 10 # Set this to 0 for the V2 35 task
 skip_prep: False
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 100
 batch_size: 32
 lr: 0.001
diff --git a/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml b/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml
index ae3452882..d1b63d7bf 100644
--- a/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml
+++ b/recipes/IEMOCAP/emotion_recognition/hparams/train_with_wav2vec2.yaml
@@ -38,7 +38,7 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 batch_size: 4
 lr: 0.0001
@@ -50,7 +50,7 @@ freeze_wav2vec2: False
 # We see an improvement of 2% with freezing CNNs
 freeze_wav2vec2_conv: True
 
-# Model parameters
+####################### Model Parameters #######################################
 encoder_dim: 768
 
 # Number of emotions
diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml
index 05bca5c1e..3901391a5 100644
--- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml
+++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu.yaml
@@ -30,7 +30,7 @@ wav2vec2_hub: LIA-AvignonUniversity/IWSLT2022-tamasheq-only
 # wav2vec 2.0 specific parameters
 wav2vec2_frozen: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 100
 lr: 0.001
 lr_wav2vec: 0.00001
diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml
index a3a2f1c99..6887c3a40 100644
--- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml
+++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_mbart_st.yaml
@@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
 # wav2vec 2.0 specific parameters
 wav2vec2_frozen: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 500
 lr: 0.001
 lr_wav2vec: 0.0001
diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml
index 11ebc937a..b86cef685 100644
--- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml
+++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_samu_nllb_st.yaml
@@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
 # wav2vec 2.0 specific parameters
 wav2vec2_frozen: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 500
 lr: 0.001
 lr_wav2vec: 0.0001
diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml
index 68f74d9b4..77b7c8cd6 100644
--- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml
+++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_mbart_st.yaml
@@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
 # wav2vec 2.0 specific parameters
 wav2vec2_frozen: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 500
 lr: 0.001
 lr_wav2vec: 0.0001
diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml
index b62e366f1..d384bf3a8 100644
--- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml
+++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_nllb_st.yaml
@@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
 # wav2vec 2.0 specific parameters
 wav2vec2_frozen: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 500
 lr: 0.001
 lr_wav2vec: 0.0001
diff --git a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml
index 6bfb9db12..beafeba86 100644
--- a/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml
+++ b/recipes/IWSLT22_lowresource/AST/transformer/hparams/train_w2v2_st.yaml
@@ -36,7 +36,7 @@ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
 wav2vec2_frozen: False
 keep_n_layers: 6 # keep first N layers from the Transformer Encoder stack inside the wav2vec 2.0 model
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 100
 lr: 0.001
 lr_wav2vec: 0.00001
diff --git a/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml b/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml
index aee256973..3c0d43e2a 100644
--- a/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml
+++ b/recipes/KsponSpeech/ASR/transformer/hparams/conformer_medium.yaml
@@ -34,7 +34,7 @@ test_csv:
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -78,7 +78,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters ###########################
 # Transformer
 d_model: 256
 nhead: 4
diff --git a/recipes/KsponSpeech/LM/hparams/transformer.yaml b/recipes/KsponSpeech/LM/hparams/transformer.yaml
index cd9685e28..5b64cc196 100644
--- a/recipes/KsponSpeech/LM/hparams/transformer.yaml
+++ b/recipes/KsponSpeech/LM/hparams/transformer.yaml
@@ -24,7 +24,7 @@ test_csv:
 # Tokenizer model
 tokenizer_file: ddwkim/asr-conformer-transformerlm-ksponspeech/tokenizer.ckpt
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 batch_size: 256
 lr: 0.1
diff --git a/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml b/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml
index dd7cd4906..04ef0ebfd 100644
--- a/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml
+++ b/recipes/KsponSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml
@@ -16,7 +16,7 @@ skip_prep: False
 train_csv: !ref <output_folder>/train.csv
 valid_csv: !ref <output_folder>/dev.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 5000  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml b/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml
index 8fb195c03..ffa5a1ef2 100644
--- a/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml
+++ b/recipes/LibriMix/separation/hparams/sepformer-libri2mix.yaml
@@ -37,7 +37,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml b/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml
index cf68e9a81..abc9c76c7 100644
--- a/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml
+++ b/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml
@@ -37,7 +37,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/LibriParty/VAD/hparams/train.yaml b/recipes/LibriParty/VAD/hparams/train.yaml
index e07258c44..be9191685 100644
--- a/recipes/LibriParty/VAD/hparams/train.yaml
+++ b/recipes/LibriParty/VAD/hparams/train.yaml
@@ -41,7 +41,7 @@ speech_csv: !ref <save_folder>/speech.csv
 multilang_speech_csv: !ref <save_folder>/multilang_speech.csv
 skip_prep: False # Skip data preparation
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 100
 lr: 1.0
 lr_final: 0.1
@@ -65,7 +65,7 @@ test_dataloader_opts:
 n_fft: 400
 n_mels: 40
 
-# Model parameters
+####################### Model Parameters #######################################
 # activation: !name:torch.nn.LeakyReLU
 # dropout: 0.15
 # cnn_blocks: 2
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml
index b609bb766..fdbd7e86d 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_average_downsampling.yaml
@@ -33,7 +33,8 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 1
 lr: 0.9
 lr_wav2vec: 0.0001
@@ -62,7 +63,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
+
 activation: !name:torch.nn.LeakyReLU
 dnn_layers: 2
 dnn_neurons: 1024
@@ -73,76 +75,12 @@ ctc_neurons: 29
 output_neurons: 29  # Characters size, index(blank/eos/bos) = 0
 blank_index: 0
 
-# Decoding parameters
-test_beam_search:
-   beam_size: 200
-   topk: 1
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -10.0
-   token_prune_min_logp: -5
-   prune_history: True
-   alpha: 0.5
-   beta: 1.5
-   # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
-   # It can either be a .bin or .arpa ; note: .arpa is much slower at loading
-   # If you don't want to use an LM, comment it out or set it to null
-   kenlm_model_path: null
-
 #
 # Functions and classes
 #
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
-
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
    activation: !ref <activation>
@@ -211,8 +149,60 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.9
    patient: 0
 
+############################## Decoding ########################################
+
+test_beam_search:
+   beam_size: 200
+   topk: 1
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -10.0
+   token_prune_min_logp: -5
+   prune_history: True
+   alpha: 0.5
+   beta: 1.5
+   # An LM can be downloaded from https://www.openslr.org/11/ or trained with KenLM.
+   # It can be either a .bin or a .arpa file; note that .arpa loads much more slowly.
+   # If you don't want to use an LM, comment it out or set it to null
+   kenlm_model_path: null
+
 label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder
 
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
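
To activate the `kenlm_model_path` hook shown above, point it at an n-gram model; `alpha` scales the LM score and `beta` acts as a word-insertion bonus during beam search. A sketch assuming a locally downloaded model (the path is illustrative, not shipped with the recipe):

test_beam_search:
   beam_size: 200
   topk: 1
   blank_index: !ref <blank_index>
   space_token: ' '  # must match the tokenizer
   beam_prune_logp: -10.0
   token_prune_min_logp: -5
   prune_history: True
   alpha: 0.5   # LM weight
   beta: 1.5    # word-insertion bonus
   kenlm_model_path: /path/to/4-gram.arpa  # hypothetical path; e.g. from https://www.openslr.org/11/
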
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml
index f92f4f8fc..1b84596dc 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_conv_downsampling.yaml
@@ -34,7 +34,8 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 1
 lr: 0.9
 lr_wav2vec: 0.0001
@@ -63,7 +64,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
+
 activation: !name:torch.nn.LeakyReLU
 dnn_layers: 2
 dnn_neurons: 1024
@@ -74,76 +76,12 @@ ctc_neurons: 29
 output_neurons: 29  # Characters size, index(blank/eos/bos) = 0
 blank_index: 0
 
-# Decoding parameters
-test_beam_search:
-   beam_size: 200
-   topk: 1
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -10.0
-   token_prune_min_logp: -5
-   prune_history: True
-   alpha: 0.5
-   beta: 1.5
-   # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
-   # It can either be a .bin or .arpa ; note: .arpa is much slower at loading
-   # If you don't want to use an LM, comment it out or set it to null
-   kenlm_model_path: null
-
-
 #
 # Functions and classes
 #
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
 
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
@@ -214,8 +152,60 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.9
    patient: 0
 
+############################## Decoding ########################################
+
+test_beam_search:
+   beam_size: 200
+   topk: 1
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -10.0
+   token_prune_min_logp: -5
+   prune_history: True
+   alpha: 0.5
+   beta: 1.5
+   # An LM can be downloaded from https://www.openslr.org/11/ or trained with KenLM.
+   # It can be either a .bin or a .arpa file; note that .arpa loads much more slowly.
+   # If you don't want to use an LM, comment it out or set it to null
+   kenlm_model_path: null
+
 label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder
 
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml
index 6c0e7207d..d0daf5b77 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/downsampled/train_hf_wavlm_signal_downsampling.yaml
@@ -33,7 +33,8 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 1
 lr: 0.9
 lr_wav2vec: 0.0001
@@ -61,7 +62,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
+
 activation: !name:torch.nn.LeakyReLU
 dnn_layers: 2
 dnn_neurons: 1024
@@ -72,75 +74,12 @@ ctc_neurons: 58 # Twice the number of characters, for upsampling
 output_neurons: 29  # Characters size, index(blank/eos/bos) = 0
 blank_index: 0
 
-# Decoding parameters
-test_beam_search:
-   beam_size: 200
-   topk: 1
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -10.0
-   token_prune_min_logp: -5
-   prune_history: True
-   alpha: 0.5
-   beta: 1.5
-   # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
-   # It can either be a .bin or .arpa ; note: .arpa is much slower at loading
-   # If you don't want to use an LM, comment it out or set it to null
-   kenlm_model_path: null
-
 #
 # Functions and classes
 #
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
 
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
@@ -210,8 +149,58 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.9
    patient: 0
 
+############################## Decoding ########################################
+
+test_beam_search:
+   beam_size: 200
+   topk: 1
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -10.0
+   token_prune_min_logp: -5
+   prune_history: True
+   alpha: 0.5
+   beta: 1.5
+   # An LM can be downloaded from https://www.openslr.org/11/ or trained with KenLM.
+   # It can be either a .bin or a .arpa file; note that .arpa loads much more slowly.
+   # If you don't want to use an LM, comment it out or set it to null
+   kenlm_model_path: null
+
 label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder
 
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml
index 2d91909f2..1d860a29f 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec.yaml
@@ -32,7 +32,8 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 1
 lr: 0.9
 lr_wav2vec: 0.0001
@@ -56,7 +57,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dnn_layers: 2
 dnn_neurons: 1024
@@ -66,75 +67,14 @@ freeze_wav2vec: True
 output_neurons: 29  # BPE size, index(blank/eos/bos) = 0
 blank_index: 0
 
-# Decoding parameters
-test_beam_search:
-   beam_size: 143
-   topk: 1
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -12.0
-   token_prune_min_logp: -1.2
-   prune_history: True
-   alpha: 0.8
-   beta: 1.2
-   # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
-   # It can either be a .bin or .arpa ; note: .arpa is much slower at loading
-   # If you don't want to use an LM, comment it out or set it to null
-   kenlm_model_path: null
-
 #
 # Functions and classes
 #
-epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
-   limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
+label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder
 
+epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
+   limit: !ref <number_of_epochs>
 
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
@@ -198,7 +138,58 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.9
    patient: 0
 
-label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Decoding ########################################
+
+# Test-time CTC beam search
+test_beam_search:
+   beam_size: 143
+   topk: 1
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -12.0
+   token_prune_min_logp: -1.2
+   prune_history: True
+   alpha: 0.8
+   beta: 1.2
+   # An LM can be downloaded from https://www.openslr.org/11/ or trained with KenLM.
+   # It can be either a .bin or a .arpa file; note that .arpa loads much more slowly.
+   # If you don't want to use an LM, comment it out or set it to null
+   kenlm_model_path: null
+
+############################## Logging and Pretrainer ##########################
 
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml
index 01a31cdd8..c946b0243 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_rnn_rescoring.yaml
@@ -32,7 +32,8 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 1
 lr: 0.9
 lr_wav2vec: 0.0001
@@ -56,7 +57,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
+
 activation: !name:torch.nn.LeakyReLU
 dnn_layers: 2
 dnn_neurons: 1024
@@ -68,49 +70,6 @@ output_neurons: 29  # BPE size, index(blank/eos/bos) = 0
 
 pretrained_lm_tokenizer_path: speechbrain/asr-crdnn-rnnlm-librispeech
 
-# This is the RNNLM that is used according to the Huggingface repository
-# NB: It has to match the pre-trained RNNLM!!
-lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM
-   output_neurons: 1000
-   embedding_dim: 128
-   activation: !name:torch.nn.LeakyReLU
-   dropout: 0.0
-   rnn_layers: 2
-   rnn_neurons: 2048
-   dnn_blocks: 1
-   dnn_neurons: 512
-   return_hidden: True  # For inference
-
-tokenizer: !new:sentencepiece.SentencePieceProcessor
-
-# Decoding parameters
-lm_weight: 0.5
-blank_index: 0
-# topk is the number of hypotheses that will be rescored in the rescorer
-# lowering this value might decrease the wer, but will increase speed.
-
-test_beam_search:
-   beam_size: 20
-   topk: 20
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -12.0
-   token_prune_min_logp: -12.0
-   prune_history: False
-   alpha: 0.8
-   beta: 1.2
-
-rnnlm: !new:speechbrain.decoders.scorer.RNNLMRescorer
-   language_model: !ref <lm_model>
-   tokenizer: !ref <tokenizer>
-   bos_index: 0
-   eos_index: 0
-   pad_index: 0
-
-rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder
-   rescorers: [!ref <rnnlm>]
-   weights:
-      rnnlm: !ref <lm_weight>
 
 #
 # Functions and classes
@@ -118,53 +77,6 @@ rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
 
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
@@ -230,6 +142,84 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
 
 label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder
 
+# This is the RNNLM architecture used by the Hugging Face repository above.
+# NB: it must match the pre-trained RNNLM!
+lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM
+   output_neurons: 1000
+   embedding_dim: 128
+   activation: !name:torch.nn.LeakyReLU
+   dropout: 0.0
+   rnn_layers: 2
+   rnn_neurons: 2048
+   dnn_blocks: 1
+   dnn_neurons: 512
+   return_hidden: True  # For inference
+
+
+tokenizer: !new:sentencepiece.SentencePieceProcessor
+
+############################## Decoding ########################################
+
+# topk is the number of hypotheses that will be rescored by the rescorer.
+# Lowering it speeds up rescoring but may increase the WER.
+test_beam_search:
+   beam_size: 20
+   topk: 20
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -12.0
+   token_prune_min_logp: -12.0
+   prune_history: False
+   alpha: 0.8
+   beta: 1.2
+
+rnnlm: !new:speechbrain.decoders.scorer.RNNLMRescorer
+   language_model: !ref <lm_model>
+   tokenizer: !ref <tokenizer>
+   bos_index: 0
+   eos_index: 0
+   pad_index: 0
+
+rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder
+   rescorers: [!ref <rnnlm>]
+   weights:
+      rnnlm: !ref <lm_weight>
+
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
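
Because every one of the `topk` hypotheses is scored by the RNNLM, rescoring cost grows linearly with `topk`. A hypothetical speed-oriented variant of the block above (values illustrative only):

test_beam_search:
   beam_size: 20
   topk: 5   # rescore only the 5-best hypotheses: faster, possibly slightly higher WER
   blank_index: !ref <blank_index>
   space_token: ' '
   beam_prune_logp: -12.0
   token_prune_min_logp: -12.0
   prune_history: False
   alpha: 0.8
   beta: 1.2
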
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml
index 724c3bf1e..d806b20cf 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_transformer_rescoring.yaml
@@ -32,7 +32,8 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 1
 lr: 0.9
 lr_wav2vec: 0.0001
@@ -56,7 +57,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dnn_layers: 2
 dnn_neurons: 1024
@@ -88,30 +89,6 @@ tokenizer: !new:sentencepiece.SentencePieceProcessor
 # Decoding parameters
 lm_weight: 0.5
 blank_index: 0
-# topk is the number of hypotheses that will be rescored in the rescorer
-# lowering this value might decrease the wer, but will increase speed.
-test_beam_search:
-   beam_size: 20
-   topk: 20
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -12.0
-   token_prune_min_logp: -12.0
-   prune_history: False
-   alpha: 0.8
-   beta: 1.2
-
-transformerlm: !new:speechbrain.decoders.scorer.TransformerLMRescorer
-   language_model: !ref <lm_model>
-   tokenizer: !ref <tokenizer>
-   pad_index: 0
-   bos_index: 1
-   eos_index: 2
-
-rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder
-   rescorers: [!ref <transformerlm>]
-   weights:
-      transformerlm: !ref <lm_weight>
 
 #
 # Functions and classes
@@ -119,53 +96,6 @@ rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
 
 enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
@@ -229,8 +159,68 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.9
    patient: 0
 
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
 label_encoder: !new:speechbrain.dataio.encoder.CTCTextEncoder
 
+############################## Decoding ########################################
+
+# topk is the number of hypotheses that will be rescored by the rescorer.
+# Lowering it speeds up rescoring but may increase the WER.
+test_beam_search:
+   beam_size: 20
+   topk: 20
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -12.0
+   token_prune_min_logp: -12.0
+   prune_history: False
+   alpha: 0.8
+   beta: 1.2
+
+transformerlm: !new:speechbrain.decoders.scorer.TransformerLMRescorer
+   language_model: !ref <lm_model>
+   tokenizer: !ref <tokenizer>
+   pad_index: 0
+   bos_index: 1
+   eos_index: 2
+
+rescorer: !new:speechbrain.decoders.scorer.RescorerBuilder
+   rescorers: [!ref <transformerlm>]
+   weights:
+      transformerlm: !ref <lm_weight>
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
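
To make the new Decoding block above concrete: the beam search emits its
topk best hypotheses, and the RescorerBuilder re-ranks them with the
Transformer LM, weighted by lm_weight. Below is a minimal sketch of that
re-ranking step in plain PyTorch; the function and variable names are
illustrative and are not part of the SpeechBrain API.

    import torch

    def rescore(hypotheses, acoustic_logp, lm_logp, lm_weight=0.5):
        # Combine acoustic (beam-search) and LM log-probabilities per
        # hypothesis and return the best one under the weighted sum.
        combined = acoustic_logp + lm_weight * lm_logp
        best = int(torch.argmax(combined))
        return hypotheses[best], float(combined[best])

    # Toy n-best list, as if produced with topk: 3.
    hyps = ["the cat sat", "the cat sad", "a cat sat"]
    ac = torch.tensor([-4.2, -4.0, -5.1])    # acoustic scores
    lm = torch.tensor([-9.8, -14.3, -11.0])  # LM scores
    print(rescore(hyps, ac, lm, lm_weight=0.5))

A larger topk gives the rescorer more candidates to promote, at the cost of
extra LM forward passes; that is the trade-off the YAML comment describes.
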
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml
index 735b29db9..ba20bf2ac 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_hf_whisper_encoder.yaml
@@ -31,7 +31,8 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 15
 warmup_steps: 1000 # We freeze whisper for 1000 steps to let the CTC adapt
 lr: 0.0008
@@ -61,7 +62,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
 dnn_neurons: 1024
 freeze_whisper: False
 whisper_output_dim: 512
@@ -71,74 +72,12 @@ whisper_output_dim: 512
 output_neurons: 29  # BPE size, index(blank/eos/bos) = 0
 blank_index: 0
 
-# Decoding parameters
-test_beam_search:
-   beam_size: 143
-   topk: 1
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -12.0
-   token_prune_min_logp: -1.2
-   prune_history: True
-   alpha: 0.8
-   beta: 1.2
-   # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
-   # It can either be a .bin or .arpa ; note: .arpa is much slower at loading
-   # If you don't want to use an LM, comment it out or set it to null
-   kenlm_model_path: null
 #
 # Functions and classes
 #
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
 
 enc: !new:speechbrain.nnet.containers.Sequential
    input_shape: [null, null, !ref <whisper_output_dim>]
@@ -204,6 +143,57 @@ lr_annealing_whisper: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.75
    patient: 0
 
+############################## Decoding ########################################
+
+test_beam_search:
+   beam_size: 143
+   topk: 1
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -12.0
+   token_prune_min_logp: -1.2
+   prune_history: True
+   alpha: 0.8
+   beta: 1.2
+   # A KenLM n-gram model can be downloaded from https://www.openslr.org/11/
+   # or trained with KenLM. It can be either a .bin or a .arpa file; note
+   # that .arpa is much slower to load.
+   # If you don't want to use an LM, comment it out or set it to null.
+   kenlm_model_path: null
+
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
 
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
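
For readers unfamiliar with the kenlm_model_path option kept above: when it
points at an n-gram model, the CTC beam search fuses LM scores into the
search (alpha weighs the LM, beta acts as a length bonus). A short sketch
with the kenlm Python package, assuming a model file "4gram.arpa" that you
provide yourself (the path is hypothetical):

    import kenlm  # pip install kenlm

    lm = kenlm.Model("4gram.arpa")  # hypothetical path; .bin loads faster
    # Total log10 probability of a sentence (BOS/EOS added by default).
    print(lm.score("the cat sat on the mat"))
    # Per-word scores help when tuning pruning thresholds.
    for logp, ngram_len, oov in lm.full_scores("the cat sat"):
        print(logp, ngram_len, oov)

Converting the .arpa file to a binary with KenLM's build_binary tool
produces the faster-loading .bin format the comment mentions.
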
diff --git a/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml b/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml
index cf9cf7ec8..1b281b35c 100644
--- a/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml
+++ b/recipes/LibriSpeech/ASR/CTC/hparams/train_sb_wav2vec.yaml
@@ -33,7 +33,7 @@ test_csv:
    - !ref <output_folder>/test-clean.csv
    - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 0.0003
 lr_wav2vec: 0.00005
@@ -58,7 +58,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: !ref <test_batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
 dnn_activation: !new:torch.nn.LeakyReLU
 dnn_neurons: 1280
 dnn_dropout: 0.15
@@ -68,75 +68,12 @@ freeze_wav2vec: False
 output_neurons: 29  # BPE size, index(blank/eos/bos) = 0
 blank_index: 0
 
-# Decoding parameters
-test_beam_search:
-   beam_size: 200
-   topk: 1
-   blank_index: !ref <blank_index>
-   space_token: ' ' # make sure this is the same as the one used in the tokenizer
-   beam_prune_logp: -10.0
-   token_prune_min_logp: -5.0
-   prune_history: True
-   alpha: 0.8
-   beta: 1.2
-   # can be downloaded from here https://www.openslr.org/11/ or trained with kenLM
-   # It can either be a .bin or .arpa ; note: .arpa is much slower at loading
-   # If you don't want to use an LM, comment it out or set it to null
-   kenlm_model_path: null
-
 #
 # Functions and classes
 #
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
 
 enc: !new:speechbrain.nnet.containers.Sequential
    input_shape: [null, null, !ref <wav2vec_output_dim>]
@@ -227,6 +164,58 @@ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.7
    patient: 0
 
+############################## Decoding ########################################
+
+test_beam_search:
+   beam_size: 200
+   topk: 1
+   blank_index: !ref <blank_index>
+   space_token: ' ' # make sure this is the same as the one used in the tokenizer
+   beam_prune_logp: -10.0
+   token_prune_min_logp: -5.0
+   prune_history: True
+   alpha: 0.8
+   beta: 1.2
+   # A KenLM n-gram model can be downloaded from https://www.openslr.org/11/
+   # or trained with KenLM. It can be either a .bin or a .arpa file; note
+   # that .arpa is much slower to load.
+   # If you don't want to use an LM, comment it out or set it to null.
+   kenlm_model_path: null
+
+############################## Augmentations ###################################
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
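
The DropFreq and DropChunk entries above keep their previous values; the
change only inlines them. As a rough illustration of what the two
augmentations do to a waveform, here is a simplified single-band /
single-chunk sketch in plain torch (SpeechBrain's implementation uses a
notch FIR filter and supports multiple drops; all names here are
illustrative):

    import torch

    def drop_chunk(wav, length_low=1000, length_high=2000):
        # Zero out one random span of samples, like one DropChunk drop.
        length = int(torch.randint(length_low, length_high + 1, (1,)))
        start = int(torch.randint(0, wav.shape[-1] - length, (1,)))
        out = wav.clone()
        out[..., start:start + length] = 0.0
        return out

    def drop_freq(wav, center=0.25, width=0.05):
        # Suppress one narrow band (relative to Nyquist) via the FFT.
        spec = torch.fft.rfft(wav)
        n = spec.shape[-1]
        lo = int((center - width / 2) * n)
        hi = int((center + width / 2) * n)
        spec[..., lo:hi] = 0.0
        return torch.fft.irfft(spec, n=wav.shape[-1])

    wav = torch.randn(1, 16000)        # one second of fake 16 kHz audio
    aug = drop_freq(drop_chunk(wav))
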
diff --git a/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py b/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py
index f2b6373b2..1f4ccdd2c 100644
--- a/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py
+++ b/recipes/LibriSpeech/ASR/CTC/train_with_wav2vec.py
@@ -101,10 +101,15 @@ class ASR(sb.Brain):
         ids = batch.id
         tokens, tokens_lens = batch.tokens
 
-        # Label Augmentation
+        # Labels must be extended if parallel augmentation or concatenated
+        # augmentation was performed on the input (increasing the batch dimension)
         if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"):
-            tokens = self.hparams.wav_augment.replicate_labels(tokens)
-            tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens)
+            (
+                tokens,
+                tokens_lens,
+            ) = self.hparams.wav_augment.replicate_multiple_labels(
+                tokens, tokens_lens
+            )
 
         loss_ctc = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)
         loss = loss_ctc
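
The switch from two replicate_labels calls to one replicate_multiple_labels
call is mostly cosmetic, but the reason labels need replicating at all is
worth spelling out: with concat_original: True and one augmentation pass,
the model sees [original; augmented] stacked along the batch axis, so every
label tensor must be tiled to the same batch size. A minimal sketch of that
tiling in plain torch (illustrative only; the real Augmenter also accounts
for repeat and parallel augmentation counts):

    import torch

    def replicate_multiple_labels(*tensors, copies=2):
        # Tile each tensor along the batch dimension so it matches a
        # batch grown to `copies` times its size.
        return tuple(
            t.repeat(copies, *([1] * (t.dim() - 1))) for t in tensors
        )

    tokens = torch.tensor([[5, 9, 2], [7, 3, 0]])  # (batch=2, seq=3)
    tokens_lens = torch.tensor([1.0, 0.66])        # relative lengths
    tokens, tokens_lens = replicate_multiple_labels(tokens, tokens_lens)
    print(tokens.shape, tokens_lens.shape)         # (4, 3) and (4,)
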
diff --git a/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py b/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py
index e3db36334..d575265e8 100644
--- a/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py
+++ b/recipes/LibriSpeech/ASR/CTC/train_with_whisper.py
@@ -72,10 +72,15 @@ class ASR(sb.Brain):
         ids = batch.id
         tokens, tokens_lens = batch.tokens
 
-        # Label Augmentation
+        # Labels must be extended if parallel augmentation or concatenated
+        # augmentation was performed on the input (increasing the batch dimension)
         if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"):
-            tokens = self.hparams.wav_augment.replicate_labels(tokens)
-            tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens)
+            (
+                tokens,
+                tokens_lens,
+            ) = self.hparams.wav_augment.replicate_multiple_labels(
+                tokens, tokens_lens
+            )
 
         loss_ctc = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)
         loss = loss_ctc
diff --git a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml
index e38b545fb..3d0aaa200 100644
--- a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml
+++ b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000.yaml
@@ -44,7 +44,8 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augm
 NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 15
 number_of_ctc_epochs: 5
 batch_size: 8
@@ -89,7 +90,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: 1
 
-# Model parameters
+####################### Model Parameters #######################################
+
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -128,74 +130,6 @@ coverage_penalty: 1.5
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Download and prepare the dataset of noisy sequences for augmentation
-prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
-   URL: !ref <NOISE_DATASET_URL>
-   dest_folder: !ref <data_folder_noise>
-   ext: wav
-   csv_file: !ref <noise_annotation>
-
-
-# Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
-add_noise: !new:speechbrain.augment.time_domain.AddNoise
-   csv_file: !ref <noise_annotation>
-   snr_low: !ref <snr_low>
-   snr_high: !ref <snr_high>
-   noise_sample_rate: !ref <sample_rate>
-   clean_sample_rate: !ref <sample_rate>
-   num_workers: !ref <num_workers>
-
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <add_noise>,
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
 normalize: !new:speechbrain.processing.features.InputNormalization
    norm_type: global
 
@@ -288,7 +222,8 @@ modules:
 model: !new:torch.nn.ModuleList
    - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <ctc_lin>, !ref <seq_lin>]
 
-# Scorer
+############################## Decoding & optimiser ############################
+
 coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
    vocab_size: !ref <output_neurons>
 
@@ -339,6 +274,57 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.8
    patient: 0
 
+############################## Augmentations ###################################
+
+prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
+   URL: !ref <NOISE_DATASET_URL>
+   dest_folder: !ref <data_folder_noise>
+   ext: wav
+   csv_file: !ref <noise_annotation>
+
+# Add noise to input signal
+add_noise: !new:speechbrain.augment.time_domain.AddNoise
+   csv_file: !ref <noise_annotation>
+   snr_low: 0
+   snr_high: 15
+   noise_sample_rate: !ref <sample_rate>
+   clean_sample_rate: !ref <sample_rate>
+   num_workers: !ref <num_workers>
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <add_noise>,
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
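
AddNoise above now carries its SNR range (0 to 15 dB) inline. To make the
SNR arithmetic concrete, here is a hedged sketch of mixing noise into clean
speech at a target SNR; the function is illustrative, not SpeechBrain's
implementation:

    import torch

    def add_noise(clean, noise, snr_db):
        # Scale noise so 10 * log10(P_clean / P_noise_scaled) == snr_db.
        p_clean = clean.pow(2).mean()
        p_noise = noise.pow(2).mean()
        scale = torch.sqrt(p_clean / (p_noise * 10 ** (snr_db / 10)))
        return clean + scale * noise

    clean = torch.randn(16000)
    noise = torch.randn(16000)
    snr = torch.empty(1).uniform_(0, 15).item()  # snr_low: 0, snr_high: 15
    noisy = add_noise(clean, noise, snr)
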
diff --git a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml
index 164f1ffe7..355c49d36 100644
--- a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml
+++ b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_1000_sligru.yaml
@@ -44,7 +44,8 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augm
 NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 15
 number_of_ctc_epochs: 15
 batch_size: 24
@@ -89,7 +90,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: 1
 
-# Model parameters
+####################### Model Parameters #######################################
+
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -126,75 +128,6 @@ temperature_lm: 1.25
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Download and prepare the dataset of noisy sequences for augmentation
-prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
-   URL: !ref <NOISE_DATASET_URL>
-   dest_folder: !ref <data_folder_noise>
-   ext: wav
-   csv_file: !ref <noise_annotation>
-
-
-# Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
-add_noise: !new:speechbrain.augment.time_domain.AddNoise
-   csv_file: !ref <noise_annotation>
-   snr_low: !ref <snr_low>
-   snr_high: !ref <snr_high>
-   noise_sample_rate: !ref <sample_rate>
-   clean_sample_rate: !ref <sample_rate>
-   num_workers: !ref <num_workers>
-
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <add_noise>,
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
-
 normalize: !new:speechbrain.processing.features.InputNormalization
    norm_type: global
 
@@ -286,7 +219,8 @@ modules:
 model: !new:torch.nn.ModuleList
    - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <ctc_lin>, !ref <seq_lin>]
 
-# Scorer
+############################## Decoding & optimiser ############################
+
 coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
    vocab_size: !ref <output_neurons>
 
@@ -337,6 +271,57 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.8
    patient: 0
 
+############################## Augmentations ###################################
+
+prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
+   URL: !ref <NOISE_DATASET_URL>
+   dest_folder: !ref <data_folder_noise>
+   ext: wav
+   csv_file: !ref <noise_annotation>
+
+# Add noise to input signal
+add_noise: !new:speechbrain.augment.time_domain.AddNoise
+   csv_file: !ref <noise_annotation>
+   snr_low: 0
+   snr_high: 15
+   noise_sample_rate: !ref <sample_rate>
+   clean_sample_rate: !ref <sample_rate>
+   num_workers: !ref <num_workers>
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <add_noise>,
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
diff --git a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml
index cc0647562..3046dfea8 100644
--- a/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml
+++ b/recipes/LibriSpeech/ASR/seq2seq/hparams/train_BPE_5000.yaml
@@ -45,7 +45,8 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augm
 NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script
 
-# Training parameters
+####################### Training Parameters ####################################
+
 number_of_epochs: 25
 number_of_ctc_epochs: 25
 batch_size: 8
@@ -90,7 +91,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
    batch_size: 1
 
-# Model parameters
+####################### Model Parameters #######################################
+
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -132,75 +134,6 @@ coverage_penalty: 1.5
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>
 
-# Download and prepare the dataset of noisy sequences for augmentation
-prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
-   URL: !ref <NOISE_DATASET_URL>
-   dest_folder: !ref <data_folder_noise>
-   ext: wav
-   csv_file: !ref <noise_annotation>
-
-
-# Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
-add_noise: !new:speechbrain.augment.time_domain.AddNoise
-   csv_file: !ref <noise_annotation>
-   snr_low: !ref <snr_low>
-   snr_high: !ref <snr_high>
-   noise_sample_rate: !ref <sample_rate>
-   clean_sample_rate: !ref <sample_rate>
-   num_workers: !ref <num_workers>
-
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-   orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
-
-# Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
-drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
-
-# Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
-drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
-
-# Augmenter: Combines previously defined augmentations to perform data augmentation
-wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
-   min_augmentations: 4
-   max_augmentations: 4
-   augment_prob: 1.0
-   augmentations: [
-      !ref <add_noise>,
-      !ref <speed_perturb>,
-      !ref <drop_freq>,
-      !ref <drop_chunk>]
-
-
 normalize: !new:speechbrain.processing.features.InputNormalization
    norm_type: global
 
@@ -294,7 +227,8 @@ modules:
 model: !new:torch.nn.ModuleList
    - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <ctc_lin>, !ref <seq_lin>]
 
-# Scorer
+############################## Decoding & optimiser ############################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
    eos_index: !ref <eos_index>
    blank_index: !ref <blank_index>
@@ -360,6 +294,57 @@ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
    annealing_factor: 0.8
    patient: 0
 
+############################## Augmentations ###################################
+
+prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
+   URL: !ref <NOISE_DATASET_URL>
+   dest_folder: !ref <data_folder_noise>
+   ext: wav
+   csv_file: !ref <noise_annotation>
+
+# Add noise to input signal
+add_noise: !new:speechbrain.augment.time_domain.AddNoise
+   csv_file: !ref <noise_annotation>
+   snr_low: 0
+   snr_high: 15
+   noise_sample_rate: !ref <sample_rate>
+   clean_sample_rate: !ref <sample_rate>
+   num_workers: !ref <num_workers>
+
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+   orig_freq: !ref <sample_rate>
+   speeds: [95, 100, 105]
+
+# Frequency drop: randomly drops a number of frequency bands to zero.
+drop_freq: !new:speechbrain.augment.time_domain.DropFreq
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
+
+# Time drop: randomly drops a number of temporal chunks.
+drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
+
+# Augmenter: Combines previously defined augmentations to perform data augmentation
+wav_augment: !new:speechbrain.augment.augmenter.Augmenter
+   concat_original: True
+   min_augmentations: 4
+   max_augmentations: 4
+   augment_prob: 1.0
+   augmentations: [
+      !ref <add_noise>,
+      !ref <speed_perturb>,
+      !ref <drop_freq>,
+      !ref <drop_chunk>]
+
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
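
speed_perturb above resamples each utterance at 95%, 100%, or 105% of its
sampling rate, which changes tempo (and pitch slightly). A sketch under the
assumption that torchaudio is available; SpeedPerturb implements its own
resampling internally:

    import random
    import torch
    import torchaudio

    def speed_perturb(wav, orig_freq=16000, speeds=(95, 100, 105)):
        speed = random.choice(speeds)
        if speed == 100:
            return wav
        # Keep speed% of the samples; played back at orig_freq, the clip
        # lasts speed/100 as long (95 -> faster, 105 -> slower).
        new_freq = orig_freq * speed // 100
        return torchaudio.transforms.Resample(orig_freq, new_freq)(wav)

    wav = torch.randn(1, 16000)
    out = speed_perturb(wav)  # ~5% fewer samples, unchanged, or ~5% more
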
diff --git a/recipes/LibriSpeech/ASR/seq2seq/train.py b/recipes/LibriSpeech/ASR/seq2seq/train.py
index b3adaa67a..7f5351008 100644
--- a/recipes/LibriSpeech/ASR/seq2seq/train.py
+++ b/recipes/LibriSpeech/ASR/seq2seq/train.py
@@ -97,12 +97,16 @@ class ASR(sb.Brain):
         tokens_eos, tokens_eos_lens = batch.tokens_eos
         tokens, tokens_lens = batch.tokens
 
+        # Labels must be extended if parallel augmentation or concatenated
+        # augmentation was performed on the input (increasing the batch dimension)
         if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"):
-            tokens = self.hparams.wav_augment.replicate_labels(tokens)
-            tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens)
-            tokens_eos = self.hparams.wav_augment.replicate_labels(tokens_eos)
-            tokens_eos_lens = self.hparams.wav_augment.replicate_labels(
-                tokens_eos_lens
+            (
+                tokens,
+                tokens_lens,
+                tokens_eos,
+                tokens_eos_lens,
+            ) = self.hparams.wav_augment.replicate_multiple_labels(
+                tokens, tokens_lens, tokens_eos, tokens_eos_lens
             )
 
         loss_seq = self.hparams.seq_cost(
diff --git a/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml b/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml
index c7ad99c63..e9757e208 100644
--- a/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml
+++ b/recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml
@@ -40,7 +40,8 @@ test_csv:
 skip_prep: False
 ckpt_interval_minutes: 5 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -120,7 +121,8 @@ dynamic_batch_sampler:
    batch_ordering: random
    max_batch_ex: 256
 
-# Model parameters
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 512
 joint_dim: 640
@@ -164,18 +166,15 @@ compute_features: !new:speechbrain.lobes.features.Fbank
    n_mels: !ref <n_mels>
    win_length: !ref <win_length>
 
+############################## Augmentations ###################################
+
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
    orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
+   speeds: [95, 100, 105]
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
-   concat_original: False
-   repeat_augment: 1
-   shuffle_augmentations: False
    min_augmentations: 1
    max_augmentations: 1
    augment_prob: 1.0
@@ -183,43 +182,24 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
 
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <time_drop_length_low>
-   drop_length_high: !ref <time_drop_length_high>
-   drop_count_low: !ref <time_drop_count_low>
-   drop_count_high: !ref <time_drop_count_high>
-   replace: !ref <time_drop_replace>
-   dim: 1
+   drop_length_low: 15
+   drop_length_high: 25
+   drop_count_low: 5
+   drop_count_high: 5
+   replace: "zeros"
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-   drop_length_low: !ref <freq_drop_length_low>
-   drop_length_high: !ref <freq_drop_length_high>
-   drop_count_low: !ref <freq_drop_count_low>
-   drop_count_high: !ref <freq_drop_count_high>
-   replace: !ref <freq_drop_replace>
+   drop_length_low: 25
+   drop_length_high: 35
+   drop_count_low: 2
+   drop_count_high: 2
+   replace: "zeros"
    dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-   warp_window: !ref <time_warp_window>
-   warp_mode: !ref <time_warp_mode>
-   dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
    parallel_augment: False
@@ -234,6 +214,8 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
       !ref <freq_drop>,
       !ref <time_warp>]
 
+############################## Models ##########################################
+
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
    input_shape: (8, 10, 80)
    num_blocks: 2
@@ -355,6 +337,8 @@ modules:
 model: !new:torch.nn.ModuleList
    - [!ref <CNN>, !ref <enc>, !ref <emb>, !ref <dec>, !ref <proj_enc>, !ref <proj_dec>, !ref <proj_ctc>, !ref <transducer_lin>]
 
+############################## Decoding & optimiser ############################
+
 # Tokenizer initialization
 tokenizer: !new:sentencepiece.SentencePieceProcessor
 
@@ -388,6 +372,8 @@ noam_annealing: !new:speechbrain.nnet.schedulers.NoamScheduler
    lr_initial: !ref <lr>
    n_warmup_steps: !ref <warmup_steps>
 
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
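
fea_augment above is classic SpecAugment: a few masked stripes along time,
a couple along frequency, plus a time warp of the log-Mel spectrogram. A
compact time/frequency masking sketch in plain torch (warping omitted; all
names are illustrative, not SpeechBrain's SpectrogramDrop):

    import torch

    def spectrogram_drop(spec, count, length_low, length_high, dim):
        # Zero `count` random stripes of width [length_low, length_high]
        # along `dim` (1 = time, 2 = frequency), replace="zeros" style.
        out = spec.clone()
        size = out.shape[dim]
        for _ in range(count):
            length = int(torch.randint(length_low, length_high + 1, (1,)))
            start = int(torch.randint(0, max(1, size - length), (1,)))
            index = [slice(None)] * out.dim()
            index[dim] = slice(start, start + length)
            out[tuple(index)] = 0.0
        return out

    spec = torch.randn(8, 400, 80)                   # (batch, time, mels)
    spec = spectrogram_drop(spec, 5, 15, 25, dim=1)  # time_drop values
    spec = spectrogram_drop(spec, 2, 25, 35, dim=2)  # freq_drop values
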
diff --git a/recipes/LibriSpeech/ASR/transducer/train.py b/recipes/LibriSpeech/ASR/transducer/train.py
index 497912c83..84d7e05ff 100644
--- a/recipes/LibriSpeech/ASR/transducer/train.py
+++ b/recipes/LibriSpeech/ASR/transducer/train.py
@@ -155,27 +155,16 @@ class ASR(sb.Brain):
             logits_transducer, wav_lens, predicted_tokens = predictions
 
         if stage == sb.Stage.TRAIN:
-            if hasattr(self.hparams, "wav_augment"):
-                tokens = self.hparams.wav_augment.replicate_labels(tokens)
-                token_lens = self.hparams.wav_augment.replicate_labels(
-                    token_lens
-                )
-                tokens_eos = self.hparams.wav_augment.replicate_labels(
-                    tokens_eos
-                )
-                token_eos_lens = self.hparams.wav_augment.replicate_labels(
-                    token_eos_lens
-                )
+            # Labels must be extended if parallel augmentation or concatenated
+            # augmentation was performed on the input (increasing the batch dimension)
             if hasattr(self.hparams, "fea_augment"):
-                tokens = self.hparams.fea_augment.replicate_labels(tokens)
-                token_lens = self.hparams.fea_augment.replicate_labels(
-                    token_lens
-                )
-                tokens_eos = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos
-                )
-                token_eos_lens = self.hparams.fea_augment.replicate_labels(
-                    token_eos_lens
+                (
+                    tokens,
+                    token_lens,
+                    tokens_eos,
+                    token_eos_lens,
+                ) = self.hparams.fea_augment.replicate_multiple_labels(
+                    tokens, token_lens, tokens_eos, token_eos_lens
                 )
 
         if stage == sb.Stage.TRAIN:
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml
index 7772517e9..2eee3646e 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/bayesspeech.yaml
@@ -42,7 +42,8 @@ test_csv:
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -116,7 +117,7 @@ test_dataloader_opts:
         padding_kwargs:
             value: !ref <pad_index>
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
 # Transformer
 d_model: 512
 nhead: 4
@@ -148,7 +149,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -216,7 +217,8 @@ Adam: !name:torch.optim.Adam
     eps: 0.000000001
 
 
-# Scorer
+############################## Decoding & optimiser ############################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -290,57 +292,34 @@ normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 4
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
 # Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -354,6 +333,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml
index c3f66ec9f..02fc2eac4 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/branchformer_large.yaml
@@ -41,9 +41,11 @@ test_csv:
     - !ref <output_folder>/test-clean.csv
     - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
-# The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
+# The global batch size is computed as batch_size * n_gpus *
+# grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
 # Please set your parameters accordingly.
 number_of_epochs: 120
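
As a concrete instance of the reflowed comment above: a per-GPU batch of 16
on 4 GPUs with grad_accumulation_factor 2 gives 16 * 4 * 2 = 128, i.e. the
suggested minimum. A one-line helper, purely illustrative:

    def global_batch_size(batch_size, n_gpus, grad_accumulation_factor):
        return batch_size * n_gpus * grad_accumulation_factor

    assert global_batch_size(16, 4, 2) >= 128  # meets the recommendation
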
@@ -103,7 +105,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 512
 nhead: 8
@@ -131,7 +134,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -204,7 +207,8 @@ Adam: !name:torch.optim.AdamW
     eps: 0.000000001
     weight_decay: !ref <weight_decay>
 
-# Scorer
+############################## Decoding & optimiser ############################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -275,57 +279,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Frequency Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -340,6 +321,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     win_length: !ref <win_length>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml
index 5d252a4b5..7cdd4c06f 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_large.yaml
@@ -41,7 +41,8 @@ test_csv:
     - !ref <output_folder>/test-clean.csv
     - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -102,7 +103,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 512
 nhead: 8
@@ -129,7 +131,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -200,7 +202,8 @@ Adam: !name:torch.optim.AdamW
 model: !new:torch.nn.ModuleList
     - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
 
-# Scorer
+############################## Decoding & optimiser ############################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -271,57 +274,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Frequency Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -336,6 +316,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_mels: !ref <n_mels>
     win_length: !ref <win_length>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml
index eddc96780..a24e6649a 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/conformer_small.yaml
@@ -40,7 +40,8 @@ test_csv:
     - !ref <output_folder>/test-clean.csv
     - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -102,7 +103,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 144
 nhead: 4
@@ -129,7 +131,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -200,12 +202,8 @@ Adam: !name:torch.optim.Adam
     betas: (0.9, 0.98)
     eps: 0.000000001
 
-#SGD: !name:torch.optim.SGD
-#    lr: !ref <lr_sgd>
-#    momentum: 0.99
-#    nesterov: True
+############################## Decoding & Optimiser ############################
 
-# Scorer
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -277,57 +275,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Freq Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -341,6 +316,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml
index 7b2912ec3..4b6ca718f 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_13M.yaml
@@ -40,7 +40,8 @@ test_csv:
     - !ref <output_folder>/test-clean.csv
     - !ref <output_folder>/test-other.csv
 
-# Training parameters
+############################## Training Parameters #############################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -99,7 +100,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 144
 nhead: 8
@@ -133,7 +135,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -206,7 +208,8 @@ Adam: !name:torch.optim.Adam
     betas: (0.9, 0.98)
     eps: 0.000000001
 
-# Scorer
+############################## Decoding & Optimiser ############################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -276,57 +279,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Freq Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -340,6 +320,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml
index 71d97cdb9..2e0242e31 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperbranchformer_25M.yaml
@@ -40,7 +40,8 @@ test_csv:
     - !ref <output_folder>/test-clean.csv
     - !ref <output_folder>/test-other.csv
 
-# Training parameters
+############################## Training Parameters #############################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -100,7 +101,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
 # Transformer
 d_model: 256
 nhead: 8
@@ -134,7 +135,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -207,7 +208,8 @@ Adam: !name:torch.optim.Adam
     betas: (0.9, 0.98)
     eps: 0.000000001
 
-# Scorer
+############################## Decoding & Optimiser ############################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -277,57 +279,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Freq Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -335,11 +314,14 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <time_drop>,
         !ref <freq_drop>,
         !ref <time_warp>]
+
 compute_features: !new:speechbrain.lobes.features.Fbank
     sample_rate: !ref <sample_rate>
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml
index fdf65fde3..6e165ed5c 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_22M.yaml
@@ -40,7 +40,8 @@ test_csv:
     - !ref <output_folder>/test-clean.csv
     - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -87,6 +88,7 @@ dynamic_batch_sampler_valid:
     batch_ordering: !ref <batch_ordering>
     max_batch_ex: !ref <max_batch_ex>
 
+
 # Dataloader options
 train_dataloader_opts:
     batch_size: !ref <batch_size>
@@ -99,7 +101,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 256
 nhead: 8
@@ -132,7 +135,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -203,7 +206,8 @@ Adam: !name:torch.optim.Adam
     betas: (0.9, 0.98)
     eps: 0.000000001
 
-# Scorer
+####################### Decoding & Optimiser ###################################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -273,57 +277,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Freq Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -337,6 +318,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml
index 47053b5ff..fe3bd599c 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/hyperconformer_8M.yaml
@@ -40,7 +40,8 @@ test_csv:
     - !ref <output_folder>/test-clean.csv
     - !ref <output_folder>/test-other.csv
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -100,7 +101,8 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 144
 nhead: 8
@@ -133,7 +135,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -204,7 +206,8 @@ Adam: !name:torch.optim.Adam
     betas: (0.9, 0.98)
     eps: 0.000000001
 
-# Scorer
+####################### Decoding & Optimiser ###################################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -274,57 +277,34 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Freq Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -338,6 +318,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml
index 4805d7c6c..4891ca617 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/train_hf_whisper.yaml
@@ -35,7 +35,8 @@ test_csv:
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+############################## Training Parameters #############################
+
 number_of_epochs: 1
 lr_whisper: 0.00003
 sorting: ascending
@@ -61,7 +62,7 @@ min_decode_ratio: 0.0
 max_decode_ratio: 1.0
 test_beam_size: 8
 
-# Model parameters
+####################### Model Parameters #######################################
 freeze_whisper: False
 
 
@@ -74,52 +75,34 @@ valid_loader_kwargs:
 test_loader_kwargs:
     batch_size: !ref <test_batch_size>
 
-
-#
-# Functions and classes
-#
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0  # Min frequency band to drop, as a fraction of Nyquist
+    drop_freq_high: 1  # Max frequency band to drop, as a fraction of Nyquist
+    drop_freq_count_low: 1  # Min number of frequency bands to drop
+    drop_freq_count_high: 3  # Max number of frequency bands to drop
+    drop_freq_width: 0.05  # Width of frequency bands to drop
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -128,6 +111,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
     source: !ref <whisper_hub>
     freeze: !ref <freeze_whisper>
@@ -142,6 +127,8 @@ nll_loss: !name:speechbrain.nnet.losses.nll_loss
 modules:
     whisper: !ref <whisper>
 
+############################## Decoding & Optimiser ############################
+
 whisper_opt_class: !name:torch.optim.AdamW
     lr: !ref <lr_whisper>
     weight_decay: 0.01
@@ -167,6 +154,8 @@ lr_annealing_whisper: !new:speechbrain.nnet.schedulers.NewBobScheduler
     annealing_factor: 0.9
     patient: 0
 
+############################## Logging and Pretrainer ##########################
+
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
     checkpoints_dir: !ref <save_folder>
     recoverables:
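
One detail to keep straight in the inlined DropChunk above: drop_length_* is
the chunk length in samples, whereas drop_count_* is how many chunks get
dropped per utterance. A small waveform-domain sketch with the same values
(call signatures assumed from speechbrain.augment.time_domain):

    import torch
    from speechbrain.augment.time_domain import DropChunk, DropFreq

    drop_chunk = DropChunk(
        drop_length_low=1000, drop_length_high=2000,  # samples per chunk
        drop_count_low=1, drop_count_high=5,          # chunks per utterance
    )
    drop_freq = DropFreq(
        drop_freq_low=0, drop_freq_high=1,  # band range, fraction of Nyquist
        drop_freq_count_low=1, drop_freq_count_high=3,
        drop_freq_width=0.05,
    )

    wavs = torch.randn(2, 16000)   # two 1 s utterances at 16 kHz
    lens = torch.ones(2)
    wavs = drop_chunk(wavs, lens)  # DropChunk needs relative lengths
    wavs = drop_freq(wavs)         # DropFreq operates on the waveforms alone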
diff --git a/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml b/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml
index 36626f0d9..173453e9d 100644
--- a/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml
+++ b/recipes/LibriSpeech/ASR/transformer/hparams/transformer.yaml
@@ -42,7 +42,8 @@ test_csv:
 
 ckpt_interval_minutes: 30 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
+
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -116,7 +117,8 @@ test_dataloader_opts:
         padding_kwargs:
             value: !ref <pad_index>
 
-####################### Model parameters ###########################
+####################### Model Parameters #######################################
+
 # Transformer
 d_model: 512
 nhead: 4
@@ -142,7 +144,7 @@ test_beam_size: 66
 lm_weight: 0.60
 ctc_weight_decode: 0.40
 
-############################## models ################################
+############################## Models ##########################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
@@ -210,7 +212,8 @@ Adam: !name:torch.optim.Adam
     eps: 0.000000001
 
 
-# Scorer
+####################### Decoding & Optimiser ###################################
+
 ctc_scorer: !new:speechbrain.decoders.scorer.CTCScorer
     eos_index: !ref <eos_index>
     blank_index: !ref <blank_index>
@@ -284,57 +287,34 @@ normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 4
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 4  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 4  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "mean"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <time_drop_length_low>
-    drop_length_high: !ref <time_drop_length_high>
-    drop_count_low: !ref <time_drop_count_low>
-    drop_count_high: !ref <time_drop_count_high>
-    replace: !ref <time_drop_replace>
-    dim: 1
-
-# Frequency Drop
-freq_drop_length_low: 10  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 20  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 4  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 4  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "mean"  # Method of dropping chunks
+    drop_length_low: 15
+    drop_length_high: 25
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
 
+# Freq Drop
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-    drop_length_low: !ref <freq_drop_length_low>
-    drop_length_high: !ref <freq_drop_length_high>
-    drop_count_low: !ref <freq_drop_count_low>
-    drop_count_high: !ref <freq_drop_count_high>
-    replace: !ref <freq_drop_replace>
+    drop_length_low: 10
+    drop_length_high: 20
+    drop_count_low: 4
+    drop_count_high: 4
+    replace: "mean"
     dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-    warp_window: !ref <time_warp_window>
-    warp_mode: !ref <time_warp_mode>
-    dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -348,6 +328,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
+############################## Logging and Pretrainer ##########################
+
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
diff --git a/recipes/LibriSpeech/ASR/transformer/train.py b/recipes/LibriSpeech/ASR/transformer/train.py
index b69763e26..292d7cc42 100644
--- a/recipes/LibriSpeech/ASR/transformer/train.py
+++ b/recipes/LibriSpeech/ASR/transformer/train.py
@@ -114,16 +114,16 @@ class ASR(sb.core.Brain):
         tokens, tokens_lens = batch.tokens
 
         if stage == sb.Stage.TRAIN:
+            # Labels must be extended if parallel augmentation or concatenated
+            # augmentation was performed on the input (increasing the batch dimension)
             if hasattr(self.hparams, "fea_augment"):
-                tokens = self.hparams.fea_augment.replicate_labels(tokens)
-                tokens_lens = self.hparams.fea_augment.replicate_labels(
-                    tokens_lens
-                )
-                tokens_eos = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos
-                )
-                tokens_eos_lens = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos_lens
+                (
+                    tokens,
+                    tokens_lens,
+                    tokens_eos,
+                    tokens_eos_lens,
+                ) = self.hparams.fea_augment.replicate_multiple_labels(
+                    tokens, tokens_lens, tokens_eos, tokens_eos_lens
                 )
 
         loss_seq = self.hparams.seq_cost(
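
For context on why the labels need replicating at all: when fea_augment
concatenates the original batch (concat_original) or stacks parallel
augmentations, the batch dimension of the features grows, and every label
tensor must grow with it. A toy, non-SpeechBrain sketch of the idea:

    import torch

    def replicate_labels(labels: torch.Tensor, times: int) -> torch.Tensor:
        # Repeat along the batch dim so labels track an enlarged batch.
        return torch.cat([labels] * times, dim=0)

    feats = torch.randn(2, 50, 80)
    feats = torch.cat([feats, feats], dim=0)       # e.g. concat_original: True

    tokens = torch.tensor([[5, 9, 2], [7, 3, 0]])  # [batch=2, seq]
    tokens = replicate_labels(tokens, 2)           # now [4, 3]
    assert tokens.shape[0] == feats.shape[0]

replicate_multiple_labels presumably applies the same replication to each
tensor it receives, which is what lets the four separate calls above collapse
into one.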
diff --git a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml
index a565905c8..f487ffbe1 100644
--- a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml
+++ b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_rnn.yaml
@@ -95,7 +95,7 @@ homograph_loss_weight: 2.0
 lr: 0.002
 save_for_pretrained: True
 
-# Model parameters
+####################### Model Parameters #######################################
 output_neurons: !apply:speechbrain.utils.hparams.choice
     value: !ref <phn_tokenize>
     choices:
diff --git a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml
index c75cd97bf..e1c0f44c7 100644
--- a/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml
+++ b/recipes/LibriSpeech/G2P/hparams/hparams_g2p_transformer.yaml
@@ -95,7 +95,7 @@ lr_dont_halve_until_epoch: 1
 lr_patience: 1
 save_for_pretrained: True
 
-# Model parameters
+####################### Model Parameters #######################################
 output_neurons: !apply:speechbrain.utils.hparams.choice
     value: !ref <phn_tokenize>
     choices:
diff --git a/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml
index dcb768259..7e1b7bc4a 100644
--- a/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml
+++ b/recipes/LibriSpeech/G2P/hparams/hparams_lm_rnn.yaml
@@ -50,7 +50,7 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
 tokenizer_file: <output_folder>/save/phoneme_tokenizer.model
 
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 80
 lr: 0.001
@@ -68,7 +68,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-# Model parameters
+####################### Model Parameters #######################################
 model_dim: !apply:speechbrain.utils.hparams.choice
     value: !ref <phn_tokenize>
     choices:
diff --git a/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml b/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml
index 2a9a434d6..5e319e3d8 100644
--- a/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml
+++ b/recipes/LibriSpeech/G2P/hparams/hparams_lm_transformer.yaml
@@ -39,7 +39,7 @@ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
 # Tokenizer model (you must use the same tokenizer for LM and ASR training)
 tokenizer_file: <output_folder>/save/phoneme_tokenizer.model
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 80
 lr: 0.001
@@ -57,7 +57,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-# Model parameters
+####################### Model Parameters #######################################
 
 emb_dim: 256 # dimension of the embeddings
 transformer_num_heads: 4
diff --git a/recipes/LibriSpeech/LM/hparams/RNNLM.yaml b/recipes/LibriSpeech/LM/hparams/RNNLM.yaml
index b061b4fcc..0896de960 100644
--- a/recipes/LibriSpeech/LM/hparams/RNNLM.yaml
+++ b/recipes/LibriSpeech/LM/hparams/RNNLM.yaml
@@ -29,7 +29,7 @@ test_transcripts_pattern: "test*/**/*.trans.txt"
 # Tokenizer model
 tokenizer_file: https://www.dropbox.com/s/o7gnouwdoqchotj/1000_unigram.model?dl=1
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 80
 lr: 0.001
@@ -47,7 +47,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-# Model parameters
+####################### Model Parameters #######################################
 emb_size: 128
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.0
diff --git a/recipes/LibriSpeech/LM/hparams/transformer.yaml b/recipes/LibriSpeech/LM/hparams/transformer.yaml
index c79ef5769..50123a4c3 100644
--- a/recipes/LibriSpeech/LM/hparams/transformer.yaml
+++ b/recipes/LibriSpeech/LM/hparams/transformer.yaml
@@ -29,7 +29,7 @@ test_transcripts_pattern: "test*/**/*.trans.txt"
 # Tokenizer model
 tokenizer_file: speechbrain/asr-transformer-transformerlm-librispeech/tokenizer.ckpt
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 16
 lr: 10
diff --git a/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml b/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml
index b5a9fa60e..9dda21f82 100644
--- a/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml
+++ b/recipes/LibriSpeech/Tokenizer/hparams/1K_unigram_subword_bpe.yaml
@@ -16,7 +16,7 @@ skip_prep: False
 train_csv: !ref <output_folder>/train.csv
 valid_csv: !ref <output_folder>/dev-clean.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 1000  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml b/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml
index c312ce5bb..1f328c6f1 100644
--- a/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml
+++ b/recipes/LibriSpeech/Tokenizer/hparams/5K_unigram_subword_bpe.yaml
@@ -16,7 +16,7 @@ skip_prep: False
 train_csv: !ref <output_folder>/train.csv
 valid_csv: !ref <output_folder>/dev-clean.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 5000  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml b/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml
index 4806f3e06..13ce0d220 100644
--- a/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml
+++ b/recipes/LibriSpeech/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml
@@ -48,7 +48,7 @@ test_dataloader_options:
    batch_size: 8 # DynamicBatching not used at testing time
    num_workers: 4
 
-# Training parameters
+####################### Training Parameters ####################################
 lr: 0.0005
 warmup: 30000
 # This is equivalent to optimizer_step_limit - warmup
@@ -63,7 +63,7 @@ mask_prob: 0.65
 mask_length: 10
 num_negatives: 100
 
-# Model parameters
+####################### Model Parameters #######################################
 embedding_dim: 768
 extractor_dim: 512
 final_dim: 256
diff --git a/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml b/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml
index 924579a9c..70ef38de7 100644
--- a/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml
+++ b/recipes/MEDIA/ASR/CTC/hparams/train_hf_wav2vec.yaml
@@ -55,7 +55,7 @@ test_dataloader_options:
 sample_rate: 16000
 feats_dim: 1024
 
-# Training parameters:
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1
 lr_wav2vec: 0.0001
@@ -67,7 +67,7 @@ patient: 0
 patient_wav2vec: 0
 sorting: ascending
 
-# Model parameters:
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dnn_blocks: 3
 dnn_neurons: 512
diff --git a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml
index 7be6f6b94..4f9bad2e7 100644
--- a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml
+++ b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_full.yaml
@@ -57,7 +57,7 @@ test_dataloader_options:
 sample_rate: 16000
 feats_dim: 1024
 
-# Training parameters:
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1
 lr_wav2vec: 0.0001
@@ -69,7 +69,7 @@ patient: 0
 patient_wav2vec: 0
 sorting: ascending
 
-# Model parameters:
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dnn_blocks: 3
 dnn_neurons: 512
diff --git a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml
index d631a6da8..8631e6e88 100644
--- a/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml
+++ b/recipes/MEDIA/SLU/CTC/hparams/train_hf_wav2vec_relax.yaml
@@ -57,7 +57,7 @@ test_dataloader_options:
 sample_rate: 16000
 feats_dim: 1024
 
-# Training parameters:
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1
 lr_wav2vec: 0.0001
@@ -69,7 +69,7 @@ patient: 0
 patient_wav2vec: 0
 sorting: ascending
 
-# Model parameters:
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dnn_blocks: 3
 dnn_neurons: 512
diff --git a/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml b/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml
index 05b16bea3..5bb3b8ed8 100644
--- a/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml
+++ b/recipes/MultiWOZ/response_generation/gpt/hparams/train_gpt.yaml
@@ -58,7 +58,7 @@ max_history: 5
 ignore_index: -100
 label_smoothing: 0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 4
 batch_size: 8
 test_batch_size: 4
diff --git a/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml b/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml
index f7fd2b087..507115e83 100644
--- a/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml
+++ b/recipes/MultiWOZ/response_generation/llama2/hparams/train_llama2.yaml
@@ -40,7 +40,7 @@ max_history: 2
 ignore_index: -100
 label_smoothing: 0
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 4
 batch_size: 1
 test_batch_size: 1
diff --git a/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml b/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml
index 3f1da0919..c23c11c53 100644
--- a/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml
+++ b/recipes/REAL-M/sisnr-estimation/hparams/pool_sisnrestimator.yaml
@@ -67,7 +67,7 @@ num_spks: 2
 noprogressbar: False
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.0001
diff --git a/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml b/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml
index 302dabe57..10e8e58e4 100644
--- a/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml
+++ b/recipes/RescueSpeech/ASR/noise-robust/hparams/robust_asr_16k.yaml
@@ -41,7 +41,7 @@ skip_prep: False
 # longer sentences certainly correspond to "open microphones".
 avoid_if_longer_than: 10.0
 
-## Model parameters- Enhance model
+## Model Parameters - Enhance model
 dereverberate: False
 save_audio: True
 sample_rate: 16000
@@ -54,7 +54,7 @@ use_rand_shift: False
 min_shift: -8000
 max_shift: 8000
 
-## Training parameters- ASR
+####################### Training Parameters - ASR ##############################
 number_of_epochs: 10
 lr_whisper: 0.00003
 sorting: ascending
diff --git a/recipes/SLURP/NLU/hparams/train.yaml b/recipes/SLURP/NLU/hparams/train.yaml
index e2201d96b..7d88d62a9 100644
--- a/recipes/SLURP/NLU/hparams/train.yaml
+++ b/recipes/SLURP/NLU/hparams/train.yaml
@@ -28,14 +28,14 @@ asr_tokenizer_file: https://www.dropbox.com/s/o7gnouwdoqchotj/1000_unigram.model
 slu_tokenizer_file: https://www.dropbox.com/s/tmwq12r5vgcsif9/58_unigram.model?dl=1
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 16
 lr: 0.0003
 # token_type: unigram # ["unigram", "bpe", "char"]
 sorting: random
 
-# Model parameters
+####################### Model Parameters #######################################
 # sample_rate: 1600
 emb_size: 128
 dec_neurons: 512
diff --git a/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml b/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml
index 51f805b07..bf935024a 100644
--- a/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml
+++ b/recipes/SLURP/Tokenizer/hparams/tokenizer_bpe58.yaml
@@ -14,7 +14,7 @@ train_csv: !ref <output_folder>/train-type=direct.csv
 valid_csv: !ref <output_folder>/devel-type=direct.csv
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 58  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/SLURP/direct/hparams/train.yaml b/recipes/SLURP/direct/hparams/train.yaml
index 5a42c738c..038d2e59e 100644
--- a/recipes/SLURP/direct/hparams/train.yaml
+++ b/recipes/SLURP/direct/hparams/train.yaml
@@ -34,7 +34,7 @@ rir_annotation: !ref <save_folder>/rir.csv
 tokenizer_file: https://www.dropbox.com/s/tmwq12r5vgcsif9/58_unigram.model?dl=1
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 16
 lr: 0.0003
@@ -42,7 +42,7 @@ lr: 0.0003
 sorting: random
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Model parameters
+####################### Model Parameters #######################################
 sample_rate: 16000
 emb_size: 128
 dec_neurons: 512
@@ -100,41 +100,31 @@ add_noise: !new:speechbrain.augment.time_domain.AddNoise
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
+############################## Augmentations ###################################
+
 # Speed perturbation
-speed_changes: [90, 95, 105, 110]  # List of speed changes for time-stretching
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [90, 95, 105, 110]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 3  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 3
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
     shuffle_augmentations: True
     min_augmentations: 1
     max_augmentations: 4
@@ -146,7 +136,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
-# Models
+############################## Models ##########################################
+
 asr_model_source: speechbrain/asr-crdnn-rnnlm-librispeech
 
 slu_enc: !new:speechbrain.nnet.containers.Sequential
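
As a usage note, a wav_augment object like the one above is normally applied
only on the training stage inside a Brain's compute_forward. A hedged sketch
of that call site (the hasattr guard and the method body are illustrative,
not part of this patch):

    import speechbrain as sb

    class SLU(sb.core.Brain):
        def compute_forward(self, batch, stage):
            batch = batch.to(self.device)
            wavs, wav_lens = batch.sig
            # Augment raw waveforms only while training.
            if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"):
                wavs, wav_lens = self.hparams.wav_augment(wavs, wav_lens)
            ...  # feature extraction and encoding proceed as in the recipe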
diff --git a/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml b/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml
index b383da5cb..84222db5f 100644
--- a/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml
+++ b/recipes/SLURP/direct/hparams/train_with_wav2vec2.yaml
@@ -32,7 +32,7 @@ skip_prep: False
 # URL for the wav2vec2 model; you can change it to benchmark different models
 wav2vec2_hub: "facebook/hubert-base-ls960"
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 35
 batch_size: 6
 lr: 0.0003
@@ -47,7 +47,7 @@ freeze_wav2vec2: False
 #set to true to freeze the CONV part of the wav2vec2 model
 freeze_wav2vec2_conv: True
 
-# Model parameters
+####################### Model Parameters #######################################
 sample_rate: 16000
 emb_size: 128
 dec_neurons: 512
@@ -96,45 +96,31 @@ seq_lin: !new:speechbrain.nnet.linear.Linear
     input_size: !ref <dec_neurons>
     n_neurons: !ref <output_neurons>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
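
The knobs that survive the cleanup are the ones that actually vary between
recipes. Roughly, on each call the Augmenter keeps the pipeline active with
probability augment_prob, draws k uniformly from [min_augmentations,
max_augmentations], and applies the first k augmentations (in shuffled order
when shuffle_augmentations is true). A toy re-implementation of just the
selection step, as an illustration rather than the library's code:

    import random

    def select_augmentations(augmentations, min_k, max_k,
                             augment_prob=1.0, shuffle=False):
        # Toy version of the Augmenter's selection logic (illustrative only).
        if random.random() > augment_prob:
            return []                     # skip augmentation entirely
        order = list(augmentations)
        if shuffle:
            random.shuffle(order)
        k = random.randint(min_k, max_k)  # how many augmentations to apply
        return order[:k]

    # With min_augmentations = max_augmentations, as above, k is pinned and
    # the whole (possibly shuffled) list is always applied.
    chosen = select_augmentations(["speed", "drop_freq", "drop_chunk"], 1, 3,
                                  shuffle=True)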
diff --git a/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml b/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml
index 2933e0fd4..7741680bd 100644
--- a/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml
+++ b/recipes/Switchboard/ASR/CTC/hparams/train_with_wav2vec.yaml
@@ -49,7 +49,7 @@ test_csv:
   - !ref <output_folder>/test_callhome.csv
   - !ref <output_folder>/test.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 lr: 1.0
 lr_wav2vec: 0.0001
@@ -74,7 +74,7 @@ test_dataloader_options:
 token_type: unigram  # ["unigram", "bpe", "char"]
 character_coverage: 1.0
 
-# Model parameters
+####################### Model Parameters #######################################
 wav2vec_output_dim: 1024
 dnn_neurons: 1024
 freeze_wav2vec: False
@@ -109,45 +109,31 @@ kenlm_model_path: null
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
   limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
   orig_freq: !ref <sample_rate>
-  speeds: !ref <speed_changes>
+  speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-  drop_freq_low: !ref <drop_freq_low>
-  drop_freq_high: !ref <drop_freq_high>
-  drop_freq_count_low: !ref <drop_freq_count_low>
-  drop_freq_count_high: !ref <drop_freq_count_high>
-  drop_freq_width: !ref <drop_freq_width>
+  drop_freq_low: 0
+  drop_freq_high: 1
+  drop_freq_count_low: 1
+  drop_freq_count_high: 3
+  drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-  drop_length_low: !ref <drop_chunk_length_low>
-  drop_length_high: !ref <drop_chunk_length_high>
-  drop_count_low: !ref <drop_chunk_count_low>
-  drop_count_high: !ref <drop_chunk_count_high>
+  drop_length_low: 1000
+  drop_length_high: 2000
+  drop_count_low: 1
+  drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-  parallel_augment: False
   concat_original: True
-  repeat_augment: 1
-  shuffle_augmentations: False
   min_augmentations: 3
   max_augmentations: 3
   augment_prob: 1.0
@@ -156,6 +142,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
     !ref <drop_freq>,
     !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.nnet.containers.Sequential
   input_shape: [null, null, !ref <wav2vec_output_dim>]
   linear1: !name:speechbrain.nnet.linear.Linear
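The hunks above delete only keys whose values appear to match the library defaults, so recipe behavior should be unchanged. As a cross-check, a minimal sketch of the equivalent Python construction for the Switchboard CTC pipeline, assuming the SpeechBrain 1.0 call signatures and dummy tensor shapes:

import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.time_domain import DropChunk, DropFreq, SpeedPerturb

# Same values as the patched YAML above.
speed_perturb = SpeedPerturb(orig_freq=16000, speeds=[95, 100, 105])
drop_freq = DropFreq(
    drop_freq_low=0, drop_freq_high=1,
    drop_freq_count_low=1, drop_freq_count_high=3,
    drop_freq_width=0.05,
)
drop_chunk = DropChunk(
    drop_length_low=1000, drop_length_high=2000,
    drop_count_low=1, drop_count_high=5,
)

# The deleted keys (parallel_augment=False, repeat_augment=1,
# shuffle_augmentations=False) are assumed to equal the Augmenter defaults,
# so the trimmed YAML builds the same object.
wav_augment = Augmenter(
    concat_original=True,
    min_augmentations=3,
    max_augmentations=3,
    augment_prob=1.0,
    augmentations=[speed_perturb, drop_freq, drop_chunk],
)

wavs = torch.randn(4, 16000)  # [batch, time], dummy 1-second signals
lens = torch.ones(4)          # relative lengths
aug_wavs, aug_lens = wav_augment(wavs, lens)
# With concat_original=True the returned batch holds the originals followed by
# their augmented copies, which is why labels must be replicated downstream.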
diff --git a/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml b/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml
index d20001f1c..743467bcf 100644
--- a/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml
+++ b/recipes/Switchboard/ASR/seq2seq/hparams/train_BPE_2000.yaml
@@ -57,7 +57,7 @@ test_csv:
    - !ref <save_folder>/test_callhome.csv
    - !ref <save_folder>/test.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 number_of_ctc_epochs: 5
 batch_size: 10
@@ -103,7 +103,7 @@ test_dataloader_opts:
    num_workers: !ref <num_workers>
    batch_size: !ref <batch_size>
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -144,57 +144,40 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
    ext: wav
    csv_file: !ref <noise_annotation>
 
-# Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
+############################## Augmentations ###################################
 
+# Add noise to input signal
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
    csv_file: !ref <noise_annotation>
-   snr_low: !ref <snr_low>
-   snr_high: !ref <snr_high>
+   snr_low: 0
+   snr_high: 15
    noise_sample_rate: !ref <sample_rate>
    clean_sample_rate: !ref <sample_rate>
    num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
    orig_freq: !ref <sample_rate>
-   speeds: !ref <speed_changes>
+   speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-   drop_freq_low: !ref <drop_freq_low>
-   drop_freq_high: !ref <drop_freq_high>
-   drop_freq_count_low: !ref <drop_freq_count_low>
-   drop_freq_count_high: !ref <drop_freq_count_high>
-   drop_freq_width: !ref <drop_freq_width>
+   drop_freq_low: 0
+   drop_freq_high: 1
+   drop_freq_count_low: 1
+   drop_freq_count_high: 3
+   drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-   drop_length_low: !ref <drop_chunk_length_low>
-   drop_length_high: !ref <drop_chunk_length_high>
-   drop_count_low: !ref <drop_chunk_count_low>
-   drop_count_high: !ref <drop_chunk_count_high>
+   drop_length_low: 1000
+   drop_length_high: 2000
+   drop_count_low: 1
+   drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-   parallel_augment: False
    concat_original: True
-   repeat_augment: 1
-   shuffle_augmentations: False
    min_augmentations: 4
    max_augmentations: 4
    augment_prob: 1.0
@@ -215,6 +198,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
    n_fft: !ref <n_fft>
    n_mels: !ref <n_mels>
 
+############################## Models ##########################################
+
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
    input_shape: [null, null, !ref <n_mels>]
    activation: !ref <activation>
diff --git a/recipes/Switchboard/ASR/seq2seq/train.py b/recipes/Switchboard/ASR/seq2seq/train.py
index 57ce5c966..d0cd3ce91 100644
--- a/recipes/Switchboard/ASR/seq2seq/train.py
+++ b/recipes/Switchboard/ASR/seq2seq/train.py
@@ -127,12 +127,16 @@ class ASR(sb.Brain):
         tokens_eos, tokens_eos_lens = batch.tokens_eos
         tokens, tokens_lens = batch.tokens
 
+        # Labels must be extended if parallel augmentation or concatenated
+        # augmentation was performed on the input (increasing the batch dimension)
         if stage == sb.Stage.TRAIN and hasattr(self.hparams, "wav_augment"):
-            tokens = self.hparams.wav_augment.replicate_labels(tokens)
-            tokens_lens = self.hparams.wav_augment.replicate_labels(tokens_lens)
-            tokens_eos = self.hparams.wav_augment.replicate_labels(tokens_eos)
-            tokens_eos_lens = self.hparams.wav_augment.replicate_labels(
-                tokens_eos_lens
+            (
+                tokens,
+                tokens_lens,
+                tokens_eos,
+                tokens_eos_lens,
+            ) = self.hparams.wav_augment.replicate_multiple_labels(
+                tokens, tokens_lens, tokens_eos, tokens_eos_lens
             )
 
         loss_seq = self.hparams.seq_cost(
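replicate_multiple_labels collapses the four separate replicate_labels calls into one. A hedged sketch of the underlying idea, assuming the method simply repeats each tensor along the batch dimension to match the enlarged batch (the exact internals may differ):

import torch

def replicate_labels_sketch(labels: torch.Tensor, n_copies: int) -> torch.Tensor:
    # Repeat the label tensor along the batch dimension, once per copy of the
    # input produced by the augmenter (e.g. n_copies=2 when concat_original=True
    # and a single sequential augmentation pass is used).
    return torch.cat([labels] * n_copies, dim=0)

tokens = torch.tensor([[5, 9, 2], [7, 1, 0]])          # [batch=2, seq=3]
tokens_lens = torch.tensor([1.0, 0.66])
tokens = replicate_labels_sketch(tokens, 2)            # -> shape [4, 3]
tokens_lens = replicate_labels_sketch(tokens_lens, 2)  # -> shape [4]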
diff --git a/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml b/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml
index bd84a8a19..674c03719 100644
--- a/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml
+++ b/recipes/Switchboard/ASR/transformer/hparams/transformer.yaml
@@ -51,7 +51,7 @@ test_csv:
 
 ckpt_interval_minutes: 30  # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as:
 # batch_size * n_gpus * grad_accumulation_factor.
@@ -96,7 +96,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
   batch_size: 1
 
-####################### Model parameters  ###########################
+####################### Model Parameters  ###########################
 # Transformer
 transformer_input_size: 1280
 d_model: 256
@@ -271,50 +271,32 @@ normalize: !new:speechbrain.processing.features.InputNormalization
   norm_type: global
   update_until_epoch: 4
 
-# Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
+############################## Augmentations ###################################
 
+# Speed perturbation
+speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
+  orig_freq: !ref <sample_rate>
+  speeds: [95, 100, 105]
+
+# Time Drop
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-  drop_length_low: !ref <time_drop_length_low>
-  drop_length_high: !ref <time_drop_length_high>
-  drop_count_low: !ref <time_drop_count_low>
-  drop_count_high: !ref <time_drop_count_high>
-  replace: !ref <time_drop_replace>
-  dim: 1
+  drop_length_low: 15
+  drop_length_high: 25
+  drop_count_low: 5
+  drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-  drop_length_low: !ref <freq_drop_length_low>
-  drop_length_high: !ref <freq_drop_length_high>
-  drop_count_low: !ref <freq_drop_count_low>
-  drop_count_high: !ref <freq_drop_count_high>
-  replace: !ref <freq_drop_replace>
+  drop_length_low: 25
+  drop_length_high: 35
+  drop_count_low: 2
+  drop_count_high: 2
   dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-  warp_window: !ref <time_warp_window>
-  warp_mode: !ref <time_warp_mode>
-  dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-  parallel_augment: False
-  concat_original: False
-  repeat_augment: 1
-  shuffle_augmentations: False
   min_augmentations: 3
   max_augmentations: 3
   augment_prob: 1.0
@@ -323,14 +305,7 @@ fea_augment: !new:speechbrain.augment.augmenter.Augmenter
     !ref <freq_drop>,
     !ref <time_warp>]
 
-
-# Speed perturbation
 do_speed_perturb: True
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
-speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
-  orig_freq: !ref <sample_rate>
-  speeds: !ref <speed_changes>
 
 compute_features: !new:speechbrain.lobes.features.Fbank
   sample_rate: !ref <sample_rate>
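The feature-domain pipeline gets the same default-pruning, and speed_perturb simply moves under the new Augmentations banner while do_speed_perturb stays as the runtime switch. A sketch of the equivalent construction, again assuming the SpeechBrain 1.0 signatures; the omitted keys (replace="zeros" and dim=1 for the time drop, warp_window=5, warp_mode="bicubic", dim=1 for the warp) are assumed to be the library defaults, as the deletions above imply:

import torch
from speechbrain.augment.augmenter import Augmenter
from speechbrain.augment.freq_domain import SpectrogramDrop, Warping

time_drop = SpectrogramDrop(drop_length_low=15, drop_length_high=25,
                            drop_count_low=5, drop_count_high=5)
freq_drop = SpectrogramDrop(drop_length_low=25, drop_length_high=35,
                            drop_count_low=2, drop_count_high=2, dim=2)
time_warp = Warping()  # all-default construction, as in the patched YAML

fea_augment = Augmenter(
    min_augmentations=3,
    max_augmentations=3,
    augment_prob=1.0,
    augmentations=[time_drop, freq_drop, time_warp],
)

feats = torch.randn(4, 200, 80)  # [batch, time, mels], dummy fbank features
lens = torch.ones(4)
aug_feats, aug_lens = fea_augment(feats, lens)
# concat_original defaults to False, so the batch size is unchanged here;
# speed perturbation is applied earlier, on the waveform, gated by do_speed_perturb.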
diff --git a/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml b/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml
index 45a765cd1..8dd221ca4 100644
--- a/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml
+++ b/recipes/Switchboard/ASR/transformer/hparams/transformer_finetuned_LM.yaml
@@ -51,7 +51,7 @@ test_csv:
 
 ckpt_interval_minutes: 30  # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as:
 # batch_size * n_gpus * grad_accumulation_factor.
@@ -96,7 +96,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
   batch_size: 1
 
-####################### Model parameters  ###########################
+####################### Model Parameters  ###########################
 # Transformer
 d_model: 512
 nhead: 4
@@ -126,7 +126,7 @@ lm_weight: 0.60
 ctc_weight_decode: 0.40
 temperature: 1.15
 temperature_lm: 1.15
-############################## models  ################################
+############################## Models  ################################
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
   input_shape: (8, 10, 80)
@@ -258,57 +258,32 @@ normalize: !new:speechbrain.processing.features.InputNormalization
   norm_type: global
   update_until_epoch: 4
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
   orig_freq: !ref <sample_rate>
-  speeds: !ref <speed_changes>
+  speeds: [95, 100, 105]
 
 # Time Drop
-time_drop_length_low: 15  # Min length for temporal chunk to drop in spectrogram
-time_drop_length_high: 25  # Max length for temporal chunk to drop in spectrogram
-time_drop_count_low: 5  # Min number of chunks to drop in time in the spectrogram
-time_drop_count_high: 5  # Max number of chunks to drop in time in the spectrogram
-time_drop_replace: "zeros"  # Method of dropping chunks
-
 time_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-  drop_length_low: !ref <time_drop_length_low>
-  drop_length_high: !ref <time_drop_length_high>
-  drop_count_low: !ref <time_drop_count_low>
-  drop_count_high: !ref <time_drop_count_high>
-  replace: !ref <time_drop_replace>
-  dim: 1
+  drop_length_low: 15
+  drop_length_high: 25
+  drop_count_low: 5
+  drop_count_high: 5
 
 # Frequency Drop
-freq_drop_length_low: 25  # Min length for chunks to drop in frequency in the spectrogram
-freq_drop_length_high: 35  # Max length for chunks to drop in frequency in the spectrogram
-freq_drop_count_low: 2  # Min number of chunks to drop in frequency in the spectrogram
-freq_drop_count_high: 2  # Max number of chunks to drop in frequency in the spectrogram
-freq_drop_replace: "zeros"  # Method of dropping chunks
-
 freq_drop: !new:speechbrain.augment.freq_domain.SpectrogramDrop
-  drop_length_low: !ref <freq_drop_length_low>
-  drop_length_high: !ref <freq_drop_length_high>
-  drop_count_low: !ref <freq_drop_count_low>
-  drop_count_high: !ref <freq_drop_count_high>
-  replace: !ref <freq_drop_replace>
+  drop_length_low: 25
+  drop_length_high: 35
+  drop_count_low: 2
+  drop_count_high: 2
   dim: 2
 
 # Time warp
-time_warp_window: 5  # Length of time warping window
-time_warp_mode: "bicubic"  # Time warping method
-
 time_warp: !new:speechbrain.augment.freq_domain.Warping
-  warp_window: !ref <time_warp_window>
-  warp_mode: !ref <time_warp_mode>
-  dim: 1
 
 fea_augment: !new:speechbrain.augment.augmenter.Augmenter
-  parallel_augment: False
-  concat_original: False
-  repeat_augment: 1
-  shuffle_augmentations: False
   min_augmentations: 3
   max_augmentations: 3
   augment_prob: 1.0
diff --git a/recipes/Switchboard/ASR/transformer/train.py b/recipes/Switchboard/ASR/transformer/train.py
index 5fb6ebc47..dcc527952 100644
--- a/recipes/Switchboard/ASR/transformer/train.py
+++ b/recipes/Switchboard/ASR/transformer/train.py
@@ -133,16 +133,16 @@ class ASR(sb.core.Brain):
         tokens, tokens_lens = batch.tokens
 
         if stage == sb.Stage.TRAIN:
+            # Labels must be extended if parallel augmentation or concatenated
+            # augmentation was performed on the input (increasing the batch dimension)
             if hasattr(self.hparams, "fea_augment"):
-                tokens = self.hparams.fea_augment.replicate_labels(tokens)
-                tokens_lens = self.hparams.fea_augment.replicate_labels(
-                    tokens_lens
-                )
-                tokens_eos = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos
-                )
-                tokens_eos_lens = self.hparams.fea_augment.replicate_labels(
-                    tokens_eos_lens
+                (
+                    tokens,
+                    tokens_lens,
+                    tokens_eos,
+                    tokens_eos_lens,
+                ) = self.hparams.fea_augment.replicate_multiple_labels(
+                    tokens, tokens_lens, tokens_eos, tokens_eos_lens
                 )
 
         loss_seq = self.hparams.seq_cost(
diff --git a/recipes/Switchboard/LM/hparams/transformer.yaml b/recipes/Switchboard/LM/hparams/transformer.yaml
index 2f27463af..b501faf55 100644
--- a/recipes/Switchboard/LM/hparams/transformer.yaml
+++ b/recipes/Switchboard/LM/hparams/transformer.yaml
@@ -36,7 +36,7 @@ test_csv: !ref <save_folder>/test.csv
 # (e.g. /path/to/2000_unigram.model)
 tokenizer_file: !PLACEHOLDER
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 100
 batch_size: 164
 lr: 1
diff --git a/recipes/Switchboard/LM/hparams/transformer_finetune.yaml b/recipes/Switchboard/LM/hparams/transformer_finetune.yaml
index f5657c76c..5b0860e41 100644
--- a/recipes/Switchboard/LM/hparams/transformer_finetune.yaml
+++ b/recipes/Switchboard/LM/hparams/transformer_finetune.yaml
@@ -39,7 +39,7 @@ test_csv: !ref <save_folder>/test.csv
 # instead. E.g if you want to use your own LM / tokenizer.
 pretrained_lm_tokenizer_path: speechbrain/asr-transformer-transformerlm-librispeech
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 5
 batch_size: 128
 lr: 2
diff --git a/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml b/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml
index e6c546bdf..d07d83e70 100644
--- a/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml
+++ b/recipes/Switchboard/Tokenizer/hparams/2K_unigram_subword_bpe.yaml
@@ -20,7 +20,7 @@ train_csv: !ref <output_folder>/train_lm.csv
 valid_csv: !ref <output_folder>/dev.csv
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 2000  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/TIMIT/ASR/CTC/hparams/train.yaml b/recipes/TIMIT/ASR/CTC/hparams/train.yaml
index dce350b7e..145fa1a3e 100644
--- a/recipes/TIMIT/ASR/CTC/hparams/train.yaml
+++ b/recipes/TIMIT/ASR/CTC/hparams/train.yaml
@@ -25,7 +25,7 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augm
 NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 8
 lr: 1.0
@@ -36,7 +36,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 40
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -69,6 +69,8 @@ test_dataloader_opts:
 normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -76,58 +78,38 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
     ext: wav
     csv_file: !ref <noise_annotation>
 
-
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -137,6 +119,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
diff --git a/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml b/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml
index cc3276506..d61179fa9 100644
--- a/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml
+++ b/recipes/TIMIT/ASR/seq2seq/hparams/train.yaml
@@ -22,7 +22,7 @@ test_annotation: !ref <save_folder>/test.json
 skip_prep: False # Skip data preparation
 uppercase: False # Must be True when the TIMIT dataset is in the upper-case version
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 8 # Used if dynamic_batching is False
 lr: 0.0003
@@ -34,7 +34,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 40
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -88,45 +88,30 @@ dynamic_batch_sampler:
     shuffle: !ref <shuffle>
     batch_ordering: !ref <batch_ordering>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -135,6 +120,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
diff --git a/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml b/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml
index 9756e9356..705f79e9a 100644
--- a/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml
+++ b/recipes/TIMIT/ASR/seq2seq/hparams/train_with_wav2vec2.yaml
@@ -23,7 +23,7 @@ test_annotation: !ref <save_folder>/test.json
 skip_prep: False # Skip data preparation
 uppercase: False # Must be True when the TIMIT dataset is in the upper-case version
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 8
 lr: 0.0003
@@ -33,7 +33,7 @@ sorting: ascending
 precision: fp32 # bf16, fp16 or fp32
 sample_rate: 16000
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dnn_layers: 2
 dnn_neurons: 1024
@@ -66,45 +66,30 @@ test_dataloader_opts:
     batch_size: !ref <batch_size>
     num_workers: !ref <batch_size>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -113,6 +98,7 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
 
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
diff --git a/recipes/TIMIT/ASR/transducer/hparams/train.yaml b/recipes/TIMIT/ASR/transducer/hparams/train.yaml
index 5b8e53809..204297dc6 100644
--- a/recipes/TIMIT/ASR/transducer/hparams/train.yaml
+++ b/recipes/TIMIT/ASR/transducer/hparams/train.yaml
@@ -28,7 +28,7 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequences for data augm
 NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 8
 lr: 1.0
@@ -40,7 +40,7 @@ n_fft: 400
 n_mels: 40
 
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -92,6 +92,8 @@ compute_features: !new:speechbrain.lobes.features.Fbank
 normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -99,58 +101,38 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
     ext: wav
     csv_file: !ref <noise_annotation>
 
-
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -160,6 +142,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 
 enc: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
diff --git a/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml b/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml
index 90f899d26..9ead09f56 100644
--- a/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml
+++ b/recipes/TIMIT/ASR/transducer/hparams/train_wav2vec.yaml
@@ -28,7 +28,7 @@ test_annotation: !ref <save_folder>/test.json
 skip_prep: False # Skip data preparation
 uppercase: False # Must be True when the TIMIT dataset is in the upper-case version
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 batch_size: 8
 lr: 0.0003
@@ -41,7 +41,7 @@ sample_rate: 16000
 # n_fft: 400
 # n_mels: 40
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 # dropout: 0.15
 dnn_blocks: 1
@@ -74,45 +74,30 @@ test_dataloader_opts:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
-    concat_original: False
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -121,6 +106,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
     source: !ref <wav2vec2_hub>
     output_norm: True
diff --git a/recipes/TIMIT/Alignment/hparams/train.yaml b/recipes/TIMIT/Alignment/hparams/train.yaml
index 7a2a581d9..aaf06b7ff 100644
--- a/recipes/TIMIT/Alignment/hparams/train.yaml
+++ b/recipes/TIMIT/Alignment/hparams/train.yaml
@@ -20,7 +20,7 @@ valid_annotation: !ref <data_folder>/dev.json
 test_annotation: !ref <data_folder>/test.json
 skip_prep: False # Skip data prep
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 10
 batch_size: 256
 lr: 0.0003
@@ -40,7 +40,7 @@ phn_set: 60 # {60, 48, 39}
 output_neurons: 183
 blank_index: 182
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dnn_blocks: 1
 dnn_neurons: 2000
diff --git a/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml b/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml
index 6ffad0b00..2f9f924c7 100644
--- a/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml
+++ b/recipes/Tedlium2/ASR/transformer/hparams/branchformer_large.yaml
@@ -34,7 +34,7 @@ valid_csv: !ref <output_folder>/dev/dev.csv
 test_csv:
     - !ref <output_folder>/test/test.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 # To make Transformers converge, the global batch size should be large enough.
 # The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
 # Empirically, we found that this value should be >= 128.
@@ -97,7 +97,7 @@ valid_dataloader_opts:
 test_dataloader_opts:
     batch_size: 1
 
-####################### Model parameters ###########################
+####################### Model Parameters ###########################
 # Transformer
 d_model: 512
 nhead: 8
diff --git a/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml b/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml
index a97290f13..03c91b126 100644
--- a/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml
+++ b/recipes/Tedlium2/Tokenizer/hparams/tedlium2_500_bpe.yaml
@@ -14,7 +14,7 @@ skip_prep: False
 train_csv: !ref <output_folder>/train/train.csv
 valid_csv: !ref <output_folder>/dev/dev.csv
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: bpe  # ["unigram", "bpe", "char"]
 token_output: 500  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml b/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml
index 1d9613a16..3ecb1119b 100644
--- a/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml
+++ b/recipes/UrbanSound8k/SoundClassification/hparams/train_ecapa_tdnn.yaml
@@ -48,7 +48,7 @@ skip_manifest_creation: False
 
 ckpt_interval_minutes: 15 # save checkpoint every N min
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 25
 batch_size: 32
 lr: 0.001
diff --git a/recipes/Voicebank/ASR/CTC/hparams/train.yaml b/recipes/Voicebank/ASR/CTC/hparams/train.yaml
index 65b833a0b..a49bae5fa 100644
--- a/recipes/Voicebank/ASR/CTC/hparams/train.yaml
+++ b/recipes/Voicebank/ASR/CTC/hparams/train.yaml
@@ -20,7 +20,7 @@ valid_annotation: !ref <output_folder>/valid.json
 test_annotation: !ref <output_folder>/test.json
 skip_prep: False # Skip data preparation
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 8
 sorting: ascending
@@ -37,7 +37,7 @@ sample_rate: 16000
 n_fft: 400
 n_mels: 40
 
-# Model parameters
+####################### Model Parameters #######################################
 activation: !name:torch.nn.LeakyReLU
 dropout: 0.15
 cnn_blocks: 2
@@ -61,45 +61,31 @@ compute_features: !new:speechbrain.lobes.features.Fbank
     n_fft: !ref <n_fft>
     n_mels: !ref <n_mels>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 3
     max_augmentations: 3
     augment_prob: 1.0
@@ -108,6 +94,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 model: !new:speechbrain.lobes.models.CRDNN.CRDNN
     input_shape: [null, null, !ref <n_mels>]
     activation: !ref <activation>
diff --git a/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml b/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml
index 2a96e25db..c3391498a 100644
--- a/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml
+++ b/recipes/Voicebank/MTL/ASR_enhance/hparams/enhance_mimic.yaml
@@ -18,7 +18,7 @@ valid_annotation: !ref <data_folder>/valid.json
 test_annotation: !ref <data_folder>/test.json
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 8
 lr: 0.0001
diff --git a/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml b/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml
index a86fbc4cc..d384d026a 100644
--- a/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml
+++ b/recipes/Voicebank/MTL/ASR_enhance/hparams/pretrain_perceptual.yaml
@@ -18,7 +18,7 @@ valid_annotation: !ref <data_folder>/valid.json
 test_annotation: !ref <data_folder>/test.json
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 20
 ctc_epochs: 4
 batch_size: 8
diff --git a/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml b/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml
index 1bf087d85..1835342c3 100644
--- a/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml
+++ b/recipes/Voicebank/MTL/ASR_enhance/hparams/robust_asr.yaml
@@ -24,7 +24,7 @@ test_annotation: !ref <data_folder>/test.json
 noise_annotation: !ref <save_folder>/noise.csv # The data manifest files are created by the data preparation script
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 30
 ctc_epochs: 0
 batch_size: 8
@@ -141,6 +141,8 @@ compute_stft: !new:speechbrain.processing.features.STFT
 spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
     power: 0.5
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -148,58 +150,38 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
     ext: wav
     csv_file: !ref <noise_annotation>
 
-
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -209,7 +191,6 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
-
 fbank: !new:speechbrain.lobes.features.Fbank
     n_mels: !ref <n_mels>
     sample_rate: !ref <sample_rate>
diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml
index d7d2be081..c93ba21ec 100644
--- a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml
+++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn.yaml
@@ -85,6 +85,7 @@ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
 
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -95,18 +96,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
 
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
-
 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <RIR_DATASET_URL>
@@ -122,37 +119,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     num_workers: !ref <num_workers>
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
     parallel_augment: True
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
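Unlike the ASR recipes, the VoxCeleb hparams keep parallel_augment: True explicitly, since it differs from the default that was removed everywhere else. A back-of-the-envelope sketch of the batch-size bookkeeping this implies; the exact Augmenter semantics are an assumption here:

# Assumed semantics: with parallel_augment=True each selected augmentation is
# applied to the original batch independently, and concat_original=True
# additionally keeps the clean batch, so the output batch grows accordingly.
batch_size = 8
num_augmentations = 4  # min_augmentations == max_augmentations == 4
out_batch = batch_size * (1 + num_augmentations)
print(out_batch)  # 40 rows: 8 clean + 8 per augmentation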
diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml
index 6cb17c6bc..becd8e4d4 100644
--- a/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml
+++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_ecapa_tdnn_mel_spec.yaml
@@ -104,6 +104,7 @@ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
 
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -114,18 +115,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
 
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
-
 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <RIR_DATASET_URL>
@@ -141,37 +138,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     num_workers: !ref <num_workers>
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
     parallel_augment: True
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml
index 617457f1a..a20786574 100644
--- a/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml
+++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_resnet.yaml
@@ -55,7 +55,7 @@ right_frames: 0
 deltas: False
 
 # Number of speakers
-# 1211 for vox1, 5994 for vox2, 7205 for vox1+vox2
+# 1211 for vox1, 5994 for vox2, 7205 for vox1+vox2
 out_n_neurons: 7205
 
 num_workers: 4
@@ -85,6 +85,7 @@ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
 
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -95,18 +96,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
 
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
-
 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <RIR_DATASET_URL>
@@ -122,37 +119,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     num_workers: !ref <num_workers>
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
     parallel_augment: True
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
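
The patch drops repeat_augment: 1 and shuffle_augmentations: False here,
relying on these matching the Augmenter defaults, so behaviour is unchanged.
A hedged sketch of the resulting batch arithmetic, with a shorter placeholder
augmentation list for brevity:

    import torch
    from speechbrain.augment.augmenter import Augmenter
    from speechbrain.augment.time_domain import DropFreq

    augment = Augmenter(
        parallel_augment=True,   # each augmentation sees the full batch
        concat_original=True,    # keep the clean batch as well
        min_augmentations=2,
        max_augmentations=2,
        augment_prob=1.0,
        augmentations=[DropFreq(), DropFreq()],
    )

    wavs = torch.randn(4, 16000)
    lens = torch.ones(4)
    out, out_lens = augment(wavs, lens)
    print(out.shape[0])  # 12: 4 clean + 2 parallel branches of 4 each
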
diff --git a/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml b/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
index 8a70462c5..ab628c681 100644
--- a/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
+++ b/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
@@ -88,6 +88,7 @@ classifier: !new:speechbrain.lobes.models.Xvector.Classifier
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
 
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -98,18 +99,14 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
 
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
-
 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <RIR_DATASET_URL>
@@ -125,37 +122,24 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     num_workers: !ref <num_workers>
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
     parallel_augment: True
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
diff --git a/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml b/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
index a2d872486..db29a301b 100644
--- a/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
+++ b/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
@@ -58,6 +58,8 @@ val_dataloader_options:
     num_workers: 1
     batch_size: !ref <batch_size_val>
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -72,7 +74,6 @@ prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     ext: wav
     csv_file: !ref <rir_annotation>
 
-
 # Add reverberation to input signal
 add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     csv_file: !ref <rir_annotation>
@@ -81,27 +82,21 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     num_workers: !ref <num_workers>
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [90, 100, 110]  # List of speed changes for time-stretching
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
     shuffle_augmentations: True
     min_augmentations: 1
     max_augmentations: 3
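
Here the speeds argument is removed entirely rather than inlined: the deleted
speed_changes list, [90, 100, 110], appears to match the SpeedPerturb default,
so the recipe keeps the same perturbation. A minimal sketch under that
assumption:

    import torch
    from speechbrain.augment.time_domain import SpeedPerturb

    # Assumes the default `speeds` is [90, 100, 110] percent, matching the
    # removed `speed_changes` list above.
    perturb = SpeedPerturb(orig_freq=16000)
    wav = torch.randn(1, 16000)
    out = perturb(wav)  # randomly resampled to 90%, 100%, or 110% speed
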
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml
index a38bb8c88..843c9fb09 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-wham-DM.yaml
@@ -45,7 +45,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 8000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 8
 lr: 0.0001
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml
index af3fcb0d7..b55f05a11 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/cnntransformer-whamr-DM.yaml
@@ -45,7 +45,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 8000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 8
 lr: 0.0001
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml
index 7c2990442..545428d5a 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/convtasnet-whamr-DM.yaml
@@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 8000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 10
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml
index f3158a625..d974b03c1 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/dprnn-whamr-DM.yaml
@@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 8000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml
index 75c90a0f1..df1935306 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml
@@ -44,7 +44,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml
index 625801e51..dc2783491 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k-DM.yaml
@@ -46,7 +46,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 16000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml
index d42060642..d11332c7e 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-16k.yaml
@@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 16000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml
index 2acd34370..3721698bc 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr-DM.yaml
@@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 8000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml
index 536a46f49..14a38a06b 100644
--- a/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml
+++ b/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-whamr.yaml
@@ -44,7 +44,7 @@ save_audio: True # Save estimated sources on disk
 sample_rate: 8000
 n_audio_to_save: 20
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml b/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml
index 1661d68f2..db920e7fa 100644
--- a/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml
+++ b/recipes/WHAMandWHAMR/separation/hparams/sepformer-wham.yaml
@@ -42,7 +42,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml b/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml
index d4bc250d0..8538529a6 100644
--- a/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml
+++ b/recipes/WHAMandWHAMR/separation/hparams/sepformer-whamr.yaml
@@ -40,7 +40,7 @@ num_spks: 2 # set to 3 for wsj0-3mix
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml b/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml
index 038330504..0305c6236 100644
--- a/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml
+++ b/recipes/WSJ0Mix/separation/hparams/convtasnet.yaml
@@ -36,7 +36,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WSJ0Mix/separation/hparams/dprnn.yaml b/recipes/WSJ0Mix/separation/hparams/dprnn.yaml
index a78a78266..df1952d8c 100644
--- a/recipes/WSJ0Mix/separation/hparams/dprnn.yaml
+++ b/recipes/WSJ0Mix/separation/hparams/dprnn.yaml
@@ -36,7 +36,7 @@ noprogressbar: False
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WSJ0Mix/separation/hparams/resepformer.yaml b/recipes/WSJ0Mix/separation/hparams/resepformer.yaml
index 2b2711f52..406f2aa76 100644
--- a/recipes/WSJ0Mix/separation/hparams/resepformer.yaml
+++ b/recipes/WSJ0Mix/separation/hparams/resepformer.yaml
@@ -37,7 +37,7 @@ num_spks: 2 # set to 3 for wsj0-3mix
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml b/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml
index 15550c9a2..2cf2b7ac5 100644
--- a/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml
+++ b/recipes/WSJ0Mix/separation/hparams/sepformer-conformerintra.yaml
@@ -37,7 +37,7 @@ num_spks: 2 # set to 3 for wsj0-3mix
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml b/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml
index 82a2d3009..c896f2dfd 100644
--- a/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml
+++ b/recipes/WSJ0Mix/separation/hparams/sepformer-customdataset.yaml
@@ -39,7 +39,7 @@ noprogressbar: False
 save_audio: True # Save estimated sources on disk
 sample_rate: 16000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WSJ0Mix/separation/hparams/sepformer.yaml b/recipes/WSJ0Mix/separation/hparams/sepformer.yaml
index 4787fb3aa..77319604d 100644
--- a/recipes/WSJ0Mix/separation/hparams/sepformer.yaml
+++ b/recipes/WSJ0Mix/separation/hparams/sepformer.yaml
@@ -40,7 +40,7 @@ save_audio: True # Save estimated sources on disk
 n_audio_to_save: 20
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/WSJ0Mix/separation/hparams/skim.yaml b/recipes/WSJ0Mix/separation/hparams/skim.yaml
index 53b312efd..606c7060a 100644
--- a/recipes/WSJ0Mix/separation/hparams/skim.yaml
+++ b/recipes/WSJ0Mix/separation/hparams/skim.yaml
@@ -37,7 +37,7 @@ num_spks: 2 # set to 3 for wsj0-3mix
 save_audio: False # Save estimated sources on disk
 sample_rate: 8000
 
-# Training parameters
+####################### Training Parameters ####################################
 N_epochs: 200
 batch_size: 1
 lr: 0.00015
diff --git a/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml b/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml
index 644ca89a2..0d9601d20 100644
--- a/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml
+++ b/recipes/ZaionEmotionDataset/emotion_diarization/hparams/train.yaml
@@ -30,7 +30,7 @@ train_annotation: !ref <output_folder>/train.json
 valid_annotation: !ref <output_folder>/valid.json
 test_annotation: !ref <output_folder>/test.json
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 15
 lr: 0.0001
 lr_wav2vec: 0.00001
diff --git a/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml b/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml
index db7c1ddb7..eff38c7bf 100644
--- a/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml
+++ b/recipes/fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml
@@ -13,7 +13,7 @@ train_csv: !ref <output_folder>/train.csv
 valid_csv: !ref <output_folder>/valid.csv
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 51  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/fluent-speech-commands/direct/hparams/train.yaml b/recipes/fluent-speech-commands/direct/hparams/train.yaml
index a7c072343..428faf144 100644
--- a/recipes/fluent-speech-commands/direct/hparams/train.yaml
+++ b/recipes/fluent-speech-commands/direct/hparams/train.yaml
@@ -32,14 +32,14 @@ rir_annotation: !ref <save_folder>/rir.csv
 
 tokenizer_file: https://www.dropbox.com/s/hvf2huofnq0sjbn/51_unigram.model?dl=1
 skip_prep: False
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 6
 batch_size: 16
 lr: 0.0003
 # token_type: unigram # ["unigram", "bpe", "char"]
 sorting: random
 
-# Model parameters
+####################### Model Parameters #######################################
 sample_rate: 16000
 emb_size: 128
 dec_neurons: 512
@@ -65,6 +65,8 @@ dataloader_opts:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -87,45 +89,32 @@ add_reverb: !new:speechbrain.augment.time_domain.AddReverb
     num_workers: !ref <num_workers>
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 3  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 3
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
     shuffle_augmentations: True
     min_augmentations: 1
     max_augmentations: 4
@@ -136,7 +125,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
-# Models
+############################## Models ##########################################
+
 asr_model_source: speechbrain/asr-crdnn-rnnlm-librispeech
 
 slu_enc: !new:speechbrain.nnet.containers.Sequential
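
These hparams files are parsed with HyperPyYAML, where !new: instantiates the
named class and !ref <key> substitutes another entry; inlining literals, as
this patch does throughout, therefore changes nothing about the objects that
get built. A small self-contained sketch:

    from hyperpyyaml import load_hyperpyyaml

    yaml_string = (
        "sample_rate: 16000\n"
        "speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb\n"
        "    orig_freq: !ref <sample_rate>\n"
    )
    hparams = load_hyperpyyaml(yaml_string)
    print(type(hparams["speed_perturb"]).__name__)  # SpeedPerturb
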
diff --git a/recipes/timers-and-such/LM/hparams/train.yaml b/recipes/timers-and-such/LM/hparams/train.yaml
index 485dd5426..f3ba652ed 100644
--- a/recipes/timers-and-such/LM/hparams/train.yaml
+++ b/recipes/timers-and-such/LM/hparams/train.yaml
@@ -23,7 +23,7 @@ csv_test_synth: !ref <output_folder>/test-synth-type=decoupled.csv
 csv_test_real: !ref <output_folder>/test-real-type=decoupled.csv
 skip_prep: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 10
 batch_size: 128
 lr: 0.0003
diff --git a/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml b/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml
index 7554a0342..2a9f39161 100644
--- a/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml
+++ b/recipes/timers-and-such/Tokenizer/hparams/tokenizer_bpe51.yaml
@@ -15,7 +15,7 @@ train_csv: !ref <output_folder>/train-type=direct.csv
 valid_csv: !ref <output_folder>/dev-real-type=direct.csv
 
 
-# Training parameters
+####################### Training Parameters ####################################
 token_type: unigram  # ["unigram", "bpe", "char"]
 token_output: 51  # index(blank/eos/bos/unk) = 0
 character_coverage: 1.0
diff --git a/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml b/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml
index 212752668..1ee56d561 100644
--- a/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml
+++ b/recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml
@@ -34,7 +34,7 @@ skip_prep: False
 ckpt_interval_minutes: 15 # save checkpoint every N min
 test_on_all_real: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 batch_size: 16
 lr: 0.0003
diff --git a/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml b/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml
index fcf6393e6..5f0d93d09 100644
--- a/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml
+++ b/recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml
@@ -34,7 +34,7 @@ skip_prep: False
 ckpt_interval_minutes: 15 # save checkpoint every N min
 test_on_all_real: False
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 batch_size: 16
 lr: 0.0003
diff --git a/recipes/timers-and-such/direct/hparams/train.yaml b/recipes/timers-and-such/direct/hparams/train.yaml
index 4fb574fc3..01909eb5b 100644
--- a/recipes/timers-and-such/direct/hparams/train.yaml
+++ b/recipes/timers-and-such/direct/hparams/train.yaml
@@ -38,14 +38,14 @@ data_folder_noise: !ref <data_folder>/noise # The noisy sequencies for data augm
 NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.zip?rlkey=j8b0n9kdjdr32o1f06t0cw5b7&dl=1
 noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 batch_size: 16
 lr: 0.0003
 # token_type: unigram # ["unigram", "bpe", "char"]
 sorting: random
 
-# Model parameters
+####################### Model Parameters #######################################
 sample_rate: 16000
 emb_size: 128
 dec_neurons: 512
@@ -71,6 +71,7 @@ dataloader_opts:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
 
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -80,56 +81,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
     csv_file: !ref <noise_annotation>
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -139,8 +121,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
 
-# Models
 asr_model_source: speechbrain/asr-crdnn-rnnlm-librispeech
 
 slu_enc: !new:speechbrain.nnet.containers.Sequential
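
The AddNoise block above mixes noise drawn from the prepared manifest at a
random SNR. A hedged sketch of the same call in Python; the manifest path is
hypothetical and must point to a CSV produced by prepare_dataset_from_URL:

    import torch
    from speechbrain.augment.time_domain import AddNoise

    add_noise = AddNoise(
        csv_file="results/save/noise.csv",  # hypothetical manifest path
        snr_low=0,                          # mix at SNRs between 0 and 15 dB
        snr_high=15,
        noise_sample_rate=16000,
        clean_sample_rate=16000,
        num_workers=4,
    )
    wavs = torch.randn(4, 16000)
    lens = torch.ones(4)
    noisy = add_noise(wavs, lens)
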
diff --git a/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml b/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml
index b9b451e91..b9ad3cfc2 100644
--- a/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml
+++ b/recipes/timers-and-such/direct/hparams/train_with_wav2vec2.yaml
@@ -37,7 +37,7 @@ ckpt_interval_minutes: 15 # save checkpoint every N min
 test_on_all_real: False
 
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 50
 batch_size: 8
 lr: 0.0004
@@ -49,7 +49,7 @@ freeze_wav2vec: False
 # token_type: unigram # ["unigram", "bpe", "char"]
 sorting: ascending
 
-# Model parameters
+####################### Model Parameters #######################################
 sample_rate: 16000
 emb_size: 128
 dec_neurons: 512
@@ -171,45 +171,31 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
         lr_annealing_wav2vec2: !ref <lr_annealing_wav2vec2>
         counter: !ref <epoch_counter>
 
-# Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
+############################## Augmentations ###################################
 
+# Speed perturbation
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
diff --git a/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml b/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml
index 3d1c4e156..b804df9f9 100644
--- a/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml
+++ b/recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml
@@ -39,7 +39,7 @@ NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.z
 noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script
 
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 batch_size: 16
 lr: 0.0003
@@ -73,6 +73,8 @@ dataloader_opts:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -81,57 +83,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
     csv_file: !ref <noise_annotation>
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
-# Augmenter: Combines previously defined augmentations to perform data augmentation
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -141,7 +123,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
-# Models
+############################## Models ##########################################
+
 asr_model: !apply:speechbrain.inference.ASR.EncoderDecoderASR.from_hparams
     source: speechbrain/asr-crdnn-rnnlm-librispeech
     run_opts: {"device":"cuda:0"}
diff --git a/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml b/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml
index d21f309c5..56eb59d20 100644
--- a/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml
+++ b/recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml
@@ -39,7 +39,7 @@ NOISE_DATASET_URL: https://www.dropbox.com/scl/fi/a09pj97s5ifan81dqhi4n/noises.z
 noise_annotation: !ref <save_folder>/noise.csv #The data manifest files are created by the data preparation script
 
 
-# Training parameters
+####################### Training Parameters ####################################
 number_of_epochs: 1
 batch_size: 16
 lr: 0.0003
@@ -73,6 +73,8 @@ dataloader_opts:
 epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
     limit: !ref <number_of_epochs>
 
+############################## Augmentations ###################################
+
 # Download and prepare the dataset of noisy sequences for augmentation
 prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
     URL: !ref <NOISE_DATASET_URL>
@@ -81,57 +83,37 @@ prepare_noise_data: !name:speechbrain.augment.preparation.prepare_dataset_from_U
     csv_file: !ref <noise_annotation>
 
 # Add noise to input signal
-snr_low: 0  # Min SNR for noise augmentation
-snr_high: 15  # Max SNR for noise augmentation
-
 add_noise: !new:speechbrain.augment.time_domain.AddNoise
     csv_file: !ref <noise_annotation>
-    snr_low: !ref <snr_low>
-    snr_high: !ref <snr_high>
+    snr_low: 0
+    snr_high: 15
     noise_sample_rate: !ref <sample_rate>
     clean_sample_rate: !ref <sample_rate>
     num_workers: !ref <num_workers>
 
 # Speed perturbation
-speed_changes: [95, 100, 105]  # List of speed changes for time-stretching
-
 speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
     orig_freq: !ref <sample_rate>
-    speeds: !ref <speed_changes>
+    speeds: [95, 100, 105]
 
 # Frequency drop: randomly drops a number of frequency bands to zero.
-drop_freq_low: 0  # Min frequency band dropout probability
-drop_freq_high: 1  # Max frequency band dropout probability
-drop_freq_count_low: 1  # Min number of frequency bands to drop
-drop_freq_count_high: 3  # Max number of frequency bands to drop
-drop_freq_width: 0.05  # Width of frequency bands to drop
-
 drop_freq: !new:speechbrain.augment.time_domain.DropFreq
-    drop_freq_low: !ref <drop_freq_low>
-    drop_freq_high: !ref <drop_freq_high>
-    drop_freq_count_low: !ref <drop_freq_count_low>
-    drop_freq_count_high: !ref <drop_freq_count_high>
-    drop_freq_width: !ref <drop_freq_width>
+    drop_freq_low: 0
+    drop_freq_high: 1
+    drop_freq_count_low: 1
+    drop_freq_count_high: 3
+    drop_freq_width: 0.05
 
 # Time drop: randomly drops a number of temporal chunks.
-drop_chunk_count_low: 1  # Min number of audio chunks to drop
-drop_chunk_count_high: 5  # Max number of audio chunks to drop
-drop_chunk_length_low: 1000  # Min length of audio chunks to drop
-drop_chunk_length_high: 2000  # Max length of audio chunks to drop
-
 drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
-    drop_length_low: !ref <drop_chunk_length_low>
-    drop_length_high: !ref <drop_chunk_length_high>
-    drop_count_low: !ref <drop_chunk_count_low>
-    drop_count_high: !ref <drop_chunk_count_high>
+    drop_length_low: 1000
+    drop_length_high: 2000
+    drop_count_low: 1
+    drop_count_high: 5
 
-# Augmenter: Combines previously defined augmentations to perform data augmentation
 # Augmenter: Combines previously defined augmentations to perform data augmentation
 wav_augment: !new:speechbrain.augment.augmenter.Augmenter
-    parallel_augment: False
     concat_original: True
-    repeat_augment: 1
-    shuffle_augmentations: False
     min_augmentations: 4
     max_augmentations: 4
     augment_prob: 1.0
@@ -141,6 +123,8 @@ wav_augment: !new:speechbrain.augment.augmenter.Augmenter
         !ref <drop_freq>,
         !ref <drop_chunk>]
 
+############################## Models ##########################################
+
 # Models
 asr_model: !apply:speechbrain.inference.ASR.EncoderDecoderASR.from_hparams
     source: speechbrain/asr-crdnn-rnnlm-librispeech
diff --git a/speechbrain/augment/augmenter.py b/speechbrain/augment/augmenter.py
index 25ea17480..55aee785c 100644
--- a/speechbrain/augment/augmenter.py
+++ b/speechbrain/augment/augmenter.py
@@ -438,20 +438,52 @@ class Augmenter(torch.nn.Module):
 
         return output, output_lengths
 
+    def replicate_multiple_labels(self, *args):
+        """
+        Replicates the labels along the batch axis a number of times that
+        corresponds to the number of augmentations. This is needed because
+        parallel and concatenated augmentations enlarge the batch dimension.
+
+        Arguments
+        ---------
+        *args : torch.Tensor
+            Input label tensors to be replicated. Can be a single tensor or
+            several tensors.
+
+        Returns
+        -------
+        augmented_labels: list of torch.Tensor
+            Labels corresponding to the augmented input. Returns as many
+            tensors as given in input.
+        """
+
+        # Determine whether to apply data augmentation
+        if not self.do_augment:
+            return list(args)
+
+        list_of_augmented_labels = []
+
+        for labels in args:
+            list_of_augmented_labels.append(self.replicate_labels(labels))
+
+        return list_of_augmented_labels
+
     def replicate_labels(self, labels):
         """
         Replicates the labels along the batch axis a number of times that
-        corresponds to the number of augmentations.
+        corresponds to the number of augmentations. This is needed because
+        parallel and concatenated augmentations enlarge the batch dimension.
 
         Arguments
         ---------
         labels : torch.Tensor
-            Input label tensor to be replicated.
+            Input label tensor to be replicated along the batch axis.
 
         Returns
         -------
         augmented_labels: torch.Tensor
-            Labels corresponding to the augmented input.
+            Labels replicated along the batch axis to match the size of the
+            augmented batch.
         """
 
         # Determine whether to apply data augmentation
@@ -477,6 +509,7 @@ class Augmenter(torch.nn.Module):
         )
 
         augmented_labels = torch.cat(augmented_labels, dim=0)
+
         return augmented_labels
 
     def check_min_max_augmentations(self):
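
A hedged usage sketch for the two methods above, mirroring how the refactored
recipes pair them with wav_augment (tensor names and shapes are purely
illustrative): once the augmenter enlarges the wave batch, every label tensor
must be replicated to match.

    import torch
    from speechbrain.augment.augmenter import Augmenter
    from speechbrain.augment.time_domain import DropFreq

    wav_augment = Augmenter(
        concat_original=True,
        min_augmentations=1,
        max_augmentations=1,
        augment_prob=1.0,
        augmentations=[DropFreq()],
    )

    wavs = torch.randn(4, 16000)
    wav_lens = torch.ones(4)
    tokens = torch.randint(0, 30, (4, 12))  # e.g. BPE targets
    tokens_lens = torch.ones(4)

    wavs, wav_lens = wav_augment(wavs, wav_lens)  # batch: 4 -> 8
    tokens, tokens_lens = wav_augment.replicate_multiple_labels(
        tokens, tokens_lens  # both tensors: 4 -> 8 rows
    )
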
-- 
GitLab