diff --git a/main_lightning.ipynb b/main_lightning.ipynb
index 31cb0c90e9e2a9461e5e04fadca41e40544c97d6..d8c348bb835b33abca1244d72ffcff04239f82c6 100644
--- a/main_lightning.ipynb
+++ b/main_lightning.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 43,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -38,7 +38,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Wed Jun 14 17:04:39 2023       \n",
+      "Fri Jun 16 07:18:31 2023       \n",
       "+---------------------------------------------------------------------------------------+\n",
       "| NVIDIA-SMI 531.29                 Driver Version: 531.29       CUDA Version: 12.1     |\n",
       "|-----------------------------------------+----------------------+----------------------+\n",
@@ -47,7 +47,7 @@
       "|                                         |                      |               MIG M. |\n",
       "|=========================================+======================+======================|\n",
       "|   0  NVIDIA GeForce RTX 2080 Ti    WDDM | 00000000:0E:00.0  On |                  N/A |\n",
-      "| 41%   49C    P8               43W / 260W|   2505MiB / 11264MiB |     17%      Default |\n",
+      "| 41%   45C    P8               37W / 260W|   3417MiB / 11264MiB |     23%      Default |\n",
       "|                                         |                      |                  N/A |\n",
       "+-----------------------------------------+----------------------+----------------------+\n",
       "                                                                                         \n",
@@ -56,39 +56,35 @@
       "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
       "|        ID   ID                                                             Usage      |\n",
       "|=======================================================================================|\n",
-      "|    0   N/A  N/A      2412    C+G   ...inaries\\Win64\\EpicGamesLauncher.exe    N/A      |\n",
       "|    0   N/A  N/A      3144    C+G   ...a\\Local\\Mozilla Firefox\\firefox.exe    N/A      |\n",
       "|    0   N/A  N/A      3400    C+G   ..._x64__kzf8qxf38zg5c\\Skype\\Skype.exe    N/A      |\n",
       "|    0   N/A  N/A      3752    C+G   ...GeForce Experience\\NVIDIA Share.exe    N/A      |\n",
       "|    0   N/A  N/A      4240    C+G   ...1.0_x64__8wekyb3d8bbwe\\Video.UI.exe    N/A      |\n",
       "|    0   N/A  N/A      6468    C+G   ....Search_cw5n1h2txyewy\\SearchApp.exe    N/A      |\n",
       "|    0   N/A  N/A      6828    C+G   ...rm 2020.3.3\\jbr\\bin\\jcef_helper.exe    N/A      |\n",
-      "|    0   N/A  N/A      9500    C+G   ....0_x64__8wekyb3d8bbwe\\HxOutlook.exe    N/A      |\n",
       "|    0   N/A  N/A      9780    C+G   ..._x64__kzf8qxf38zg5c\\Skype\\Skype.exe    N/A      |\n",
+      "|    0   N/A  N/A     11044    C+G   ....0_x64__8wekyb3d8bbwe\\HxOutlook.exe    N/A      |\n",
       "|    0   N/A  N/A     11628    C+G   C:\\Windows\\explorer.exe                   N/A      |\n",
+      "|    0   N/A  N/A     11652    C+G   ...61.0_x64__8wekyb3d8bbwe\\GameBar.exe    N/A      |\n",
       "|    0   N/A  N/A     12416    C+G   ...2txyewy\\StartMenuExperienceHost.exe    N/A      |\n",
+      "|    0   N/A  N/A     12892    C+G   ..._8wekyb3d8bbwe\\Microsoft.Photos.exe    N/A      |\n",
       "|    0   N/A  N/A     14040    C+G   ...302.5.0_x64__8wekyb3d8bbwe\\Time.exe    N/A      |\n",
       "|    0   N/A  N/A     14792    C+G   ...GeForce Experience\\NVIDIA Share.exe    N/A      |\n",
       "|    0   N/A  N/A     16016    C+G   ...CBS_cw5n1h2txyewy\\TextInputHost.exe    N/A      |\n",
       "|    0   N/A  N/A     16612    C+G   ...ft Office\\root\\Office16\\OUTLOOK.EXE    N/A      |\n",
       "|    0   N/A  N/A     17024    C+G   ....Search_cw5n1h2txyewy\\SearchApp.exe    N/A      |\n",
-      "|    0   N/A  N/A     17124    C+G   ...oogle\\Chrome\\Application\\chrome.exe    N/A      |\n",
       "|    0   N/A  N/A     17368    C+G   ...l\\Microsoft\\Teams\\current\\Teams.exe    N/A      |\n",
       "|    0   N/A  N/A     20412    C+G   ...on\\114.0.1823.43\\msedgewebview2.exe    N/A      |\n",
       "|    0   N/A  N/A     20660    C+G   ...air\\Corsair iCUE5 Software\\iCUE.exe    N/A      |\n",
-      "|    0   N/A  N/A     23044    C+G   ...\\cef\\cef.win7x64\\steamwebhelper.exe    N/A      |\n",
+      "|    0   N/A  N/A     23236    C+G   ...ne\\Binaries\\Win64\\EpicWebHelper.exe    N/A      |\n",
       "|    0   N/A  N/A     23360    C+G   ...Canary\\app-1.0.66\\DiscordCanary.exe    N/A      |\n",
-      "|    0   N/A  N/A     24680    C+G   ...ne\\Binaries\\Win64\\EpicWebHelper.exe    N/A      |\n",
-      "|    0   N/A  N/A     25200    C+G   ...on\\wallpaper_engine\\wallpaper32.exe    N/A      |\n",
+      "|    0   N/A  N/A     25140    C+G   ...on\\wallpaper_engine\\wallpaper32.exe    N/A      |\n",
       "|    0   N/A  N/A     25596    C+G   ...e Stream\\76.0.3.0\\GoogleDriveFS.exe    N/A      |\n",
-      "|    0   N/A  N/A     25952    C+G   ..._8wekyb3d8bbwe\\Microsoft.Photos.exe    N/A      |\n",
       "|    0   N/A  N/A     26716    C+G   C:\\Program Files\\RaiderIO\\RaiderIO.exe    N/A      |\n",
       "|    0   N/A  N/A     27700    C+G   ...les (x86)\\Overwolf\\old_Overwolf.exe    N/A      |\n",
       "|    0   N/A  N/A     28444    C+G   ...cordPTB\\app-1.0.1027\\DiscordPTB.exe    N/A      |\n",
-      "|    0   N/A  N/A     29192    C+G   ...les (x86)\\Battle.net\\Battle.net.exe    N/A      |\n",
       "|    0   N/A  N/A     31192    C+G   ...wolf\\0.223.0.33\\OverwolfBrowser.exe    N/A      |\n",
       "|    0   N/A  N/A     31576    C+G   C:\\Program Files\\NordVPN\\NordVPN.exe      N/A      |\n",
-      "|    0   N/A  N/A     31956    C+G   ...ekyb3d8bbwe\\PhoneExperienceHost.exe    N/A      |\n",
       "|    0   N/A  N/A     32976    C+G   ...ft Office\\root\\Office16\\WINWORD.EXE    N/A      |\n",
       "|    0   N/A  N/A     34400    C+G   ...02.0_x86__zpdnekdrzrea0\\Spotify.exe    N/A      |\n",
       "|    0   N/A  N/A     34932    C+G   ...ft Office\\root\\Office16\\ONENOTE.EXE    N/A      |\n",
@@ -96,13 +92,19 @@
       "|    0   N/A  N/A     37420    C+G   ...l\\Microsoft\\Teams\\current\\Teams.exe    N/A      |\n",
       "|    0   N/A  N/A     37968    C+G   ...al\\Discord\\app-1.0.9013\\Discord.exe    N/A      |\n",
       "|    0   N/A  N/A     38508    C+G   ...t.LockApp_cw5n1h2txyewy\\LockApp.exe    N/A      |\n",
+      "|    0   N/A  N/A     40684    C+G   ...inaries\\Win64\\EpicGamesLauncher.exe    N/A      |\n",
       "|    0   N/A  N/A     42416    C+G   ...ft Office\\root\\Office16\\WINWORD.EXE    N/A      |\n",
-      "|    0   N/A  N/A     42952    C+G   ...crosoft\\Edge\\Application\\msedge.exe    N/A      |\n",
+      "|    0   N/A  N/A     42904    C+G   ...oogle\\Chrome\\Application\\chrome.exe    N/A      |\n",
       "|    0   N/A  N/A     44812    C+G   ...cal\\Microsoft\\OneDrive\\OneDrive.exe    N/A      |\n",
+      "|    0   N/A  N/A     44944    C+G   ...crosoft\\Edge\\Application\\msedge.exe    N/A      |\n",
+      "|    0   N/A  N/A     46752    C+G   ...\\cef\\cef.win7x64\\steamwebhelper.exe    N/A      |\n",
       "|    0   N/A  N/A     47144    C+G   ...a\\Local\\Mozilla Firefox\\firefox.exe    N/A      |\n",
       "|    0   N/A  N/A     47776    C+G   ...siveControlPanel\\SystemSettings.exe    N/A      |\n",
-      "|    0   N/A  N/A     49192    C+G   ...0_x64__8wekyb3d8bbwe\\HxAccounts.exe    N/A      |\n",
-      "|    0   N/A  N/A     49296    C+G   ...sair iCUE5 Software\\QmlRenderer.exe    N/A      |\n",
+      "|    0   N/A  N/A     47892    C+G   ...0_x64__8wekyb3d8bbwe\\HxAccounts.exe    N/A      |\n",
+      "|    0   N/A  N/A     52648      C   ...\\uwu\\miniconda3\\envs\\uni\\python.exe    N/A      |\n",
+      "|    0   N/A  N/A     53120    C+G   ...sair iCUE5 Software\\QmlRenderer.exe    N/A      |\n",
+      "|    0   N/A  N/A     57048    C+G   ...ager\\Mendeley Reference Manager.exe    N/A      |\n",
+      "|    0   N/A  N/A     58088      C   ...\\uwu\\miniconda3\\envs\\uni\\python.exe    N/A      |\n",
       "+---------------------------------------------------------------------------------------+\n"
      ]
     }
@@ -122,7 +124,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 44,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -250,8 +252,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "outputs": [],
+   "execution_count": 64,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
+      "Requirement already satisfied: wandb in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (0.15.4)\n",
+      "Requirement already satisfied: docker-pycreds>=0.4.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (0.4.0)\n",
+      "Requirement already satisfied: protobuf!=4.21.0,<5,>=3.19.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (3.20.3)\n",
+      "Requirement already satisfied: GitPython!=3.1.29,>=1.0.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (3.1.31)\n",
+      "Requirement already satisfied: psutil>=5.0.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (5.9.0)\n",
+      "Requirement already satisfied: appdirs>=1.4.3 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (1.4.4)\n",
+      "Requirement already satisfied: pathtools in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (0.1.2)\n",
+      "Requirement already satisfied: setproctitle in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (1.3.2)\n",
+      "Requirement already satisfied: sentry-sdk>=1.0.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (1.25.1)\n",
+      "Requirement already satisfied: requests<3,>=2.0.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (2.28.1)\n",
+      "Requirement already satisfied: setuptools in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (65.5.0)\n",
+      "Requirement already satisfied: Click!=8.0.0,>=7.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (8.1.3)\n",
+      "Requirement already satisfied: PyYAML in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (6.0)\n",
+      "Requirement already satisfied: typing-extensions in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from wandb) (4.3.0)\n",
+      "Requirement already satisfied: colorama in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from Click!=8.0.0,>=7.0->wandb) (0.4.5)\n",
+      "Requirement already satisfied: six>=1.4.0 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from docker-pycreds>=0.4.0->wandb) (1.16.0)\n",
+      "Requirement already satisfied: gitdb<5,>=4.0.1 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from GitPython!=3.1.29,>=1.0.0->wandb) (4.0.10)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from requests<3,>=2.0.0->wandb) (3.4)\n",
+      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from requests<3,>=2.0.0->wandb) (1.26.12)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from requests<3,>=2.0.0->wandb) (2022.12.7)\n",
+      "Requirement already satisfied: charset-normalizer<3,>=2 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from requests<3,>=2.0.0->wandb) (2.0.4)\n",
+      "Requirement already satisfied: smmap<6,>=3.0.1 in c:\\users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages (from gitdb<5,>=4.0.1->GitPython!=3.1.29,>=1.0.0->wandb) (5.0.0)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": "True"
+     },
+     "execution_count": 64,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "!pip install wandb\n",
     "import wandb\n",
@@ -277,7 +317,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 46,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -313,7 +353,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 47,
    "metadata": {
     "id": "S_hdzQw7SJcf",
     "pycharm": {
@@ -366,7 +406,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 48,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -419,7 +459,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 49,
    "metadata": {
     "id": "F1B-z30LSJch",
     "pycharm": {
@@ -446,7 +486,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 50,
    "metadata": {
     "id": "CdN1RkZISJci",
     "pycharm": {
@@ -476,7 +516,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 51,
    "metadata": {
     "id": "ewZoXDzfSJcj",
     "pycharm": {
@@ -485,9 +525,9 @@
    },
    "outputs": [],
    "source": [
-    "train_split_percentage = 100  # percentage of SPLIT\n",
-    "validate_split_percentage = 10\n",
-    "test_split_percentage = 10"
+    "train_split_percentage = 1  # percentage of SPLIT\n",
+    "validate_split_percentage = 1\n",
+    "test_split_percentage = 1"
    ]
   },
   {
@@ -503,7 +543,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 52,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -530,7 +570,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 53,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -561,15 +601,15 @@
       "Adding time to model output: True\n",
       "\n",
       "Dataset configuration:\n",
-      "Train Split Percentage: 100\n",
-      "Validation Split Percentage: 10\n",
-      "Test Split Percentage: 10\n",
+      "Train Split Percentage: 1\n",
+      "Validation Split Percentage: 1\n",
+      "Test Split Percentage: 1\n",
       "\n",
       "Training configuration:\n",
       "Number of training epochs: 8\n",
       "Number of k-folds: 2\n",
       "Batch size: 64\n",
-      "Mixed Precision: 16\n",
+      "Mixed Precision: 16-mixed\n",
       "Using Lightning: True\n"
      ]
     }
@@ -618,7 +658,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 54,
    "metadata": {
     "id": "AeBz4MDhSJcl",
     "pycharm": {
@@ -644,7 +684,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 55,
    "metadata": {
     "id": "VE40qSLQSJcl",
     "pycharm": {
@@ -670,7 +710,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 56,
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
@@ -696,7 +736,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 57,
    "metadata": {
     "id": "r4QqkQRHSJcn",
     "pycharm": {
@@ -719,7 +759,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 58,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -746,45 +786,21 @@
      "output_type": "stream",
      "text": [
       "Dataset configuration:\n",
-      "Train Split Percentage: 100\n",
-      "Validation Split Percentage: 10\n",
-      "Test Split Percentage: 10\n",
+      "Train Split Percentage: 1\n",
+      "Validation Split Percentage: 1\n",
+      "Test Split Percentage: 1\n",
       "\n",
-      "Loading cnn_dailymail dataset 3.0.0 with split type: train[:100%]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loading cnn_dailymail dataset 3.0.0 with split type: validation[:10%]\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loading cnn_dailymail dataset 3.0.0 with split type: test[:10%]\n"
+      "Loading cnn_dailymail dataset 3.0.0 with split type: train[:1%]\n",
+      "Loading cnn_dailymail dataset 3.0.0 with split type: validation[:1%]\n",
+      "Loading cnn_dailymail dataset 3.0.0 with split type: test[:1%]\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n",
+      "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n",
       "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n"
      ]
     }
@@ -812,7 +828,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 59,
    "metadata": {
     "id": "6KCU1KIcSJco",
     "pycharm": {
@@ -837,7 +853,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 59,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -872,7 +888,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 60,
    "metadata": {
     "id": "3RcbH9C6SJcp",
     "pycharm": {
@@ -895,7 +911,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 61,
    "metadata": {
     "id": "ilLDPafbSJcq",
     "pycharm": {
@@ -921,7 +937,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 62,
    "metadata": {
     "id": "hf3b7EULSJcq",
     "pycharm": {
@@ -960,7 +976,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 63,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -1028,96 +1044,52 @@
       "Adding time to model output: True\n",
       "\n",
       "Dataset configuration:\n",
-      "Train Split Percentage: 100\n",
-      "Validation Split Percentage: 10\n",
-      "Test Split Percentage: 10\n",
+      "Train Split Percentage: 1\n",
+      "Validation Split Percentage: 1\n",
+      "Test Split Percentage: 1\n",
       "\n",
       "Training configuration:\n",
       "Number of training epochs: 8\n",
       "Number of k-folds: 2\n",
       "Batch size: 64\n",
-      "Mixed Precision: 16\n",
+      "Mixed Precision: 16-mixed\n",
+      "Using Lightning: True\n",
+      "Loading cnn_dailymail dataset 3.0.0 with split type: train[:1%]\n",
+      "Loading cnn_dailymail dataset 3.0.0 with split type: validation[:1%]\n",
+      "Loading cnn_dailymail dataset 3.0.0 with split type: test[:1%]\n",
+      "Pad token is: 0\n",
+      "Pad token is: 0\n",
+      "Pad token is: 0\n",
+      "Pad token is: 0\n",
+      "Program configuration:\n",
+      "Verbose Level: 1\n",
+      "Adding time to model output: True\n",
+      "\n",
+      "Dataset configuration:\n",
+      "Train Split Percentage: 1\n",
+      "Validation Split Percentage: 1\n",
+      "Test Split Percentage: 1\n",
+      "\n",
+      "Training configuration:\n",
+      "Number of training epochs: 8\n",
+      "Number of k-folds: 2\n",
+      "Batch size: 64\n",
+      "Mixed Precision: 16-mixed\n",
       "Using Lightning: True\n"
      ]
     },
     {
      "data": {
-      "text/html": [
-       "<div style=\"display:none\">\n",
-       "                <audio onended=\"this.parentNode.removeChild(this)\"  controls=\"controls\" autoplay=\"autoplay\">\n",
-       "                    <source src=\"data:audio/wav;base64,UklGRl4RAABXQVZFZm10IBAAAAABAAEARKwAAIhYAQACABAAZGF0YToRAAAAAAUIAxDwF8YfeycJL2c2jz15RB1LdlF9VyxdfmJsZ/JrC3Czc+d2pHnme6t98X64f/5/xH8Jf859FHzdeSx3A3RlcFZs2Wf1YqxdBVgGUrRLFkUzPhA3ty8tKHsgqBi8EMAIugC1+Lfwx+jv4DfZpdFCyhXDJrx6tRqvC6lUo/qdA5lzlFCQnYxeiZeGSoR5gieBVYADgDGA4YAQgr+D6oWQiK+LQo9Hk7qXlZzVoXOna621s026KsFHyJzPItfR3qHmi+6G9ov+kAaQDoEWWx4XJq0tFDVGPDxD7klVUGtWK1yNYY5mJ2tUbxJzW3YteYZ7Yn2/fp5/+3/YfzR/EH5ufE56snefdBZxG22yaOBjqV4TWSRT4ExQRng/YTgRMY8p5CEWGi4SNAowAir6KvI36lrim9oC05bLX8Rjvau2PbAfqlek7J7imUCVCJFBjeyJD4eshMSCW4FxgAiAH4C3gNCBZ4N8hQyIFYuTjoSS45asm9mgZ6ZPrIuyFbnmv/jGQ87B1WjdM+UZ7RL1Ff0bBRwNERXwHLIkTyy/M/w6/UG7SDFPVlUmW5pgrWVZappubHLLdbN4InsVfYl+f3/0f+h/XH9PfsN8uno1eDd1w3HcbYdpyGSjXx5aPlQKTodHu0CvOWky8CpLI4MboBOpC6YDoPud86jrxuMB3GHU7MyqxaS+37djsTWrXqXhn8WaEJbEkeiNf4qMhxKFFIOTgZKAEYARgJKAk4EUgxKFjId/iuiNxJEQlsWa4Z9epTWrY7Hft6S+qsXszGHUAdzG46jrnfOg+6YDqQugE4MbSyPwKmkyrzm7QIdHCk4+VB5ao1/IZIdp3G3DcTd1NXi6esN8T35cf+h/9H9/f4l+FX0ie7N4y3VscppuWWqtZZpgJltWVTFPu0j9Qfw6vzNPLLIk8BwRFRwNGwUV/RL1Ge0z5WjdwdVDzvjG5r8VuYuyT6xnptmgrJvjloSSk44ViwyIfIVng9CBt4AfgAiAcYBbgcSCrIQPh+yJQY0IkUCV4pnsnlekH6o9sKu2Y71fxJbLAtOb2lriN+oq8ir6MAI0Ci4SFhrkIY8pETFhOHg/UEbgTCRTE1mpXuBjsmgbbRZxn3Syd056bnwQfjR/2H/7f55/v35ifYZ7LXlbdhJzVG8na45mjWErXGtWVVDuSTxDRjwUNa0tFyZbHoEWkA6QBov+hvaL7qHm0d4i15zPR8gqwU26tbNrrXOn1aGVnLqXR5NCj6+LkIjqhb+DEILhgDGAA4BVgCeBeYJKhJeGXomdjFCQc5QDmfqdVKMLqRqverUmvBXDQsql0TfZ7+DH6Lfwtfi6AMAIvBCoGHsgLSi3LxA3Mz4WRbRLBlIFWKxd9WLZZ1ZsZXADdCx33XkUfM59CX/Ef/5/uH/xfqt95nukeed2s3MLcPJrbGd+YixdfVd2UR1LeUSPPWc2CS97J8Yf8BcDEAUIAAD79/3vEOg64IXY99CZyXHCh7vjtIqug6jUooKdlJgOlPWPTYwZiVyGGoRVgg+BSIACgDyA94AyguyDI4bUiP2Lm4+qkyeYC51Uovun+q1MtOq6zcHwyEnQ09eF31jnRO9A90b/SwdJDzkXER/JJlsuvjXrPNpDhkrmUPVWrFwGYv1mjWuwb2NzonZpebZ7h33Zfqt//X/Pfx9/8H1BfBZ6cHdRdL5wuWxGaGtjK16NWJVSS0yzRdY+uTdkMN4oLyFfGXURegl1AXD5cPF/6aXh6dlT0uzKusPEvBK2q6+VqdWjc55ymdmUrJDujKWJ04Z6hJ6CQYFigAWAKIDMgPCBkoOyhU6IYYvqjuWSTpcgnFeh7abcrCCzsLmIwJ/H785x1hze6uXS7cz10P3WBdYNyRWmHWUl/ixqNKE7nUJVScNP4VWpWxRhHmbAavhuv3IUdvF4VHs8faV+j3/4f+F/SX8wfpl8hHr0d+t0bXF8bR1pVGQnX5lZsVN1TetGGkAIOb0xPyqYIs0a5xLuCusC5frk8u/qEONO27HTQcwExQO+RbfPsKqq2qRmn1Oap5VmkZSNNYpNh96E64J3gYGADIAYgKSAsYE9g0aFy4fJij2OJJJ5ljibXaDipcKr9rF5uEW/UcaXzRDVtdx95GDsV/Ra/GAEYwxYFDoc/yOfKxQzVjpcQSFInU7LVKJaH2A7ZfBpPG4YcoF1dHjueux8bX5uf+9/739uf21+7HzuenR4gXUYcjxu8Gk7ZR9golrLVJ1OIUhcQVY6FDOfK/8jOhxYFGMMYARa/Ff0YOx95LXcENWXzVHGRb95uPaxwqvipV2gOJt5liSSPY7JisuHRoU9g7GBpIAYgAyAgYB3geuC3oRNhzWKlI1mkaeVU5pmn9qkqqrPsEW3A74ExUHMsdNO2xDj7+rk8uX66wLuCucSzRqYIj8qvTEIORpA60Z1TbFTmVknX1RkHWl8bW1x63T0d4R6mXwwfkl/4X/4f49/pX48fVR78XgUdr9y+G7Aah5mFGGpW+FVw09VSZ1CoTtqNP4sZSWmHckV1g3WBdD9zPXS7erlHN5x1u/On8eIwLC5ILPcrO2mV6EgnE6X5ZLqjmGLToiyhZKD8IHMgCiABYBigEGBnoJ6hNOGpYnujKyQ2ZRymXOe1aOVqauvErbEvLrD7MpT0unZpeF/6XDxcPl1AXoJdRFfGS8h3ihkMLk31j6zRUtMlVKNWCtea2NGaLlsvnBRdHB3FnpBfPB9H3/Pf/1/q3/Zfod9tntpeaJ2Y3Owb41r/WYGYqxc9VbmUIZK2kPrPL41Wy7JJhEfORdJD0sHRv9A90TvWOeF39PXSdDwyM3B6rpMtPqt+6dUogudJ5iqk5uP/YvUiCOG7IMygveAPIACgEiAD4FVghqEXIYZiU2M9Y8OlJSYgp3UooOoiq7jtIe7ccKZyffQhdg64BDo/e/79wAABQgDEPAXxh97JwkvZzaPPXlEHUt2UX1XLF1+Ymxn8msLcLNz53akeeZ7q33xfrh//n/Efwl/zn0UfN15LHcDdGVwVmzZZ/VirF0FWAZStEsWRTM+EDe3Ly0oeyCoGLwQwAi6ALX4t/DH6O/gN9ml0ULKFcMmvHq1Gq8LqVSj+p0DmXOUUJCdjF6Jl4ZKhHmCJ4FVgAOAMYDhgBCCv4PqhZCIr4tCj0eTupeVnNWhc6drrbWzTboqwUfInM8i19HeoeaL7ob2i/6QBpAOgRZbHhcmrS0UNUY8PEPuSVVQa1YrXI1hjmYna1RvEnNbdi15hntifb9+nn/7f9h/NH8Qfm58Tnqyd590FnEbbbJo4GOpXhNZJFPgTFBGeD9hOBExjynkIRYaLhI0CjACKvoq8jfqWuKb2gLTlstfxGO9q7Y9sB+qV6TsnuKZQJUIkUGN7IkPh6yExIJbgXGACIAfgLeA0IFng3yFDIgVi5OOhJLjlqyb2aBnpk+si7IVuea/+MZDzsHVaN0z5RntEvUV/RsFHA0RFfAcsiRPLL8z/Dr9QbtIMU9WVSZbmmCtZVlqmm5scst1s3giexV9iX5/f/R/6H9cf09+w3y6ejV4N3XDcdxth2nIZKNfHlo+VApOh0e7QK85aTLwKksjgxugE6kLpgOg+53zqOvG4wHcYdTszKrFpL7ft2OxNatepeGfxZoQlsSR6I1/ioyHEoUUg5OBkoARgBGAkoCTgRSDEoWMh3+K6I3EkRCWxZrhn16lNatjsd+3pL6qxezMYdQB3MbjqOud86D7pgOpC6ATgxtLI/AqaTKvObtAh0cKTj5UHlqjX8hkh2ncbcNxN3U1eLp6w3xPflx/6H/0f39/iX4VfSJ7s3jLdWxymm5Zaq1lmmAmW1ZVMU+7SP1B/Dq/M08ssiTwHBEVHA0bBRX9EvUZ7TPlaN3B1UPO+MbmvxW5i7JPrGem2aCsm+OWhJKTjhWLDIh8hWeD0IG3gB+ACIBxgFuBxIKshA+H7IlBjQiRQJXimeyeV6Qfqj2wq7ZjvV/ElssC05vaWuI36iryKvowAjQKLhIWGuQhjykRMWE4eD9QRuBMJFMTWale4GOyaBttFnGfdLJ3TnpufBB+NH/Yf/t/nn+/fmJ9hnsteVt2EnNUbydrjmaNYStca1ZVUO5JPENGPBQ1rS0XJlsegRaQDpAGi/6G9ovuoebR3iLXnM9HyCrBTbq1s2utc6fVoZWcupdHk0KPr4uQiOqFv4MQguGAMYADgFWAJ4F5gkqEl4ZeiZ2MUJBzlAOZ+p1UowupGq96tSa8FcNCyqXRN9nv4Mfot/C1+LoAwAi8EKgYeyAtKLcvEDczPhZFtEsGUgVYrF31YtlnVmxlcAN0LHfdeRR8zn0Jf8R//n+4f/F+q33me6R553azcwtw8mtsZ35iLF19V3ZRHUt5RI89ZzYJL3snxh/wFwMQBQgAAPv3/e8Q6Drghdj30JnJccKHu+O0iq6DqNSigp2UmA6U9Y9NjBmJXIYahFWCD4FIgAGAPID3gDKC7IMjhtSI/Yubj6qTJ5gLnVSi+6f6rUy06rrNwfDISdDT14XfWOdE70D3Rv9LB0kPORcRH8kmWy6+Nes82kOGSuZQ9VasXAZi/WaNa7BvY3Oidml5tnuHfdl+q3/9f89/H3/wfUF8Fnpwd1F0vnC5bEZoa2MrXo1YlVJLTLNF1j65N2Qw3igvIV8ZdRF6CXUBcPlw8X/ppeHp2VPS7Mq6w8S8Erarr5Wp1aNznnKZ2ZSskO6MpYnThnqEnoJBgWKABYAogMyA8IGSg7KFTohhi+qO5ZJOlyCcV6HtptysILOwuYjAn8fvznHWHN7q5dLtzPXQ/dYF1g3JFaYdZSX+LGo0oTudQlVJw0/hValbFGEeZsBq+G6/chR28XhUezx9pX6Pf/h/4X9JfzB+mXyEevR363RtcXxtHWlUZCdfmVmxU3VN60YaQAg5vTE/KpgizRrnEu4K6wLl+uTy7+oQ407bsdNBzATFA75Ft8+wqqrapGafU5qnlWaRlI01ik2H3oTrgneBgYAMgBiApICxgT2DRoXLh8mKPY4kknmWOJtdoOKlwqv2sXm4Rb9RxpfNENW13H3kYOxX9Fr8YARjDFgUOhz/I58rFDNWOlxBIUidTstUolofYDtl8Gk8bhhygXV0eO567Hxtfm5/73/vf25/bX7sfO56dHiBdRhyPG7waTtlH2CiWstUnU4hSFxBVjoUM58r/yM6HFgUYwxgBFr8V/Rg7H3ktdwQ1ZfNUcZFv3m49rHCq+KlXaA4m3mWJJI9jsmKy4dGhT2DsYGkgBiADICBgHeB64LehE2HNYqUjWaRp5VTmmaf2qSqqs+wRbcDvgTFQcyx007bEOPv6uTy5frrAu4K5xLNGpgiPyq9MQg5GkDrRnVNsVOZWSdfVGQdaXxtbXHrdPR3hHqZfDB+SX/hf/h/j3+lfjx9VHvxeBR2v3L4bsBqHmYUYalb4VXDT1VJnUKhO2o0/ixlJaYdyRXWDdYF0P3M9dLt6uUc3nHW786fx4jAsLkgs9ys7aZXoSCcTpflkuqOYYtOiLKFkoPwgcyAKIAFgGKAQYGegnqE04alie6MrJDZlHKZc57Vo5Wpq68StsS8usPsylPS6dml4X/pcPFw+XUBegl1EV8ZLyHeKGQwuTfWPrNFS0yVUo1YK15rY0ZouWy+cFF0cHcWekF88H0ff89//X+rf9l+h322e2l5onZjc7BvjWv9ZgZirFz1VuZQhkraQ+s8vjVbLskmER85F0kPSwdG/0D3RO9Y54Xf09dJ0PDIzcHquky0+q37p1SiC50nmKqTm4/9i9SII4bsgzKC94A8gAKASIAPgVWCGoRchhmJTYz1jw6UlJiCndSig6iKruO0h7txwpnJ99CF2DrgEOj97/v3AAA=\" type=\"audio/wav\" />\n",
-       "                    Your browser does not support the audio element.\n",
-       "                </audio>\n",
-       "              </div>"
-      ],
-      "text/plain": [
-       "<jupyter_beeper.Beeper.InvisibleAudio object>"
-      ]
+      "text/plain": "<IPython.core.display.HTML object>",
+      "text/html": ""
      },
      "metadata": {},
      "output_type": "display_data"
     },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']\n",
-      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']\n",
-      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
-      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']\n",
-      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
-      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Training BertBiLSTM\n",
-      "Available GPUs: 1\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages\\lightning_fabric\\connector.py:555: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!\n",
-      "  rank_zero_warn(\n",
-      "Using 16bit Automatic Mixed Precision (AMP)\n",
-      "GPU available: True (cuda), used: True\n",
-      "TPU available: False, using: 0 TPU cores\n",
-      "IPU available: False, using: 0 IPUs\n",
-      "HPU available: False, using: 0 HPUs\n",
-      "C:\\Users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages\\pytorch_lightning\\trainer\\connectors\\logger_connector\\logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default\n",
-      "  warning_cache.warn(\n",
-      "C:\\Users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages\\pytorch_lightning\\callbacks\\model_checkpoint.py:615: UserWarning: Checkpoint directory C:\\Users\\uwu\\PycharmProjects\\COMP3200\\Models exists and is not empty.\n",
-      "  rank_zero_warn(f\"Checkpoint directory {dirpath} exists and is not empty.\")\n",
-      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
-      "\n",
-      "  | Name      | Type       | Params\n",
-      "-----------------------------------------\n",
-      "0 | model     | BertBiLSTM | 165 M \n",
-      "1 | criterion | NLLLoss    | 0     \n",
-      "-----------------------------------------\n",
-      "56.4 M    Trainable params\n",
-      "109 M     Non-trainable params\n",
-      "165 M     Total params\n",
-      "663.376   Total estimated model params size (MB)\n"
-     ]
-    },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cddf8252852d4c4db3a905e2d7c4b8f4",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Sanity Checking: 0it [00:00, ?it/s]"
-      ]
+      "text/plain": "<IPython.core.display.HTML object>",
+      "text/html": ""
      },
      "metadata": {},
      "output_type": "display_data"
@@ -1126,62 +1098,39 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "C:\\Users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages\\pytorch_lightning\\trainer\\call.py:52: UserWarning: Detected KeyboardInterrupt, attempting graceful shutdown...\n",
-      "  rank_zero_warn(\"Detected KeyboardInterrupt, attempting graceful shutdown...\")\n",
-      "Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x0000020B93049EE0>\n",
-      "Traceback (most recent call last):\n",
-      "  File \"C:\\Users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages\\torch\\utils\\data\\dataloader.py\", line 1478, in __del__\n",
-      "    self._shutdown_workers()\n",
-      "  File \"C:\\Users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages\\torch\\utils\\data\\dataloader.py\", line 1436, in _shutdown_workers\n",
-      "    if self._persistent_workers or self._workers_status[worker_id]:\n",
-      "AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'\n",
-      "C:\\Users\\uwu\\miniconda3\\envs\\uni\\lib\\site-packages\\lightning_fabric\\connector.py:555: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!\n",
-      "  rank_zero_warn(\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Training BertDoubleDense\n",
-      "Available GPUs: 1\n"
+      "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n",
+      "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n",
+      "Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)\n",
+      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.6.crossattention.self.key.bias', 'bert.encoder.layer.10.crossattention.output.dense.bias', 'bert.encoder.layer.2.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.output.dense.bias', 'bert.encoder.layer.5.crossattention.self.query.weight', 'bert.encoder.layer.4.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.0.crossattention.self.query.weight', 'bert.encoder.layer.2.crossattention.self.key.bias', 'bert.encoder.layer.6.crossattention.output.dense.weight', 'bert.encoder.layer.10.crossattention.self.value.bias', 'bert.encoder.layer.11.crossattention.self.value.bias', 'bert.encoder.layer.8.crossattention.self.value.bias', 'bert.encoder.layer.4.crossattention.self.key.bias', 'bert.encoder.layer.7.crossattention.output.dense.bias', 'bert.encoder.layer.0.crossattention.output.dense.weight', 'bert.encoder.layer.5.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.11.crossattention.output.dense.weight', 'bert.encoder.layer.7.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.4.crossattention.self.query.bias', 'bert.encoder.layer.9.crossattention.output.dense.bias', 'bert.encoder.layer.2.crossattention.output.dense.weight', 'bert.encoder.layer.4.crossattention.self.query.weight', 'bert.encoder.layer.10.crossattention.self.query.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.5.crossattention.self.key.weight', 'bert.encoder.layer.7.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.9.crossattention.self.key.bias', 'bert.encoder.layer.11.crossattention.self.value.weight', 'bert.encoder.layer.0.crossattention.self.value.weight', 'bert.encoder.layer.3.crossattention.self.query.bias', 'bert.encoder.layer.10.crossattention.self.value.weight', 'bert.encoder.layer.3.crossattention.self.key.bias', 'bert.encoder.layer.9.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.self.query.bias', 'bert.encoder.layer.10.crossattention.self.key.bias', 'bert.encoder.layer.3.crossattention.self.value.bias', 'bert.encoder.layer.11.crossattention.self.query.bias', 'bert.encoder.layer.3.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.self.key.bias', 'bert.encoder.layer.9.crossattention.self.value.weight', 'bert.encoder.layer.10.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.self.key.weight', 'bert.encoder.layer.9.crossattention.self.value.bias', 'bert.encoder.layer.3.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.9.crossattention.self.key.weight', 'bert.encoder.layer.6.crossattention.self.value.bias', 'bert.encoder.layer.9.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.9.crossattention.output.dense.weight', 'bert.encoder.layer.8.crossattention.self.query.weight', 'bert.encoder.layer.1.crossattention.self.query.weight', 'bert.encoder.layer.10.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.3.crossattention.output.dense.weight', 'bert.encoder.layer.4.crossattention.self.key.weight', 'bert.encoder.layer.4.crossattention.output.dense.bias', 'bert.encoder.layer.10.crossattention.output.dense.weight', 'bert.encoder.layer.10.crossattention.self.key.weight', 'bert.encoder.layer.8.crossattention.self.key.weight', 'bert.encoder.layer.9.crossattention.self.query.bias', 'bert.encoder.layer.5.crossattention.self.query.bias', 'bert.encoder.layer.1.crossattention.self.key.bias', 'bert.encoder.layer.1.crossattention.output.dense.bias', 'bert.encoder.layer.8.crossattention.output.dense.weight', 'bert.encoder.layer.1.crossattention.self.value.bias', 'bert.encoder.layer.2.crossattention.output.dense.bias', 'bert.encoder.layer.11.crossattention.self.key.bias', 'bert.encoder.layer.7.crossattention.self.key.weight', 'bert.encoder.layer.11.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.3.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.8.crossattention.self.key.bias', 'bert.encoder.layer.6.crossattention.self.value.weight', 'bert.encoder.layer.1.crossattention.output.dense.weight', 'bert.encoder.layer.0.crossattention.self.query.bias', 'bert.encoder.layer.1.crossattention.self.key.weight', 'bert.encoder.layer.10.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.5.crossattention.output.dense.bias', 'bert.encoder.layer.3.crossattention.output.dense.bias', 'bert.encoder.layer.8.crossattention.output.dense.bias', 'bert.encoder.layer.6.crossattention.self.key.weight', 'bert.encoder.layer.0.crossattention.self.value.bias', 'bert.encoder.layer.6.crossattention.self.query.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.4.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.7.crossattention.self.query.weight', 'bert.encoder.layer.6.crossattention.self.query.bias', 'bert.encoder.layer.11.crossattention.output.dense.bias', 'bert.encoder.layer.3.crossattention.self.query.weight', 'bert.encoder.layer.8.crossattention.self.value.weight', 'bert.encoder.layer.9.crossattention.self.query.weight', 'bert.encoder.layer.8.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.4.crossattention.self.value.weight', 'bert.encoder.layer.4.crossattention.output.dense.weight', 'bert.encoder.layer.2.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.1.crossattention.self.query.bias', 'bert.encoder.layer.2.crossattention.self.value.weight', 'bert.encoder.layer.7.crossattention.self.value.weight', 'bert.encoder.layer.8.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.11.crossattention.self.query.weight', 'bert.encoder.layer.11.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.7.crossattention.output.dense.weight', 'bert.encoder.layer.2.crossattention.self.value.bias', 'bert.encoder.layer.4.crossattention.self.value.bias', 'bert.encoder.layer.5.crossattention.self.value.weight', 'bert.encoder.layer.7.crossattention.self.key.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.6.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.7.crossattention.self.value.bias', 'bert.encoder.layer.0.crossattention.self.key.bias', 'bert.encoder.layer.11.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.self.value.bias', 'bert.encoder.layer.7.crossattention.self.query.bias', 'bert.encoder.layer.6.crossattention.output.dense.bias', 'bert.encoder.layer.1.crossattention.self.value.weight', 'bert.encoder.layer.8.crossattention.self.query.bias', 'bert.encoder.layer.6.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.3.crossattention.self.value.weight', 'bert.encoder.layer.5.crossattention.output.dense.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.6.crossattention.self.key.bias', 'bert.encoder.layer.10.crossattention.output.dense.bias', 'bert.encoder.layer.2.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.output.dense.bias', 'bert.encoder.layer.5.crossattention.self.query.weight', 'bert.encoder.layer.4.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.0.crossattention.self.query.weight', 'bert.encoder.layer.2.crossattention.self.key.bias', 'bert.encoder.layer.6.crossattention.output.dense.weight', 'bert.encoder.layer.10.crossattention.self.value.bias', 'bert.encoder.layer.11.crossattention.self.value.bias', 'bert.encoder.layer.8.crossattention.self.value.bias', 'bert.encoder.layer.4.crossattention.self.key.bias', 'bert.encoder.layer.7.crossattention.output.dense.bias', 'bert.encoder.layer.0.crossattention.output.dense.weight', 'bert.encoder.layer.5.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.11.crossattention.output.dense.weight', 'bert.encoder.layer.7.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.4.crossattention.self.query.bias', 'bert.encoder.layer.9.crossattention.output.dense.bias', 'bert.encoder.layer.2.crossattention.output.dense.weight', 'bert.encoder.layer.4.crossattention.self.query.weight', 'bert.encoder.layer.10.crossattention.self.query.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.5.crossattention.self.key.weight', 'bert.encoder.layer.7.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.9.crossattention.self.key.bias', 'bert.encoder.layer.11.crossattention.self.value.weight', 'bert.encoder.layer.0.crossattention.self.value.weight', 'bert.encoder.layer.3.crossattention.self.query.bias', 'bert.encoder.layer.10.crossattention.self.value.weight', 'bert.encoder.layer.3.crossattention.self.key.bias', 'bert.encoder.layer.9.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.self.query.bias', 'bert.encoder.layer.10.crossattention.self.key.bias', 'bert.encoder.layer.3.crossattention.self.value.bias', 'bert.encoder.layer.11.crossattention.self.query.bias', 'bert.encoder.layer.3.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.self.key.bias', 'bert.encoder.layer.9.crossattention.self.value.weight', 'bert.encoder.layer.10.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.self.key.weight', 'bert.encoder.layer.9.crossattention.self.value.bias', 'bert.encoder.layer.3.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.9.crossattention.self.key.weight', 'bert.encoder.layer.6.crossattention.self.value.bias', 'bert.encoder.layer.9.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.9.crossattention.output.dense.weight', 'bert.encoder.layer.8.crossattention.self.query.weight', 'bert.encoder.layer.1.crossattention.self.query.weight', 'bert.encoder.layer.10.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.3.crossattention.output.dense.weight', 'bert.encoder.layer.4.crossattention.self.key.weight', 'bert.encoder.layer.4.crossattention.output.dense.bias', 'bert.encoder.layer.10.crossattention.output.dense.weight', 'bert.encoder.layer.10.crossattention.self.key.weight', 'bert.encoder.layer.8.crossattention.self.key.weight', 'bert.encoder.layer.9.crossattention.self.query.bias', 'bert.encoder.layer.5.crossattention.self.query.bias', 'bert.encoder.layer.1.crossattention.self.key.bias', 'bert.encoder.layer.1.crossattention.output.dense.bias', 'bert.encoder.layer.8.crossattention.output.dense.weight', 'bert.encoder.layer.1.crossattention.self.value.bias', 'bert.encoder.layer.2.crossattention.output.dense.bias', 'bert.encoder.layer.11.crossattention.self.key.bias', 'bert.encoder.layer.7.crossattention.self.key.weight', 'bert.encoder.layer.11.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.3.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.8.crossattention.self.key.bias', 'bert.encoder.layer.6.crossattention.self.value.weight', 'bert.encoder.layer.1.crossattention.output.dense.weight', 'bert.encoder.layer.0.crossattention.self.query.bias', 'bert.encoder.layer.1.crossattention.self.key.weight', 'bert.encoder.layer.10.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.5.crossattention.output.dense.bias', 'bert.encoder.layer.3.crossattention.output.dense.bias', 'bert.encoder.layer.8.crossattention.output.dense.bias', 'bert.encoder.layer.6.crossattention.self.key.weight', 'bert.encoder.layer.0.crossattention.self.value.bias', 'bert.encoder.layer.6.crossattention.self.query.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.4.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.7.crossattention.self.query.weight', 'bert.encoder.layer.6.crossattention.self.query.bias', 'bert.encoder.layer.11.crossattention.output.dense.bias', 'bert.encoder.layer.3.crossattention.self.query.weight', 'bert.encoder.layer.8.crossattention.self.value.weight', 'bert.encoder.layer.9.crossattention.self.query.weight', 'bert.encoder.layer.8.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.4.crossattention.self.value.weight', 'bert.encoder.layer.4.crossattention.output.dense.weight', 'bert.encoder.layer.2.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.1.crossattention.self.query.bias', 'bert.encoder.layer.2.crossattention.self.value.weight', 'bert.encoder.layer.7.crossattention.self.value.weight', 'bert.encoder.layer.8.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.11.crossattention.self.query.weight', 'bert.encoder.layer.11.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.7.crossattention.output.dense.weight', 'bert.encoder.layer.2.crossattention.self.value.bias', 'bert.encoder.layer.4.crossattention.self.value.bias', 'bert.encoder.layer.5.crossattention.self.value.weight', 'bert.encoder.layer.7.crossattention.self.key.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.6.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.7.crossattention.self.value.bias', 'bert.encoder.layer.0.crossattention.self.key.bias', 'bert.encoder.layer.11.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.self.value.bias', 'bert.encoder.layer.7.crossattention.self.query.bias', 'bert.encoder.layer.6.crossattention.output.dense.bias', 'bert.encoder.layer.1.crossattention.self.value.weight', 'bert.encoder.layer.8.crossattention.self.query.bias', 'bert.encoder.layer.6.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.3.crossattention.self.value.weight', 'bert.encoder.layer.5.crossattention.output.dense.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+      "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n",
+      "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+      "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+      "Some weights of BertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.6.crossattention.self.key.bias', 'bert.encoder.layer.10.crossattention.output.dense.bias', 'bert.encoder.layer.2.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.output.dense.bias', 'bert.encoder.layer.5.crossattention.self.query.weight', 'bert.encoder.layer.4.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.0.crossattention.self.query.weight', 'bert.encoder.layer.2.crossattention.self.key.bias', 'bert.encoder.layer.6.crossattention.output.dense.weight', 'bert.encoder.layer.10.crossattention.self.value.bias', 'bert.encoder.layer.11.crossattention.self.value.bias', 'bert.encoder.layer.8.crossattention.self.value.bias', 'bert.encoder.layer.4.crossattention.self.key.bias', 'bert.encoder.layer.7.crossattention.output.dense.bias', 'bert.encoder.layer.0.crossattention.output.dense.weight', 'bert.encoder.layer.5.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.11.crossattention.output.dense.weight', 'bert.encoder.layer.7.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.4.crossattention.self.query.bias', 'bert.encoder.layer.9.crossattention.output.dense.bias', 'bert.encoder.layer.2.crossattention.output.dense.weight', 'bert.encoder.layer.4.crossattention.self.query.weight', 'bert.encoder.layer.10.crossattention.self.query.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.5.crossattention.self.key.weight', 'bert.encoder.layer.7.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.9.crossattention.self.key.bias', 'bert.encoder.layer.11.crossattention.self.value.weight', 'bert.encoder.layer.0.crossattention.self.value.weight', 'bert.encoder.layer.3.crossattention.self.query.bias', 'bert.encoder.layer.10.crossattention.self.value.weight', 'bert.encoder.layer.3.crossattention.self.key.bias', 'bert.encoder.layer.9.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.self.query.bias', 'bert.encoder.layer.10.crossattention.self.key.bias', 'bert.encoder.layer.3.crossattention.self.value.bias', 'bert.encoder.layer.11.crossattention.self.query.bias', 'bert.encoder.layer.3.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.self.key.bias', 'bert.encoder.layer.9.crossattention.self.value.weight', 'bert.encoder.layer.10.crossattention.self.query.weight', 'bert.encoder.layer.0.crossattention.self.key.weight', 'bert.encoder.layer.9.crossattention.self.value.bias', 'bert.encoder.layer.3.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.9.crossattention.self.key.weight', 'bert.encoder.layer.6.crossattention.self.value.bias', 'bert.encoder.layer.9.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.9.crossattention.output.dense.weight', 'bert.encoder.layer.8.crossattention.self.query.weight', 'bert.encoder.layer.1.crossattention.self.query.weight', 'bert.encoder.layer.10.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.3.crossattention.output.dense.weight', 'bert.encoder.layer.4.crossattention.self.key.weight', 'bert.encoder.layer.4.crossattention.output.dense.bias', 'bert.encoder.layer.10.crossattention.output.dense.weight', 'bert.encoder.layer.10.crossattention.self.key.weight', 'bert.encoder.layer.8.crossattention.self.key.weight', 'bert.encoder.layer.9.crossattention.self.query.bias', 'bert.encoder.layer.5.crossattention.self.query.bias', 'bert.encoder.layer.1.crossattention.self.key.bias', 'bert.encoder.layer.1.crossattention.output.dense.bias', 'bert.encoder.layer.8.crossattention.output.dense.weight', 'bert.encoder.layer.1.crossattention.self.value.bias', 'bert.encoder.layer.2.crossattention.output.dense.bias', 'bert.encoder.layer.11.crossattention.self.key.bias', 'bert.encoder.layer.7.crossattention.self.key.weight', 'bert.encoder.layer.11.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.3.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.8.crossattention.self.key.bias', 'bert.encoder.layer.6.crossattention.self.value.weight', 'bert.encoder.layer.1.crossattention.output.dense.weight', 'bert.encoder.layer.0.crossattention.self.query.bias', 'bert.encoder.layer.1.crossattention.self.key.weight', 'bert.encoder.layer.10.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.5.crossattention.output.dense.bias', 'bert.encoder.layer.3.crossattention.output.dense.bias', 'bert.encoder.layer.8.crossattention.output.dense.bias', 'bert.encoder.layer.6.crossattention.self.key.weight', 'bert.encoder.layer.0.crossattention.self.value.bias', 'bert.encoder.layer.6.crossattention.self.query.weight', 'bert.encoder.layer.1.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.4.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.7.crossattention.self.query.weight', 'bert.encoder.layer.6.crossattention.self.query.bias', 'bert.encoder.layer.11.crossattention.output.dense.bias', 'bert.encoder.layer.3.crossattention.self.query.weight', 'bert.encoder.layer.8.crossattention.self.value.weight', 'bert.encoder.layer.9.crossattention.self.query.weight', 'bert.encoder.layer.8.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.4.crossattention.self.value.weight', 'bert.encoder.layer.4.crossattention.output.dense.weight', 'bert.encoder.layer.2.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.1.crossattention.self.query.bias', 'bert.encoder.layer.2.crossattention.self.value.weight', 'bert.encoder.layer.7.crossattention.self.value.weight', 'bert.encoder.layer.8.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.11.crossattention.self.query.weight', 'bert.encoder.layer.11.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.7.crossattention.output.dense.weight', 'bert.encoder.layer.2.crossattention.self.value.bias', 'bert.encoder.layer.4.crossattention.self.value.bias', 'bert.encoder.layer.5.crossattention.self.value.weight', 'bert.encoder.layer.7.crossattention.self.key.bias', 'bert.encoder.layer.0.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.6.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.2.crossattention.output.LayerNorm.weight', 'bert.encoder.layer.7.crossattention.self.value.bias', 'bert.encoder.layer.0.crossattention.self.key.bias', 'bert.encoder.layer.11.crossattention.self.key.weight', 'bert.encoder.layer.5.crossattention.self.value.bias', 'bert.encoder.layer.7.crossattention.self.query.bias', 'bert.encoder.layer.6.crossattention.output.dense.bias', 'bert.encoder.layer.1.crossattention.self.value.weight', 'bert.encoder.layer.8.crossattention.self.query.bias', 'bert.encoder.layer.6.crossattention.output.LayerNorm.bias', 'bert.encoder.layer.3.crossattention.self.value.weight', 'bert.encoder.layer.5.crossattention.output.dense.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
      ]
     },
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Using 16bit Automatic Mixed Precision (AMP)\n",
-      "GPU available: True (cuda), used: True\n",
-      "TPU available: False, using: 0 TPU cores\n",
-      "IPU available: False, using: 0 IPUs\n",
-      "HPU available: False, using: 0 HPUs\n",
-      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
-      "\n",
-      "  | Name      | Type            | Params\n",
-      "----------------------------------------------\n",
-      "0 | model     | BertDoubleDense | 133 M \n",
-      "1 | criterion | NLLLoss         | 0     \n",
-      "----------------------------------------------\n",
-      "24.1 M    Trainable params\n",
-      "109 M     Non-trainable params\n",
-      "133 M     Total params\n",
-      "534.177   Total estimated model params size (MB)\n"
+     "ename": "AttributeError",
+     "evalue": "module 'pytorch_lightning.loggers.wandb' has no attribute 'login'",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[1;31mAttributeError\u001B[0m                            Traceback (most recent call last)",
+      "Cell \u001B[1;32mIn [63], line 45\u001B[0m\n\u001B[0;32m     42\u001B[0m val_loader \u001B[38;5;241m=\u001B[39m DataLoader(validation_dataset, batch_size\u001B[38;5;241m=\u001B[39mbatch_size, num_workers\u001B[38;5;241m=\u001B[39mnum_cpus)\n\u001B[0;32m     44\u001B[0m output_config()\n\u001B[1;32m---> 45\u001B[0m \u001B[43mwandb\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mlogin\u001B[49m()\n\u001B[0;32m     47\u001B[0m b \u001B[38;5;241m=\u001B[39m jupyter_beeper\u001B[38;5;241m.\u001B[39mBeeper()\n\u001B[0;32m     48\u001B[0m b\u001B[38;5;241m.\u001B[39mbeep()\n",
+      "\u001B[1;31mAttributeError\u001B[0m: module 'pytorch_lightning.loggers.wandb' has no attribute 'login'"
      ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "601e8dbb8f484cec995018b06ca7c06f",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Sanity Checking: 0it [00:00, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
     }
    ],
    "source": [
@@ -1216,7 +1165,7 @@
     "num_cpus = os.cpu_count()\n",
     "num_gpus = [torch.cuda.device(i) for i in range(torch.cuda.device_count())]\n",
     "\n",
-    "if num_gpus>= 8:\n",
+    "if len(num_gpus)>= 8:\n",
     "    print(\"POWAAAAAA\")\n",
     "    strategy = \"ddp_notebook\"\n",
     "else:\n",
@@ -1229,7 +1178,6 @@
     "val_loader = DataLoader(validation_dataset, batch_size=batch_size, num_workers=num_cpus)\n",
     "\n",
     "output_config()\n",
-    "wandb.login()\n",
     "\n",
     "b = jupyter_beeper.Beeper()\n",
     "b.beep()\n",
@@ -1328,20 +1276,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {
     "id": "4pxxA9b0Xx8U"
    },
-   "outputs": [
-    {
-     "ename": "SyntaxError",
-     "evalue": "positional argument follows keyword argument (2290879988.py, line 6)",
-     "output_type": "error",
-     "traceback": [
-      "\u001B[1;36m  Cell \u001B[1;32mIn [7], line 6\u001B[1;36m\u001B[0m\n\u001B[1;33m    precision=\"16\",load_from_checkpoint(\"Models/epoch=7-val_loss=0.86-rouge=0.00.ckpt\"))\u001B[0m\n\u001B[1;37m                                                                                       ^\u001B[0m\n\u001B[1;31mSyntaxError\u001B[0m\u001B[1;31m:\u001B[0m positional argument follows keyword argument\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "model = BertLightning(BertSingleDense())\n",
     "test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_cpus)\n",