Commit 73e99f97 authored by eca1g19

Changed from 16 to 16-mixed fpp

parent 62981f71
%% Cell type:markdown id: tags:
Memory Check
%% Cell type:code id: tags:
``` python
!nvidia-smi
```
%% Output
Wed Jun 14 17:04:39 2023
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.29 Driver Version: 531.29 CUDA Version: 12.1 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 NVIDIA GeForce RTX 2080 Ti WDDM | 00000000:0E:00.0 On | N/A |
| 41% 49C P8 43W / 260W| 2505MiB / 11264MiB | 17% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| 0 N/A N/A 2412 C+G ...inaries\Win64\EpicGamesLauncher.exe N/A |
| 0 N/A N/A 3144 C+G ...a\Local\Mozilla Firefox\firefox.exe N/A |
| 0 N/A N/A 3400 C+G ..._x64__kzf8qxf38zg5c\Skype\Skype.exe N/A |
| 0 N/A N/A 3752 C+G ...GeForce Experience\NVIDIA Share.exe N/A |
| 0 N/A N/A 4240 C+G ...1.0_x64__8wekyb3d8bbwe\Video.UI.exe N/A |
| 0 N/A N/A 6468 C+G ....Search_cw5n1h2txyewy\SearchApp.exe N/A |
| 0 N/A N/A 6828 C+G ...rm 2020.3.3\jbr\bin\jcef_helper.exe N/A |
| 0 N/A N/A 9500 C+G ....0_x64__8wekyb3d8bbwe\HxOutlook.exe N/A |
| 0 N/A N/A 9780 C+G ..._x64__kzf8qxf38zg5c\Skype\Skype.exe N/A |
| 0 N/A N/A 11628 C+G C:\Windows\explorer.exe N/A |
| 0 N/A N/A 12416 C+G ...2txyewy\StartMenuExperienceHost.exe N/A |
| 0 N/A N/A 14040 C+G ...302.5.0_x64__8wekyb3d8bbwe\Time.exe N/A |
| 0 N/A N/A 14792 C+G ...GeForce Experience\NVIDIA Share.exe N/A |
| 0 N/A N/A 16016 C+G ...CBS_cw5n1h2txyewy\TextInputHost.exe N/A |
| 0 N/A N/A 16612 C+G ...ft Office\root\Office16\OUTLOOK.EXE N/A |
| 0 N/A N/A 17024 C+G ....Search_cw5n1h2txyewy\SearchApp.exe N/A |
| 0 N/A N/A 17124 C+G ...oogle\Chrome\Application\chrome.exe N/A |
| 0 N/A N/A 17368 C+G ...l\Microsoft\Teams\current\Teams.exe N/A |
| 0 N/A N/A 20412 C+G ...on\114.0.1823.43\msedgewebview2.exe N/A |
| 0 N/A N/A 20660 C+G ...air\Corsair iCUE5 Software\iCUE.exe N/A |
| 0 N/A N/A 23044 C+G ...\cef\cef.win7x64\steamwebhelper.exe N/A |
| 0 N/A N/A 23360 C+G ...Canary\app-1.0.66\DiscordCanary.exe N/A |
| 0 N/A N/A 24680 C+G ...ne\Binaries\Win64\EpicWebHelper.exe N/A |
| 0 N/A N/A 25200 C+G ...on\wallpaper_engine\wallpaper32.exe N/A |
| 0 N/A N/A 25596 C+G ...e Stream\76.0.3.0\GoogleDriveFS.exe N/A |
| 0 N/A N/A 25952 C+G ..._8wekyb3d8bbwe\Microsoft.Photos.exe N/A |
| 0 N/A N/A 26716 C+G C:\Program Files\RaiderIO\RaiderIO.exe N/A |
| 0 N/A N/A 27700 C+G ...les (x86)\Overwolf\old_Overwolf.exe N/A |
| 0 N/A N/A 28444 C+G ...cordPTB\app-1.0.1027\DiscordPTB.exe N/A |
| 0 N/A N/A 29192 C+G ...les (x86)\Battle.net\Battle.net.exe N/A |
| 0 N/A N/A 31192 C+G ...wolf\0.223.0.33\OverwolfBrowser.exe N/A |
| 0 N/A N/A 31576 C+G C:\Program Files\NordVPN\NordVPN.exe N/A |
| 0 N/A N/A 31956 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A |
| 0 N/A N/A 32976 C+G ...ft Office\root\Office16\WINWORD.EXE N/A |
| 0 N/A N/A 34400 C+G ...02.0_x86__zpdnekdrzrea0\Spotify.exe N/A |
| 0 N/A N/A 34932 C+G ...ft Office\root\Office16\ONENOTE.EXE N/A |
| 0 N/A N/A 34944 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A |
| 0 N/A N/A 37420 C+G ...l\Microsoft\Teams\current\Teams.exe N/A |
| 0 N/A N/A 37968 C+G ...al\Discord\app-1.0.9013\Discord.exe N/A |
| 0 N/A N/A 38508 C+G ...t.LockApp_cw5n1h2txyewy\LockApp.exe N/A |
| 0 N/A N/A 42416 C+G ...ft Office\root\Office16\WINWORD.EXE N/A |
| 0 N/A N/A 42952 C+G ...crosoft\Edge\Application\msedge.exe N/A |
| 0 N/A N/A 44812 C+G ...cal\Microsoft\OneDrive\OneDrive.exe N/A |
| 0 N/A N/A 47144 C+G ...a\Local\Mozilla Firefox\firefox.exe N/A |
| 0 N/A N/A 47776 C+G ...siveControlPanel\SystemSettings.exe N/A |
| 0 N/A N/A 49192 C+G ...0_x64__8wekyb3d8bbwe\HxAccounts.exe N/A |
| 0 N/A N/A 49296 C+G ...sair iCUE5 Software\QmlRenderer.exe N/A |
+---------------------------------------------------------------------------------------+
%% Cell type:markdown id: tags:
Pip Installs
%% Cell type:code id: tags:
``` python
# Should be handled by requirements.txt - but isn't.
import os
package_install_override = False
# `False and ...` intentionally short-circuits this guard off; drop the `False`
# (or set the override above) to force a reinstall. Parenthesised for clarity.
if (False and not os.path.exists("installedRepos")) or package_install_override:
    !pip install absl-py
    !pip install aiohttp
    !pip install aiosignal
    !pip install async-timeout
    !pip install cachetools
    !pip install certifi
    !pip install click
    !pip install datasets
    !pip install dill
    !pip install evaluate
    !pip install filelock
    !pip install fonttools
    !pip install frozenlist
    !pip install fsspec
    !pip install google-api-core
    !pip install google-api-python-client
    !pip install google-auth
    !pip install google-auth-httplib2
    !pip install googleapis-common-protos
    !pip install httplib2
    !pip install huggingface-hub
    !pip install ipython-genutils
    !pip install joblib
    !pip install Jupyter-Beeper
    !pip install lightning-utilities
    !pip install mkl-fft
    !pip install mkl-random
    !pip install mkl-service
    !pip install mpmath
    !pip install multidict
    !pip install multiprocess
    !pip install munkres
    !pip install networkx
    !pip install nltk
    !pip install oauth2client
    !pip install pandas
    !pip install Pillow
    !pip install ply
    !pip install protobuf
    !pip install pyarrow
    !pip install pyasn1
    !pip install pyasn1-modules
    !pip install PyDrive
    !pip install pyenchant
    !pip install PyQt5
    !pip install pytorch-beam-search
    !pip install pytorch-lightning
    !pip install pywin32
    !pip install PyYAML
    !pip install pyzmq
    !pip install regex
    !pip install responses
    !pip install rouge-score
    !pip install rsa
    !pip install scikit-learn
    !pip install scipy
    !pip install sentencepiece
    !pip install seqeval
    !pip install sympy
    !pip install threadpoolctl
    !pip install tokenizers
    !pip install torch
    !pip install torch-utils
    !pip install torchaudio
    !pip install torchdata
    !pip install torchmetrics
    !pip install torchtext
    !pip install torchvision
    !pip install transformers
    !pip install uritemplate
    !pip install webencodings
    !pip install wincertstore
    !pip install xxhash
    !pip install yarl
    print("Installed all Packages!")
    # Touch an empty sentinel file so later runs skip the installs.
    open("installedRepos", "w").close()
else:
    print("Packages should be installed already. If this is incorrect, change the override and re-run.")
    package_install_override = False
```
%% Output
Packages should be installed already. If this is incorrect, change the override and re-run.
%% Cell type:markdown id: tags:
# WandB Login
%% Cell type:code id: tags:
``` python
!pip install wandb
import wandb
wandb.login()
```
%% Cell type:markdown id: tags:
Set Git Creds
%% Cell type:code id: tags:
``` python
!git config --global user.name "Ethan Aherne"
!git config --global user.email "eca1g19@soton.ac.uk"
!git config --get user.name
!git config --get user.email
```
%% Output
Ethan Aherne
eca1g19@soton.ac.uk
%% Cell type:markdown id: tags:
Imports
%% Cell type:code id: tags:
``` python
from sklearn.model_selection import KFold
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import scipy.stats as stats
import evaluate
from torch.utils.data import Dataset
from transformers import BertTokenizerFast
from datasets import load_dataset
from tqdm import tqdm  # Superseded by the tqdm.notebook import below; kept in case removing it breaks something
import torch.nn as nn
import torch
import warnings
from tqdm.notebook import trange, tqdm
import numpy as np
from torch.utils.data import ConcatDataset
import string_utils
import train_utils
from lightning_models import BertLightning, Seq2SeqLightning
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from lightning_fabric.loggers import CSVLogger
import time
import jupyter_beeper
from base_models import BertSingleDense, BertDoubleDense, BertBiLSTM
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import WandbLogger, wandb
from sequence_to_sequence_models import EncoderDecoderBase, FrozenBertEncoder, SingleDenseBertDecoder, \
    DoubleDenseBertDecoder, BiLSTMBertDecoder
```
%% Cell type:markdown id: tags:
Output cuda/cpu
%% Cell type:code id: tags:
``` python
device = "cuda" if torch.cuda.is_available() else "cpu"
print("==============")
print(f"RUNNING ON {device.upper()}")
print("==============")
```
%% Output
==============
RUNNING ON CUDA
==============
%% Cell type:markdown id: tags:
Program Config
%% Cell type:code id: tags:
``` python
verbose = 1
add_time_to_model_name = True
```
%% Cell type:markdown id: tags:
Training config
%% Cell type:code id: tags:
``` python
num_epochs = 8
num_k_folds = 2  # For cross-validating to assess model performance
batch_size = 64
gradient_accumulation_steps = 1
```
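%% Cell type:markdown id: tags:
`gradient_accumulation_steps` is still marked TODO further down; as a minimal sketch (assuming the Lightning path is used), the Trainer's built-in `accumulate_grad_batches` would apply it without any changes to the training loop:
%% Cell type:code id: tags:
``` python
# Sketch only - not yet wired into the training cell below.
# accumulate_grad_batches is a standard Trainer argument; with 1 it is a no-op,
# and the effective batch size becomes batch_size * gradient_accumulation_steps.
from pytorch_lightning import Trainer

sketch_trainer = Trainer(
    max_epochs=num_epochs,
    accumulate_grad_batches=gradient_accumulation_steps,
)
```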
%% Cell type:markdown id: tags:
# Dataset splits config
Set the percentage of each pre-split portion of the CNN/DailyMail dataset to use.
%% Cell type:code id: tags:
``` python
train_split_percentage = 100  # percentage of the pre-defined SPLIT, not of the whole dataset
validate_split_percentage = 10
test_split_percentage = 10
```
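%% Cell type:markdown id: tags:
For reference, these percentages presumably map onto the `datasets` split-slicing syntax (the loading output below prints `train[:100%]`); a minimal sketch of that mapping:
%% Cell type:code id: tags:
``` python
# Sketch: how a split percentage becomes a datasets slice string.
from datasets import load_dataset

split = f"train[:{train_split_percentage}%]"  # e.g. "train[:100%]"
cnn_train_slice = load_dataset("cnn_dailymail", "3.0.0", split=split)
```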
%% Cell type:markdown id: tags:
Lightning Config
%% Cell type:code id: tags:
``` python
use_lightning = True
use_fp16 = True
mixed_precision = "16-mixed"
```
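%% Cell type:markdown id: tags:
`"16-mixed"` is the precision string Lightning 2.x expects for 16-bit Automatic Mixed Precision; the bare integer `16` still works but emits the deprecation warning visible in the training output further down. A minimal sketch of how the flag reaches the Trainer:
%% Cell type:code id: tags:
``` python
# Sketch: the precision flag exactly as the training cell below passes it.
from pytorch_lightning import Trainer

sketch_trainer = Trainer(precision=mixed_precision if use_fp16 else 32)
```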
%% Cell type:markdown id: tags:
Print Configurations if verbose
%% Cell type:code id: tags:
``` python
def output_config():
    print("Program configuration:")
    print(f"Verbose Level: {verbose}")
    print(f"Adding time to model output: {add_time_to_model_name}\n")
    print("Dataset configuration:")
    print(f"Train Split Percentage: {train_split_percentage}")
    print(f"Validation Split Percentage: {validate_split_percentage}")
    print(f"Test Split Percentage: {test_split_percentage}\n")
    print("Training configuration:")
    print(f"Number of training epochs: {num_epochs}")
    print(f"Number of k-folds: {num_k_folds}")
    print(f"Batch size: {batch_size}")
    # TODO Implement
    #print(f"Gradient accumulation steps: {gradient_accumulation_steps}")
    #print(f"Effective Batch Size: {gradient_accumulation_steps * batch_size}\n")
    print(f"Mixed Precision: {mixed_precision}")
    print(f"Using Lightning: {use_lightning}")

if verbose > 0:
    output_config()
```
%% Output
Program configuration:
Verbose Level: 1
Adding time to model output: True
Dataset configuration:
Train Split Percentage: 100
Validation Split Percentage: 10
Test Split Percentage: 10
Training configuration:
Number of training epochs: 8
Number of k-folds: 2
Batch size: 64
Mixed Precision: 16
Using Lightning: True
%% Cell type:markdown id: tags:
Tokenizer Init and config
%% Cell type:code id: tags:
``` python
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
```
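%% Cell type:markdown id: tags:
As a quick illustrative check (not part of the pipeline), the fast tokenizer produces the `input_ids`/`attention_mask` tensors the models below consume:
%% Cell type:code id: tags:
``` python
# Illustrative only: encode a toy sentence and inspect the tensor shapes.
sample = tokenizer("The quick brown fox jumps over the lazy dog.",
                   padding="max_length", truncation=True, max_length=32,
                   return_tensors="pt")
print(sample["input_ids"].shape)       # torch.Size([1, 32])
print(sample["attention_mask"].shape)  # torch.Size([1, 32])
```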
%% Cell type:markdown id: tags:
Custom CNN Dailymail Dataset Class
%% Cell type:code id: tags:
``` python
from cnn_dailymail_dataset import CNNDailyMailDataset
```
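%% Cell type:markdown id: tags:
`cnn_dailymail_dataset.py` is not part of this commit; a rough, hypothetical sketch of the interface the rest of the notebook relies on (field names and log format inferred from the outputs below, everything else assumed):
%% Cell type:code id: tags:
``` python
# Hypothetical sketch only - the real implementation lives in cnn_dailymail_dataset.py.
from datasets import load_dataset
from torch.utils.data import Dataset

class CNNDailyMailDatasetSketch(Dataset):
    def __init__(self, tokenizer, split_type, split_percentage, verbose=0):
        split = f"{split_type}[:{split_percentage}%]"
        if verbose:
            print(f"Loading cnn_dailymail dataset 3.0.0 with split type: {split}")
        self.data = load_dataset("cnn_dailymail", "3.0.0", split=split)
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data[idx]
        enc = self.tokenizer(row["article"], truncation=True,
                             padding="max_length", return_tensors="pt")
        lab = self.tokenizer(row["highlights"], truncation=True,
                             padding="max_length", return_tensors="pt")
        return {"input_ids": enc["input_ids"].squeeze(0),
                "attention_mask": enc["attention_mask"].squeeze(0),
                "labels": lab["input_ids"].squeeze(0)}
```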
%% Cell type:markdown id: tags:
Bert Class Initialization
%% Cell type:code id: tags:
``` python
from base_models import BertSingleDense, BertDoubleDense, BertBiLSTM
```
%% Cell type:markdown id: tags:
Load Bert Tokenizer Fast
%% Cell type:code id: tags:
``` python
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
```
%% Cell type:markdown id: tags:
Load Datasets
%% Cell type:code id: tags:
``` python
print("Dataset configuration:")
print(f"Train Split Percentage: {train_split_percentage}")
print(f"Validation Split Percentage: {validate_split_percentage}")
print(f"Test Split Percentage: {test_split_percentage}\n")
train_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='train', split_percentage=train_split_percentage, verbose=verbose)
validation_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='validation',
                                         split_percentage=validate_split_percentage, verbose=verbose)
test_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='test', split_percentage=test_split_percentage,
                                   verbose=verbose)
```
%% Output
Dataset configuration:
Train Split Percentage: 100
Validation Split Percentage: 10
Test Split Percentage: 10
Loading cnn_dailymail dataset 3.0.0 with split type: train[:100%]
Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
Loading cnn_dailymail dataset 3.0.0 with split type: validation[:10%]
Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
Loading cnn_dailymail dataset 3.0.0 with split type: test[:10%]
Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
%% Cell type:markdown id: tags:
Load Datasets into dataloaders
%% Cell type:code id: tags:
``` python
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
```
%% Cell type:markdown id: tags:
Define Model Object and config
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
Define KFold Object, set to None if not cross validating
%% Cell type:code id: tags:
``` python
cross_validation_k_folder = KFold(n_splits=num_k_folds) if num_k_folds > 0 else None
```
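%% Cell type:markdown id: tags:
For context, a minimal sketch of how the fold indices feed the samplers used in the training cell further down:
%% Cell type:code id: tags:
``` python
# Sketch: KFold.split yields (train_idx, val_idx) index arrays per fold;
# SubsetRandomSampler restricts a DataLoader to one fold without copying data.
if cross_validation_k_folder is not None:
    for fold, (train_idx, val_idx) in enumerate(cross_validation_k_folder.split(train_dataset)):
        print(f"Fold {fold}: {len(train_idx)} train / {len(val_idx)} val examples")
```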
%% Cell type:markdown id: tags:
Define loss function object
%% Cell type:code id: tags:
``` python
criterion = CrossEntropyLoss()
```
%% Cell type:markdown id: tags:
Load Rouge scorer
%% Cell type:code id: tags:
``` python
rouge_score = evaluate.load("rouge")
```
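%% Cell type:markdown id: tags:
A quick illustrative call; `compute` returns a dict of scores keyed `rouge1`, `rouge2`, `rougeL`, and `rougeLsum`:
%% Cell type:code id: tags:
``` python
# Illustrative only: score one toy prediction against a reference.
toy_scores = rouge_score.compute(predictions=["the cat sat on the mat"],
                                 references=["the cat sat on a mat"])
print(toy_scores)  # e.g. {'rouge1': 0.83, 'rouge2': 0.6, ...}
```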
%% Cell type:markdown id: tags:
## Analyze Model scores
Input is a list of dictionaries, one set of scores per batch.
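A hypothetical aggregation helper (assumed names, not from this repo) that averages each metric over those per-batch dicts:
%% Cell type:code id: tags:
``` python
# Hypothetical sketch: average each ROUGE key across the per-batch score
# dicts returned by evaluate_model_and_debug further down.
import numpy as np

def analyze_model_scores(score_totals):
    keys = score_totals[0].keys()
    return {k: float(np.mean([s[k] for s in score_totals])) for k in keys}
```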
%% Cell type:markdown id: tags:
Train the model
%% Cell type:code id: tags:
``` python
output_config()
num_cpus = os.cpu_count()
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_cpus)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, num_workers=num_cpus)
b = jupyter_beeper.Beeper()
b.beep()
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
# Load Datasets
train_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='train', split_percentage=train_split_percentage,
                                    verbose=verbose)
validation_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='validation',
                                         split_percentage=validate_split_percentage, verbose=verbose)
test_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='test', split_percentage=test_split_percentage,
                                   verbose=verbose)
encoder = FrozenBertEncoder()
# Define Model Object
decoders = [SingleDenseBertDecoder(), DoubleDenseBertDecoder(), BiLSTMBertDecoder()]
# Define KFold Object, set to None if not cross validating
cross_validation_k_folder = KFold(n_splits=num_k_folds) if num_k_folds > 0 else None
# Define Optimizer (AdamW) - Filters to only optimize params that are not frozen (i.e. not bert)
# Define loss function object
criterion = nn.NLLLoss()
num_cpus = os.cpu_count()
num_gpus = [torch.cuda.device(i) for i in range(torch.cuda.device_count())]
if len(num_gpus) >= 8:  # num_gpus is a list, so compare its length, not the list itself
    print("POWAAAAAA")
    strategy = "ddp_notebook"
else:
    strategy = None
# Load Datasets into data-loaders
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_cpus)
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_cpus)
val_loader = DataLoader(validation_dataset, batch_size=batch_size, num_workers=num_cpus)
output_config()
wandb.login()
b = jupyter_beeper.Beeper()
b.beep()
torch.set_float32_matmul_precision("high")
if cross_validation_k_folder is not None:
    for fold, (train_idx, val_idx) in enumerate(cross_validation_k_folder.split(train_dataset)):
        train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
        val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)  # was train_idx, which validated on the training fold
        train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_cpus)
        val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler, num_workers=num_cpus)
        encoder = FrozenBertEncoder()
        for decoder in decoders:
            model = EncoderDecoderBase(encoder=encoder, decoder=decoder)
            model = Seq2SeqLightning(model)
            wandb_logger = WandbLogger(name=f"seq2seq_lightning_fold_{fold}", project="seq2seq_lightning")
            checkpoint_callback = ModelCheckpoint(
                monitor='val_loss',
                dirpath=f'checkpoints_fold_{fold}',
                filename='seq2seq-{epoch:02d}-{val_loss:.2f}',
                save_top_k=3,
                mode='min',
            )
            early_stopping = EarlyStopping(monitor='val_loss', patience=3, mode='min')
            if strategy is not None:
                trainer = Trainer(
                    max_epochs=num_epochs,
                    accelerator="auto",
                    devices=len(num_gpus),
                    precision=mixed_precision if use_fp16 else 32,
                    logger=wandb_logger,
                    callbacks=[checkpoint_callback, early_stopping],
                    strategy=strategy
                )
            else:
                trainer = Trainer(
                    max_epochs=num_epochs,
                    accelerator="auto",
                    devices=len(num_gpus),
                    precision=mixed_precision if use_fp16 else 32,
                    logger=wandb_logger,
                    callbacks=[checkpoint_callback, early_stopping],
                )
            trainer.fit(model, train_loader, val_loader)
else:
    for decoder in decoders:
        model = EncoderDecoderBase(encoder=encoder, decoder=decoder)
        model = Seq2SeqLightning(model)
        wandb_logger = WandbLogger(name="seq2seq_lightning_run", project="seq2seq_lightning")
        checkpoint_callback = ModelCheckpoint(
            monitor='val_loss',
            dirpath='checkpoints',
            filename='seq2seq-{epoch:02d}-{val_loss:.2f}',
            save_top_k=3,
            mode='min',
        )
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, mode='min')
        if strategy is not None:
            trainer = Trainer(
                max_epochs=num_epochs,
                accelerator="auto",
                devices=len(num_gpus),
                precision=mixed_precision if use_fp16 else 32,
                logger=wandb_logger,
                callbacks=[checkpoint_callback, early_stopping],
                strategy=strategy
            )
        else:
            trainer = Trainer(
                max_epochs=num_epochs,
                accelerator="auto",
                devices=len(num_gpus),
                precision=mixed_precision if use_fp16 else 32,
                logger=wandb_logger,
                callbacks=[checkpoint_callback, early_stopping],
            )
        trainer.fit(model, train_loader, val_loader)
b.beep()
wandb.finish()
```
%% Output
Program configuration:
Verbose Level: 1
Adding time to model output: True
Dataset configuration:
Train Split Percentage: 100
Validation Split Percentage: 10
Test Split Percentage: 10
Training configuration:
Number of training epochs: 8
Number of k-folds: 2
Batch size: 64
Mixed Precision: 16
Using Lightning: True
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training BertBiLSTM
Available GPUs: 1
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\lightning_fabric\connector.py:555: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
warning_cache.warn(
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:615: UserWarning: Checkpoint directory C:\Users\uwu\PycharmProjects\COMP3200\Models exists and is not empty.
rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
-----------------------------------------
0 | model | BertBiLSTM | 165 M
1 | criterion | NLLLoss | 0
-----------------------------------------
56.4 M Trainable params
109 M Non-trainable params
165 M Total params
663.376 Total estimated model params size (MB)
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\pytorch_lightning\trainer\call.py:52: UserWarning: Detected KeyboardInterrupt, attempting graceful shutdown...
rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x0000020B93049EE0>
Traceback (most recent call last):
File "C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\torch\utils\data\dataloader.py", line 1478, in __del__
self._shutdown_workers()
File "C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\torch\utils\data\dataloader.py", line 1436, in _shutdown_workers
if self._persistent_workers or self._workers_status[worker_id]:
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\lightning_fabric\connector.py:555: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
rank_zero_warn(
Training BertDoubleDense
Available GPUs: 1
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
----------------------------------------------
0 | model | BertDoubleDense | 133 M
1 | criterion | NLLLoss | 0
----------------------------------------------
24.1 M Trainable params
109 M Non-trainable params
133 M Total params
534.177 Total estimated model params size (MB)
%% Cell type:markdown id: tags:
Load a model from a checkpoint (debugging from here):
%% Cell type:code id: tags:
``` python
model = BertLightning(BertSingleDense())
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_cpus)
batch_size = 8  # I am a mortal on local machine
# Trainer() has no `load_from_checkpoint` argument (hence the error below);
# pass the checkpoint to `trainer.test` via `ckpt_path` instead.
trainer = Trainer(devices=len(num_gpus),
                  accelerator="auto",
                  precision="16-mixed")
trainer.test(model, test_loader,
             ckpt_path="Models/epoch=7-val_loss=0.86-rouge=0.00.ckpt")
```
%% Output
Cell In [7], line 6
precision="16",load_from_checkpoint("Models/epoch=7-val_loss=0.86-rouge=0.00.ckpt"))
^
SyntaxError: positional argument follows keyword argument
%% Cell type:code id: tags:
``` python
def evaluate_model_and_debug(model, data_loader, scorer, tokenizer, device='cpu'):
    # Set model to eval mode
    model.eval()
    # Init score tracker
    score_totals = []
    # Disable gradients for evaluation - performance
    with torch.no_grad():
        # Init tqdm
        desc_string = "Evaluation"
        progress_bar = tqdm(data_loader, desc=desc_string)
        # Iterate over each batch
        for batch_id, batch in enumerate(progress_bar):
            # Unpack batch into inputs and outputs
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            # Run Forward Pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # Greedy decode for now
            max_values, predicted_indices = torch.max(outputs, dim=-1)
            predicted_tokens = [[tokenizer.convert_ids_to_tokens(idx.item()) for idx in seq] for seq in predicted_indices]
            # Argmax Decode
            outputs_decoded = [tokenizer.decode(o, skip_special_tokens=True) for o in predicted_indices]
            labels_decoded = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]
            print(f"outputs shape: {outputs.shape}")
            print(f"labels shape: {labels.shape}")
            print(f"predicted_indices shape: {predicted_indices.shape}")
            print(f"predicted_indices[0] shape: {predicted_indices[0].shape}")
            # Calculate performance score
            score = scorer.compute(predictions=outputs_decoded, references=labels_decoded)
            score_totals.append(score)
            # Updates progress bar text
            progress_bar.set_postfix({f'Batch {batch_id} Score': score})
    # Return scores list of dictionaries
    return score_totals
```
%% Cell type:code id: tags:
``` python
model = BertSingleDense().to(device)
checkpoint_path = "/content/Models/BertSingleDense/BertSingleDense_best.pt"
checkpoint_dir = os.path.dirname(checkpoint_path)
model.load_state_dict(torch.load(checkpoint_path))
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
rouge_score = evaluate.load("rouge")
validation_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='validation',
                                         split_percentage=1, verbose=verbose)
validation_data_loader = DataLoader(validation_dataset, batch_size=16)
model_evaluate_scores = evaluate_model_and_debug(model, validation_data_loader,
                                                 rouge_score, tokenizer, device=device)
print(model_evaluate_scores)
```