Skip to content
Snippets Groups Projects
Commit 62981f71 authored by eca1g19's avatar eca1g19
Browse files

Fixed wandb

parent d4a7a78a
Branches
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
Memory Check
%% Cell type:code id: tags:
``` python
!nvidia-smi
```
%% Output
Wed Jun 14 17:04:39 2023
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.29 Driver Version: 531.29 CUDA Version: 12.1 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 NVIDIA GeForce RTX 2080 Ti WDDM | 00000000:0E:00.0 On | N/A |
| 41% 49C P8 43W / 260W| 2505MiB / 11264MiB | 17% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
+---------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=======================================================================================|
| 0 N/A N/A 2412 C+G ...inaries\Win64\EpicGamesLauncher.exe N/A |
| 0 N/A N/A 3144 C+G ...a\Local\Mozilla Firefox\firefox.exe N/A |
| 0 N/A N/A 3400 C+G ..._x64__kzf8qxf38zg5c\Skype\Skype.exe N/A |
| 0 N/A N/A 3752 C+G ...GeForce Experience\NVIDIA Share.exe N/A |
| 0 N/A N/A 4240 C+G ...1.0_x64__8wekyb3d8bbwe\Video.UI.exe N/A |
| 0 N/A N/A 6468 C+G ....Search_cw5n1h2txyewy\SearchApp.exe N/A |
| 0 N/A N/A 6828 C+G ...rm 2020.3.3\jbr\bin\jcef_helper.exe N/A |
| 0 N/A N/A 9500 C+G ....0_x64__8wekyb3d8bbwe\HxOutlook.exe N/A |
| 0 N/A N/A 9780 C+G ..._x64__kzf8qxf38zg5c\Skype\Skype.exe N/A |
| 0 N/A N/A 11628 C+G C:\Windows\explorer.exe N/A |
| 0 N/A N/A 12416 C+G ...2txyewy\StartMenuExperienceHost.exe N/A |
| 0 N/A N/A 14040 C+G ...302.5.0_x64__8wekyb3d8bbwe\Time.exe N/A |
| 0 N/A N/A 14792 C+G ...GeForce Experience\NVIDIA Share.exe N/A |
| 0 N/A N/A 16016 C+G ...CBS_cw5n1h2txyewy\TextInputHost.exe N/A |
| 0 N/A N/A 16612 C+G ...ft Office\root\Office16\OUTLOOK.EXE N/A |
| 0 N/A N/A 17024 C+G ....Search_cw5n1h2txyewy\SearchApp.exe N/A |
| 0 N/A N/A 17124 C+G ...oogle\Chrome\Application\chrome.exe N/A |
| 0 N/A N/A 17368 C+G ...l\Microsoft\Teams\current\Teams.exe N/A |
| 0 N/A N/A 20412 C+G ...on\114.0.1823.43\msedgewebview2.exe N/A |
| 0 N/A N/A 20660 C+G ...air\Corsair iCUE5 Software\iCUE.exe N/A |
| 0 N/A N/A 23044 C+G ...\cef\cef.win7x64\steamwebhelper.exe N/A |
| 0 N/A N/A 23360 C+G ...Canary\app-1.0.66\DiscordCanary.exe N/A |
| 0 N/A N/A 24680 C+G ...ne\Binaries\Win64\EpicWebHelper.exe N/A |
| 0 N/A N/A 25200 C+G ...on\wallpaper_engine\wallpaper32.exe N/A |
| 0 N/A N/A 25596 C+G ...e Stream\76.0.3.0\GoogleDriveFS.exe N/A |
| 0 N/A N/A 25952 C+G ..._8wekyb3d8bbwe\Microsoft.Photos.exe N/A |
| 0 N/A N/A 26716 C+G C:\Program Files\RaiderIO\RaiderIO.exe N/A |
| 0 N/A N/A 27700 C+G ...les (x86)\Overwolf\old_Overwolf.exe N/A |
| 0 N/A N/A 28444 C+G ...cordPTB\app-1.0.1027\DiscordPTB.exe N/A |
| 0 N/A N/A 29192 C+G ...les (x86)\Battle.net\Battle.net.exe N/A |
| 0 N/A N/A 31192 C+G ...wolf\0.223.0.33\OverwolfBrowser.exe N/A |
| 0 N/A N/A 31576 C+G C:\Program Files\NordVPN\NordVPN.exe N/A |
| 0 N/A N/A 31956 C+G ...ekyb3d8bbwe\PhoneExperienceHost.exe N/A |
| 0 N/A N/A 32976 C+G ...ft Office\root\Office16\WINWORD.EXE N/A |
| 0 N/A N/A 34400 C+G ...02.0_x86__zpdnekdrzrea0\Spotify.exe N/A |
| 0 N/A N/A 34932 C+G ...ft Office\root\Office16\ONENOTE.EXE N/A |
| 0 N/A N/A 34944 C+G ...5n1h2txyewy\ShellExperienceHost.exe N/A |
| 0 N/A N/A 37420 C+G ...l\Microsoft\Teams\current\Teams.exe N/A |
| 0 N/A N/A 37968 C+G ...al\Discord\app-1.0.9013\Discord.exe N/A |
| 0 N/A N/A 38508 C+G ...t.LockApp_cw5n1h2txyewy\LockApp.exe N/A |
| 0 N/A N/A 42416 C+G ...ft Office\root\Office16\WINWORD.EXE N/A |
| 0 N/A N/A 42952 C+G ...crosoft\Edge\Application\msedge.exe N/A |
| 0 N/A N/A 44812 C+G ...cal\Microsoft\OneDrive\OneDrive.exe N/A |
| 0 N/A N/A 47144 C+G ...a\Local\Mozilla Firefox\firefox.exe N/A |
| 0 N/A N/A 47776 C+G ...siveControlPanel\SystemSettings.exe N/A |
| 0 N/A N/A 49192 C+G ...0_x64__8wekyb3d8bbwe\HxAccounts.exe N/A |
| 0 N/A N/A 49296 C+G ...sair iCUE5 Software\QmlRenderer.exe N/A |
+---------------------------------------------------------------------------------------+
%% Cell type:markdown id: tags:
Pip Installs
%% Cell type:code id: tags:
``` python
#should be handled by requirements.txt - but isnt
import os
package_install_override = False
if False and not os.path.exists("installedRepos") or package_install_override:
!pip install absl-py
!pip install aiohttp
!pip install aiosignal
!pip install async-timeout
!pip install cachetools
!pip install certifi
!pip install click
!pip install datasets
!pip install dill
!pip install evaluate
!pip install filelock
!pip install fonttools
!pip install frozenlist
!pip install fsspec
!pip install google-api-core
!pip install google-api-python-client
!pip install google-auth
!pip install google-auth-httplib2
!pip install googleapis-common-protos
!pip install httplib2
!pip install huggingface-hub
!pip install ipython-genutils
!pip install joblib
!pip install Jupyter-Beeper
!pip install lightning-utilities
!pip install mkl-fft
!pip install mkl-random
!pip install mkl-service
!pip install mpmath
!pip install multidict
!pip install multiprocess
!pip install munkres
!pip install networkx
!pip install nltk
!pip install oauth2client
!pip install pandas
!pip install Pillow
!pip install ply
!pip install protobuf
!pip install pyarrow
!pip install pyasn1
!pip install pyasn1-modules
!pip install PyDrive
!pip install pyenchant
!pip install PyQt5
!pip install pytorch-beam-search
!pip install pytorch-lightning
!pip install pywin32
!pip install PyYAML
!pip install pyzmq
!pip install regex
!pip install responses
!pip install rouge-score
!pip install rsa
!pip install scikit-learn
!pip install scipy
!pip install sentencepiece
!pip install seqeval
!pip install sympy
!pip install threadpoolctl
!pip install tokenizers
!pip install torch
!pip install torch-utils
!pip install torchaudio
!pip install torchdata
!pip install torchmetrics
!pip install torchtext
!pip install torchvision
!pip install transformers
!pip install uritemplate
!pip install webencodings
!pip install wincertstore
!pip install xxhash
!pip install yarl
print("Installed all Packages!")
f = open("installedRepos", "w")
f.close()
else:
print("Packages should be installed already. If this is incorrect, change the override and re-run.")
package_install_override = False
```
%% Output
Packages should be installed already. If this is incorrect, change the override and re-run.
%% Cell type:markdown id: tags:
# WandB Login
%% Cell type:code id: tags:
``` python
# Install and authenticate Weights & Biases.
# wandb.login() prompts for an API key the first time; afterwards it uses the cached key.
!pip install wandb
import wandb
wandb.login()
```
%% Cell type:markdown id: tags:
Set Git Creds
%% Cell type:code id: tags:
``` python
# Set the global git identity, then read it back to confirm it took effect.
!git config --global user.name "Ethan Aherne"
!git config --global user.email "eca1g19@soton.ac.uk"
!git config --get user.name
!git config --get user.email
```
%% Output
Ethan Aherne
eca1g19@soton.ac.uk
%% Cell type:markdown id: tags:
Imports
%% Cell type:code id: tags:
``` python
from sklearn.model_selection import KFold
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import scipy.stats as stats
import evaluate
from torch.utils.data import Dataset
from transformers import BertTokenizerFast
from datasets import load_dataset
from tqdm import tqdm # This should be removed but in case it breaks everything here it will stay
import torch.nn as nn
import torch
import warnings
from tqdm.notebook import trange, tqdm
import numpy as np
from torch.utils.data import ConcatDataset
import string_utils
import train_utils
from lightning_models import BertLightning, Seq2SeqLightning
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from lightning_fabric.loggers import CSVLogger
import time
import jupyter_beeper
from base_models import BertSingleDense, BertDoubleDense, BertBiLSTM
from pytorch_lightning import Trainer
# BUGFIX: do NOT also import `wandb` from pytorch_lightning.loggers - that
# rebound the module-level name `wandb` to the pl.loggers.wandb submodule,
# breaking the later wandb.login() / wandb.finish() calls.
from pytorch_lightning.loggers import WandbLogger
from sequence_to_sequence_models import EncoderDecoderBase, FrozenBertEncoder, SingleDenseBertDecoder, \
    DoubleDenseBertDecoder, BiLSTMBertDecoder
```
%% Cell type:markdown id: tags:
Output cuda/cpu
%% Cell type:code id: tags:
``` python
# Pick the compute device: prefer CUDA when available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Announce the chosen device with a small banner.
print('==============')
print("RUNNING ON " + device.upper())
print('==============')
```
%% Output
==============
RUNNING ON CUDA
==============
%% Cell type:markdown id: tags:
Program Config
%% Cell type:code id: tags:
``` python
# Program-wide behaviour flags.
verbose = 1  # 0 = quiet; only the `verbose > 0` check is used below
add_time_to_model_name = True  # presumably appends a timestamp to saved model names - TODO confirm where it is consumed
```
%% Cell type:markdown id: tags:
Training config
%% Cell type:code id: tags:
``` python
# Training hyper-parameters.
num_epochs = 8
num_k_folds = 2 # For Cross-Validating to assess model performance
batch_size = 64
gradient_accumulation_steps = 1  # NOTE(review): defined but not yet wired into training (see TODOs in output_config)
```
%% Cell type:markdown id: tags:
# Dataset splits config
Set the percentage of each pre-split portion of the CNN/DailyMail dataset to use
%% Cell type:code id: tags:
``` python
# Percentage of each pre-defined CNN/DailyMail split to actually load.
train_split_percentage = 100 # percentage of SPLIT
validate_split_percentage = 10
test_split_percentage = 10
```
%% Cell type:markdown id: tags:
Lightning Config
%% Cell type:code id: tags:
``` python
# PyTorch Lightning settings.
use_lightning = True
use_fp16 = True  # when True, `mixed_precision` is passed as the Trainer precision
mixed_precision=16  # NOTE(review): newer Lightning prefers "16-mixed" (the run output shows a deprecation warning for 16)
```
%% Cell type:markdown id: tags:
Print Configurations if verbose
%% Cell type:code id: tags:
``` python
def output_config():
    """Print the current program, dataset, and training configuration.

    Reads the module-level config globals; purely informational output.
    """
    # f-prefixes dropped from placeholder-less literals (they did nothing).
    print("Program configuration:")
    print(f"Verbose Level: {verbose}")
    print(f"Adding time to model output: {add_time_to_model_name}\n")
    print("Dataset configuration:")
    print(f"Train Split Percentage: {train_split_percentage}")
    print(f"Validation Split Percentage: {validate_split_percentage}")
    print(f"Test Split Percentage: {test_split_percentage}\n")
    print("Training configuration:")
    print(f"Number of training epochs: {num_epochs}")
    print(f"Number of k-folds: {num_k_folds}")
    print(f"Batch size: {batch_size}")
    # TODO Implement gradient accumulation and report it here
    # print(f"Gradient accumulation steps: {gradient_accumulation_steps}")
    # print(f"Effective Batch Size: {gradient_accumulation_steps * batch_size}\n")
    print(f"Mixed Precision: {mixed_precision}")
    print(f"Using Lightning: {use_lightning}")


if verbose > 0:
    output_config()
```
%% Output
Program configuration:
Verbose Level: 1
Adding time to model output: True
Dataset configuration:
Train Split Percentage: 100
Validation Split Percentage: 10
Test Split Percentage: 10
Training configuration:
Number of training epochs: 8
Number of k-folds: 2
Batch size: 64
Mixed Precision: 16
Using Lightning: True
%% Cell type:markdown id: tags:
Tokenizer Init and config
%% Cell type:code id: tags:
``` python
# Load the pretrained bert-base-uncased fast tokenizer (downloads on first use, cached afterwards).
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
```
%% Cell type:markdown id: tags:
Custom CNN Dailymail Dataset Class
%% Cell type:code id: tags:
``` python
from cnn_dailymail_dataset import CNNDailyMailDataset
```
%% Cell type:markdown id: tags:
Bert Class Initialization
%% Cell type:code id: tags:
``` python
from base_models import BertSingleDense, BertDoubleDense, BertBiLSTM
```
%% Cell type:markdown id: tags:
Load Bert Tokenizer Fast
%% Cell type:code id: tags:
``` python
# NOTE(review): redundant - the identical tokenizer is already loaded in an earlier cell.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
```
%% Cell type:markdown id: tags:
Load Datasets
%% Cell type:code id: tags:
``` python
# Echo the split configuration, then build one dataset wrapper per
# pre-defined CNN/DailyMail split.
print("Dataset configuration:")
print(f"Train Split Percentage: {train_split_percentage}")
print(f"Validation Split Percentage: {validate_split_percentage}")
print(f"Test Split Percentage: {test_split_percentage}\n")
train_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='train',
                                    split_percentage=train_split_percentage, verbose=verbose)
validation_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='validation',
                                         split_percentage=validate_split_percentage, verbose=verbose)
test_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='test',
                                   split_percentage=test_split_percentage, verbose=verbose)
```
%% Output
Dataset configuration:
Train Split Percentage: 100
Validation Split Percentage: 10
Test Split Percentage: 10
Loading cnn_dailymail dataset 3.0.0 with split type: train[:100%]
Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
Loading cnn_dailymail dataset 3.0.0 with split type: validation[:10%]
Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
Loading cnn_dailymail dataset 3.0.0 with split type: test[:10%]
Found cached dataset cnn_dailymail (C:/Users/uwu/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/1b3c71476f6d152c31c1730e83ccb08bcf23e348233f4fcc11e182248e6bf7de)
%% Cell type:markdown id: tags:
Load Datasets into dataloaders
%% Cell type:code id: tags:
``` python
# Wrap the datasets in batched loaders; only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
```
%% Cell type:markdown id: tags:
Define Model Object and config
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
Define KFold Object, set to None if not cross validating
%% Cell type:code id: tags:
``` python
# KFold splitter for cross-validation; None disables CV.
# Robustness fix: sklearn's KFold requires n_splits >= 2, so num_k_folds == 1
# previously raised ValueError at construction - treat anything below 2 as "no CV".
cross_validation_k_folder = KFold(n_splits=num_k_folds) if num_k_folds >= 2 else None
```
%% Cell type:markdown id: tags:
Define loss function object
%% Cell type:code id: tags:
``` python
# Token-level classification loss.
# NOTE(review): the training cell further down rebinds `criterion` to nn.NLLLoss();
# confirm which loss the models actually expect (logits vs log-probabilities).
criterion = CrossEntropyLoss()
```
%% Cell type:markdown id: tags:
Load Rouge scorer
%% Cell type:code id: tags:
``` python
# HuggingFace `evaluate` ROUGE metric, used to score generated summaries.
rouge_score = evaluate.load("rouge")
```
%% Cell type:markdown id: tags:
## Analyze Model scores
Input is a list of per-batch score dictionaries, one per evaluated batch.
%% Cell type:markdown id: tags:
Train the model
%% Cell type:code id: tags:
``` python
# ---- Setup (deduplicated: the original built loaders, called output_config,
# ---- and constructed the Beeper twice) -----------------------------------
output_config()
num_cpus = os.cpu_count()
# Audible "cell started" signal.
b = jupyter_beeper.Beeper()
b.beep()

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Load Datasets (re-created here so this cell is self-contained).
train_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='train',
                                    split_percentage=train_split_percentage, verbose=verbose)
validation_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='validation',
                                         split_percentage=validate_split_percentage, verbose=verbose)
test_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='test',
                                   split_percentage=test_split_percentage, verbose=verbose)

# Model parts: one shared frozen BERT encoder, three candidate decoders.
encoder = FrozenBertEncoder()
decoders = [SingleDenseBertDecoder(), DoubleDenseBertDecoder(), BiLSTMBertDecoder()]

# KFold object; None disables cross-validation.
# (Consistency fix: the original defined `cross_validation_k_fold` here but then
# tested the stale `cross_validation_k_folder` from an earlier cell.)
cross_validation_k_fold = KFold(n_splits=num_k_folds) if num_k_folds > 0 else None

# Define loss function object (used by the Lightning module).
criterion = nn.NLLLoss()

num_gpus = [torch.cuda.device(i) for i in range((torch.cuda.device_count()))]
# BUGFIX: the original compared the *list* num_gpus against 8, which raises
# TypeError; compare its length instead.
if len(num_gpus) >= 8:
    print("POWAAAAAA")
    strategy = "ddp_notebook"
else:
    strategy = None

# Default data-loaders (the CV branch below swaps in per-fold loaders).
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_cpus)
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_cpus)
val_loader = DataLoader(validation_dataset, batch_size=batch_size, num_workers=num_cpus)

wandb.login()
b.beep()
torch.set_float32_matmul_precision("high")


def _make_trainer(wandb_logger, callbacks):
    """Build a Trainer; the DDP strategy is attached only when one was chosen."""
    trainer_kwargs = dict(
        max_epochs=num_epochs,
        accelerator="auto",
        devices=len(num_gpus),
        precision=mixed_precision if use_fp16 else 32,
        logger=wandb_logger,
        callbacks=callbacks,
    )
    if strategy is not None:
        trainer_kwargs["strategy"] = strategy
    return Trainer(**trainer_kwargs)


def _fit_all_decoders(fit_train_loader, fit_val_loader, run_name, checkpoint_dir):
    """Train every candidate decoder against the shared frozen encoder."""
    for decoder in decoders:
        model = Seq2SeqLightning(EncoderDecoderBase(encoder=encoder, decoder=decoder))
        wandb_logger = WandbLogger(name=run_name, project="seq2seq_lightning")
        checkpoint_callback = ModelCheckpoint(
            monitor='val_loss',
            dirpath=checkpoint_dir,
            filename='seq2seq-{epoch:02d}-{val_loss:.2f}',
            save_top_k=3,
            mode='min',
        )
        early_stopping = EarlyStopping(monitor='val_loss', patience=3, mode='min')
        trainer = _make_trainer(wandb_logger, [checkpoint_callback, early_stopping])
        trainer.fit(model, fit_train_loader, fit_val_loader)


if cross_validation_k_fold is not None:
    # Cross-validate: both per-fold loaders sample from the training set.
    for fold, (train_idx, val_idx) in enumerate(cross_validation_k_fold.split(train_dataset)):
        train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
        # BUGFIX: the validation sampler previously reused train_idx, so the
        # reported "validation" loss was computed on training data.
        val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=num_cpus)
        val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler, num_workers=num_cpus)
        encoder = FrozenBertEncoder()
        _fit_all_decoders(train_loader, val_loader,
                          run_name=f"seq2seq_lightning_fold_{fold}",
                          checkpoint_dir=f'checkpoints_fold_{fold}')
else:
    _fit_all_decoders(train_loader, val_loader,
                      run_name="seq2seq_lightning_run",
                      checkpoint_dir='checkpoints')

b.beep()
wandb.finish()
```
%% Output
Program configuration:
Verbose Level: 1
Adding time to model output: True
Dataset configuration:
Train Split Percentage: 100
Validation Split Percentage: 10
Test Split Percentage: 10
Training configuration:
Number of training epochs: 8
Number of k-folds: 2
Batch size: 64
Mixed Precision: 16
Using Lightning: True
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training BertBiLSTM
Available GPUs: 1
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\lightning_fabric\connector.py:555: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\pytorch_lightning\trainer\connectors\logger_connector\logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
warning_cache.warn(
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:615: UserWarning: Checkpoint directory C:\Users\uwu\PycharmProjects\COMP3200\Models exists and is not empty.
rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
-----------------------------------------
0 | model | BertBiLSTM | 165 M
1 | criterion | NLLLoss | 0
-----------------------------------------
56.4 M Trainable params
109 M Non-trainable params
165 M Total params
663.376 Total estimated model params size (MB)
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\pytorch_lightning\trainer\call.py:52: UserWarning: Detected KeyboardInterrupt, attempting graceful shutdown...
rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x0000020B93049EE0>
Traceback (most recent call last):
File "C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\torch\utils\data\dataloader.py", line 1478, in __del__
self._shutdown_workers()
File "C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\torch\utils\data\dataloader.py", line 1436, in _shutdown_workers
if self._persistent_workers or self._workers_status[worker_id]:
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'
C:\Users\uwu\miniconda3\envs\uni\lib\site-packages\lightning_fabric\connector.py:555: UserWarning: 16 is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
rank_zero_warn(
Training BertDoubleDense
Available GPUs: 1
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
----------------------------------------------
0 | model | BertDoubleDense | 133 M
1 | criterion | NLLLoss | 0
----------------------------------------------
24.1 M Trainable params
109 M Non-trainable params
133 M Total params
534.177 Total estimated model params size (MB)
%% Cell type:markdown id: tags:
Load a model from a checkpoint (debugging from here):
%% Cell type:code id: tags:
``` python
# Evaluate a saved checkpoint on the test set (local debugging).
# Fix: set the small batch size BEFORE building the loader, so it takes effect
# (the original reassigned batch_size after the DataLoader was already built).
batch_size = 8 # I am a mortal on local machine
model = BertLightning(BertSingleDense())
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_cpus)
trainer = Trainer(devices=len(num_gpus),
                  accelerator="auto",
                  precision="16")
# BUGFIX: Trainer() has no `load_from_checkpoint` keyword (the original raised
# at runtime); pass the checkpoint to trainer.test() via `ckpt_path` instead.
trainer.test(model, test_loader, ckpt_path="Models/epoch=7-val_loss=0.86-rouge=0.00.ckpt")
```
%% Output
Cell In [7], line 6
precision="16",load_from_checkpoint("Models/epoch=7-val_loss=0.86-rouge=0.00.ckpt"))
^
SyntaxError: positional argument follows keyword argument
%% Cell type:code id: tags:
``` python
def evaluate_model_and_debug(model, data_loader, scorer, tokenizer, device='cpu'):
    """Greedy-decode `model` over `data_loader`, score against the labels, and
    print per-batch tensor shapes for debugging.

    Args:
        model: callable taking input_ids/attention_mask and returning per-token scores.
        data_loader: yields dicts with "input_ids", "attention_mask" and "labels" tensors.
        scorer: metric with .compute(predictions=..., references=...) (e.g. ROUGE).
        tokenizer: used to decode predicted/label token ids back to text.
        device: device string the batch tensors are moved to.

    Returns:
        List of per-batch score dicts as produced by `scorer.compute`.
    """
    # Set model to eval mode
    model.eval()
    # Init score tracker
    score_totals = []
    # Disable gradients for evaluation - performance
    with torch.no_grad():
        progress_bar = tqdm(data_loader, desc="Evaluation")
        # Iterate over each batch
        for batch_id, batch in enumerate(progress_bar):
            # Unpack batch into inputs and outputs
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            # Run Forward Pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # Greedy decode for now: argmax over the vocabulary dimension.
            # (Unused locals max_values / predicted_tokens removed.)
            predicted_indices = torch.argmax(outputs, dim=-1)
            # Argmax Decode
            outputs_decoded = [tokenizer.decode(o, skip_special_tokens=True) for o in predicted_indices]
            labels_decoded = [tokenizer.decode(l, skip_special_tokens=True) for l in labels]
            # Debug output: shapes for this batch.
            print(f"outputs shape: {outputs.shape}")
            print(f"labels shape: {labels.shape}")
            print(f"predicted_indices shape: {predicted_indices.shape}")
            print(f"predicted_indices[0] shape: {predicted_indices[0].shape}")
            # Calculate performance score
            score = scorer.compute(predictions=outputs_decoded, references=labels_decoded)
            score_totals.append(score)
            # Updates progress bar text
            progress_bar.set_postfix({f'Batch {batch_id} Score': score})
    # Return scores list of dictionaries
    return score_totals
```
%% Cell type:code id: tags:
``` python
# Evaluate a saved BertSingleDense checkpoint on a small validation slice.
model = BertSingleDense().to(device)
checkpoint_path = "/content/Models/BertSingleDense/BertSingleDense_best.pt"
# Robustness fix: map_location lets a GPU-saved checkpoint load on a CPU-only
# machine. (Unused `checkpoint_dir` variable removed.)
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
rouge_score = evaluate.load("rouge")
validation_dataset = CNNDailyMailDataset(tokenizer=tokenizer, split_type='validation',
                                         split_percentage=1, verbose=verbose)
validation_data_loader = DataLoader(validation_dataset, batch_size=16)
model_evaluate_scores = evaluate_model_and_debug(model, validation_data_loader,
                                                 rouge_score, tokenizer, device=device)
print(model_evaluate_scores)
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment