Question about predicting a sample
Hi Mr. @uakarsh, I have been working with one of your source files, "LaTr TextVQA Training with WandB 💥.ipynb". Now I want to predict samples, so I followed the docs and added the source code below, but I still can't predict samples.
The steps I have taken are as follows:
1) I added predict_dataloader to the DataModule:
```python
class DataModule(pl.LightningDataModule):

    def __init__(self, train_dataset, val_dataset, batch_size = 1):
        super(DataModule, self).__init__()
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.batch_size = batch_size

    ## The parameters for the DataLoader have been taken from here:
    ## https://docs.mosaicml.com/en/v0.7.1/trainer/dataloaders.html#passing-a-pytorch-dataloader
    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size = self.batch_size, collate_fn = collate_fn,
                          shuffle = True, num_workers = 2, pin_memory = True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size = self.batch_size, collate_fn = collate_fn,
                          shuffle = False, num_workers = 2, pin_memory = True)

    def test_dataloader(self):
        return DataLoader(self.val_dataset, batch_size = self.batch_size, collate_fn = collate_fn,
                          shuffle = False, num_workers = 2, pin_memory = True)

    def predict_dataloader(self):
        return DataLoader(self.val_dataset, batch_size = self.batch_size, collate_fn = collate_fn,
                          shuffle = False, num_workers = 2, pin_memory = True)
```
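For context, a minimal sketch of how this DataModule gets wired into prediction; here `train_ds`, `val_ds`, and `model` are placeholders for the objects built earlier in the notebook:

```python
## Hypothetical wiring; train_ds, val_ds, and model come from earlier notebook cells
datamodule = DataModule(train_ds, val_ds, batch_size = 1)
trainer = pl.Trainer(accelerator = "gpu", devices = 1)

## trainer.predict drives predict_dataloader() above and returns a list
## with one entry per batch
preds = trainer.predict(model, datamodule)
```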
2) Then I added predict_step as below
```python
def polynomial(base_lr, iter, max_iter = 1e5, power = 1):
    return base_lr * ((1 - float(iter) / max_iter) ** power)


class LaTrForVQA(pl.LightningModule):

    def __init__(self, config, learning_rate = 1e-4, max_steps = 100000//2):
        super(LaTrForVQA, self).__init__()
        self.config = config
        self.save_hyperparameters()
        self.latr = LaTr_for_finetuning(config)
        self.training_losses = []
        self.validation_losses = []
        self.max_steps = max_steps

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr = self.hparams['learning_rate'])

    def forward(self, batch_dict):
        boxes = batch_dict['boxes']
        img = batch_dict['img']
        question = batch_dict['question']
        words = batch_dict['tokenized_words']
        answer_vector = self.latr(lang_vect = words,
                                  spatial_vect = boxes,
                                  img_vect = img,
                                  quest_vect = question)
        return answer_vector

    def calculate_metrics(self, prediction, labels):
        ## Calculate the accuracy score between the prediction and the ground
        ## label for a batch, taking the pad sequence into account
        batch_size = len(prediction)
        ac_score = 0
        for (pred, gt) in zip(prediction, labels):
            ac_score += calculate_acc_score(pred.detach().cpu(), gt.detach().cpu())
        ac_score = ac_score / batch_size
        return ac_score

    def training_step(self, batch, batch_idx):
        answer_vector = self.forward(batch)
        ## https://discuss.huggingface.co/t/bertformaskedlm-s-loss-and-scores-how-the-loss-is-computed/607/2
        loss = nn.CrossEntropyLoss()(answer_vector.reshape(-1, self.config['classes']),
                                     batch['answer'].reshape(-1))
        _, preds = torch.max(answer_vector, dim = -1)

        ## Calculating the accuracy score
        train_acc = self.calculate_metrics(preds, batch['answer'])
        train_acc = torch.tensor(train_acc)

        ## Logging
        self.log('train_ce_loss', loss, prog_bar = True)
        self.log('train_acc', train_acc, prog_bar = True)
        self.training_losses.append(loss.item())
        return loss

    def validation_step(self, batch, batch_idx):
        logits = self.forward(batch)
        loss = nn.CrossEntropyLoss()(logits.reshape(-1, self.config['classes']),
                                     batch['answer'].reshape(-1))
        _, preds = torch.max(logits, dim = -1)

        ## Validation accuracy
        val_acc = self.calculate_metrics(preds.cpu(), batch['answer'].cpu())
        val_acc = torch.tensor(val_acc)

        ## Logging
        self.log('val_ce_loss', loss, prog_bar = True)
        self.log('val_acc', val_acc, prog_bar = True)
        return {'val_loss': loss, 'val_acc': val_acc}

    ## For the fine-tuning stage, the warm-up period is set to 1,000 steps and the LR
    ## is then linearly decayed to zero, pg. 12 of the paper
    ## Refer here: https://github.com/Lightning-AI/lightning/issues/328#issuecomment-550114178
    def optimizer_step(self, epoch_nb, batch_nb, optimizer, optimizer_i, opt_closure = None,
                       on_tpu = False, using_native_amp = False, using_lbfgs = False):
        ## Warmup for 1000 steps
        if self.trainer.global_step < 1000:
            lr_scale = min(1., float(self.trainer.global_step + 1) / 1000.)
            for pg in optimizer.param_groups:
                pg['lr'] = lr_scale * self.hparams.learning_rate
        ## Linear decay
        else:
            for pg in optimizer.param_groups:
                pg['lr'] = polynomial(self.hparams.learning_rate,
                                      self.trainer.global_step, max_iter = self.max_steps)
        optimizer.step(opt_closure)
        optimizer.zero_grad()

    def predict_step(self, batch, batch_idx):
        answer_vector = self.forward(batch)
        pred = torch.max(answer_vector, dim = -1)
        return pred

    def validation_epoch_end(self, outputs):
        val_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        val_acc = torch.stack([x['val_acc'] for x in outputs]).mean()
        self.log('val_loss_epoch_end', val_loss, on_epoch = True, sync_dist = True)
        self.log('val_acc_epoch_end', val_acc, on_epoch = True, sync_dist = True)
        self.val_prediction = []
```
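For intuition, the schedule in optimizer_step above ramps the learning rate linearly over the first 1,000 steps and then decays it linearly to zero, since power = 1 turns polynomial into a straight line. A quick sanity check, assuming learning_rate = 1e-4 and max_steps = 100000//2 = 50000 as in the constructor:

```python
## LR just after warmup ends, at the halfway point, and at the final step
for step in [1000, 25000, 50000]:
    print(step, polynomial(1e-4, step, max_iter = 5e4))
## 1000  -> ~9.8e-05  (just after warmup)
## 25000 ->  5.0e-05  (half the base LR)
## 50000 ->  0.0      (fully decayed)
```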
3) In the last step I added the prediction call as below:
```python
preds = trainer.predict(model, datamodule)

preds = torch.concat(preds)
preds = preds.argmax(axis = 1)
preds[:5]
## tensor([5, 9, 9, 6, 1])
```
But when I ran it, I got the following error:
```
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 100% 79/79 [00:20<00:00, 3.85it/s]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-61-02e3e8378def> in <module>
      1 preds = trainer.predict(model, datamodule)
      2
----> 3 preds = torch.concat(preds)
      4
      5 preds = preds.argmax(axis=1)

TypeError: expected Tensor as element 0 in argument 0, but got torch.return_types.max
```
I don't know whether my approach is right, and if it is, how to solve this problem.
Would you mind helping me with this case?
There is a small mistake in predict_step in the LaTrForVQA class: torch.max returns two values, the values and the indices, so you can fix the problem in two ways (see the sketch after this list):
- Instead of returning `pred`, you can return `pred.indices`, assuming you need the maximum argument
- Once you have the answer vector, you can return `torch.argmax(answer_vector, axis = -1)`, assuming you need the argument along the last dimension
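A minimal sketch of predict_step with either fix applied:

```python
def predict_step(self, batch, batch_idx):
    answer_vector = self.forward(batch)
    ## Option 1: torch.max returns a named tuple (values, indices);
    ## keep only the indices of the highest-scoring class
    pred = torch.max(answer_vector, dim = -1).indices
    ## Option 2 (equivalent): torch.argmax returns the index tensor directly
    # pred = torch.argmax(answer_vector, dim = -1)
    return pred
```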
Hope it helps.
Regards, Akarsh
Hi @uakarsh, first of all I'm really sorry for the delay in replying; during this time I was working on your idea, but unfortunately I couldn't solve my problem. So I'd like to ask for your help again, and of course I'd appreciate it very much.
- In part 1 you suggested that instead of my code I return pred.indices, and in part 2 you suggested I return torch.argmax(answer_vector, axis = -1). Do you mean the same changes that I have made in the code below? If so, I encountered an error, which I attach. I tried to solve it, but I couldn't.
```python
class LaTrForVQA(pl.LightningModule):

    ## ... (all methods unchanged from the class above, except predict_step) ...
    def predict_step(self, batch, batch_idx):
        answer_vector = self.forward(batch)
        print("answer_vector shape :", answer_vector.shape)
        print("val [0,0,0]",)
        pred, indices = torch.argmax(answer_vector, axis = -1)
        # print(pred.shape)
        # print(pred[0])
        return pred
```
When I run

```python
preds, indices = trainer.predict(model, datamodule)
```

I encounter the error below:
```
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting DataLoader 0: 0% 0/79 [00:00<?, ?it/s]
answer_vector shape : torch.Size([1, 512, 32128])
val [0,0,0]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-38-20865f6cb5d4> in <module>
----> 1 preds = trainer.predict(model, datamodule)

13 frames
<ipython-input-30-a8120dfbe56d> in predict_step(self, batch, batch_idx)
    108     print("answer_vector shape :", answer_vector.shape)
    109     print("val [0,0,0]",)
--> 110     pred, indices = torch.argmax(answer_vector, axis = -1)
    111     # print(pred.shape)
    112     # print(pred[0])

ValueError: not enough values to unpack (expected 2, got 1)
```
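For reference, this is the distinction the traceback points at: torch.max returns a (values, indices) named tuple that can be unpacked into two variables, while torch.argmax returns a single tensor of indices, so two-way unpacking fails. A minimal illustration:

```python
import torch

## same shape as the answer_vector printed above
answer_vector = torch.randn(1, 512, 32128)

## torch.max returns a named tuple, so two-way unpacking works
values, indices = torch.max(answer_vector, dim = -1)

## torch.argmax returns one tensor of indices; unpacking it into two
## variables raises "not enough values to unpack (expected 2, got 1)"
pred = torch.argmax(answer_vector, dim = -1)   # shape: (1, 512)
```

Note also that trainer.predict returns a plain list with one entry per batch, so `preds, indices = trainer.predict(model, datamodule)` would fail to unpack as well; the fixed predict_step should simply be paired with `preds = trainer.predict(model, datamodule)`.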
Is it possible that I did not understand your meaning correctly? Could you please write the correct source code for predicting a sample?
- After my problem is solved, my next question is: if I make predictions from the data loader, how many answer predictions will it return?
- Also: if I must first give a question and an image as input so that the output is an answer, is that correct? How do I do this?
Best regards,