novelty-detection
Loss exploding after few steps
Here is my training script:

```python
from functools import reduce
from operator import mul
from typing import Tuple

import numpy as np
import torch
import torchvision
import torch.nn as nn
import cv2

from models.loss_functions.lsaloss import LSALoss
from models.base import BaseModule
from models.blocks_2d import DownsampleBlock
from models.blocks_2d import ResidualBlock
from models.blocks_2d import UpsampleBlock
from models.estimator_1D import Estimator1D


class Encoder(BaseModule):
    """
    CIFAR10 model encoder.
    """
    def __init__(self, input_shape, code_length):
        # type: (Tuple[int, int, int], int) -> None
        """
        Class constructor.

        :param input_shape: the shape of CIFAR10 samples.
        :param code_length: the dimensionality of latent vectors.
        """
        super(Encoder, self).__init__()

        self.input_shape = input_shape
        self.code_length = code_length

        c, h, w = input_shape
        print(c, h, w)

        activation_fn = nn.LeakyReLU()

        # Convolutional network
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=c, out_channels=32, kernel_size=3, bias=False),
            activation_fn,
            ResidualBlock(channel_in=32, channel_out=32, activation_fn=activation_fn),
            DownsampleBlock(channel_in=32, channel_out=64, activation_fn=activation_fn),
            DownsampleBlock(channel_in=64, channel_out=128, activation_fn=activation_fn),
            DownsampleBlock(channel_in=128, channel_out=256, activation_fn=activation_fn),
        )
        self.deepest_shape = (256, h // 8, w // 8)

        # FC network
        self.fc = nn.Sequential(
            nn.Linear(in_features=reduce(mul, self.deepest_shape), out_features=256),
            nn.BatchNorm1d(num_features=256),
            activation_fn,
            nn.Linear(in_features=256, out_features=code_length),
            nn.Sigmoid()
        )

    def forward(self, x):
        # type: (torch.Tensor) -> torch.Tensor
        """
        Forward propagation.

        :param x: the input batch of images.
        :return: the batch of latent vectors.
        """
        h = x
        print(type(h))
        h = self.conv(h)
        h = h.view(len(h), -1)
        o = self.fc(h)
        return o

class Decoder(BaseModule):
    """
    CIFAR10 model decoder.
    """
    def __init__(self, code_length, deepest_shape, output_shape):
        # type: (int, Tuple[int, int, int], Tuple[int, int, int]) -> None
        """
        Class constructor.

        :param code_length: the dimensionality of latent vectors.
        :param deepest_shape: the dimensionality of the encoder's deepest convolutional map.
        :param output_shape: the shape of CIFAR10 samples.
        """
        super(Decoder, self).__init__()

        self.code_length = code_length
        self.deepest_shape = deepest_shape
        self.output_shape = output_shape
        print(self.output_shape, "--")

        activation_fn = nn.LeakyReLU()

        # FC network
        self.fc = nn.Sequential(
            nn.Linear(in_features=code_length, out_features=256),
            nn.BatchNorm1d(num_features=256),
            activation_fn,
            nn.Linear(in_features=256, out_features=reduce(mul, deepest_shape)),
            nn.BatchNorm1d(num_features=reduce(mul, deepest_shape)),
            activation_fn
        )

        # Convolutional network
        self.conv = nn.Sequential(
            UpsampleBlock(channel_in=256, channel_out=128, activation_fn=activation_fn),
            UpsampleBlock(channel_in=128, channel_out=64, activation_fn=activation_fn),
            UpsampleBlock(channel_in=64, channel_out=32, activation_fn=activation_fn),
            ResidualBlock(channel_in=32, channel_out=32, activation_fn=activation_fn),
            nn.Conv2d(in_channels=32, out_channels=3, kernel_size=1, bias=False)
        )

    def forward(self, x):
        # type: (torch.Tensor) -> torch.Tensor
        """
        Forward propagation.

        :param x: the batch of latent vectors.
        :return: the batch of reconstructions.
        """
        h = x
        h = self.fc(h)
        h = h.view(len(h), *self.deepest_shape)
        h = self.conv(h)
        o = h
        return o

class LSACIFAR10(BaseModule):
    """
    LSA model for CIFAR10 one-class classification.
    """
    def __init__(self, input_shape, code_length, cpd_channels):
        # type: (Tuple[int, int, int], int, int) -> None
        """
        Class constructor.

        :param input_shape: the shape of CIFAR10 samples.
        :param code_length: the dimensionality of latent vectors.
        :param cpd_channels: number of bins in which the multinomial works.
        """
        super(LSACIFAR10, self).__init__()

        self.input_shape = input_shape
        self.code_length = code_length

        # Build encoder
        self.encoder = Encoder(
            input_shape=input_shape,
            code_length=code_length
        )

        # Build decoder
        self.decoder = Decoder(
            code_length=code_length,
            deepest_shape=self.encoder.deepest_shape,
            output_shape=input_shape
        )

        # Build estimator
        self.estimator = Estimator1D(
            code_length=code_length,
            fm_list=[32, 32, 32, 32],
            cpd_channels=cpd_channels
        )

    def forward(self, x):
        # type: (torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
        """
        Forward propagation.

        :param x: the input batch of images.
        :return: a tuple of torch.Tensors holding reconstructions, latent vectors and CPD estimates.
        """
        h = x

        # Produce representations
        z = self.encoder(h)

        # Estimate CPDs with autoregression
        z_dist = self.estimator(z)

        # Reconstruct x
        x_r = self.decoder(z)
        # print(x_r.shape)
        x_r = x_r.view(-1, *self.input_shape)

        return x_r, z, z_dist

def load_dataset(data_path="/home/jbmai/Downloads/Defect Images-20190705T133320Z-001"):
    # data_path = 'data/train/'
    # note: this transform instance is created but never used
    torchvision.transforms.Grayscale(num_output_channels=1)
    trainTransform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=(128, 128), interpolation=2),
        torchvision.transforms.ToTensor(),
    ])
    train_dataset = torchvision.datasets.ImageFolder(
        root=data_path,
        transform=trainTransform)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=64,
        num_workers=0,
        shuffle=True
    )
    return train_loader

net = LSACIFAR10(input_shape=[3, 128, 128], code_length=32, cpd_channels=100)
lossFunction = LSALoss(cpd_channels=100)
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# resume from saved weights if available
try:
    checkpoint = torch.load("savedWeights/enc.pth")
    net.encoder.load_state_dict(checkpoint)
    checkpoint = torch.load("savedWeights/est.pth")
    net.estimator.load_state_dict(checkpoint)
    checkpoint = torch.load("savedWeights/dec.pth")
    net.decoder.load_state_dict(checkpoint)
except Exception as e:
    print(e)

for epoch in range(1000):  # loop over the dataset multiple times
    running_loss = 0.0
    d = load_dataset()
    for i, (data, l) in enumerate(d):
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        x_r, z, z_dist = net.forward(data)
        loss = lossFunction(data, x_r, z, z_dist)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 5 == 0:  # print every 5 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 5))
            running_loss = 0.0

    if (epoch % 5) == 0:
        # print("--------------------{} epoch-----------".format(epoch))
        # net.encoder.eval()
        # net.estimator.eval()
        # net.decoder.eval()
        # z = net.encoder(data)
        # z_dist = net.estimator(z)
        # x_r = net.decoder(z).permute(0, 2, 3, 1).detach().numpy()
        # out = x_r
        # for i in range(out.shape[0]):
        #     cv2.imwrite("constructedImages/outDec{}_{}.jpg".format(epoch, i), out[i, :, :, :] * 255)
        # net.encoder.train()
        # net.estimator.train()
        # net.decoder.eval()
        torch.save(net.encoder.state_dict(), "savedWeights/enc.pth")
        torch.save(net.estimator.state_dict(), "savedWeights/est.pth")
        torch.save(net.decoder.state_dict(), "savedWeights/dec.pth")

print('Finished Training')
```
Output:

```
<class 'torch.Tensor'>
[1,     1] loss: 727109273.600
<class 'torch.Tensor'>
[2,     1] loss: 2495627954514337382531072.000
```
Hi, can you help me rectify this issue?

I managed to correct the above by changing hyperparameters, but after a few steps I get:
```
[1, 71] loss: 3293.943
[1, 76] loss: 3096.135
[1, 81] loss: 3161.412
[1, 86] loss: 3176.714
[1, 91] loss: 2969.099
[1, 96] loss: 3247.497
[1, 101] loss: 3068.246
[1, 106] loss: 2769.233
[1, 111] loss: 2766.316
[1, 116] loss: 2718.537
[1, 121] loss: 3055.004
[1, 126] loss: 2576.473
[1, 131] loss: 2947.091
[1, 136] loss: 2869.674
[1, 141] loss: 2717.064
(interleaved "<class 'torch.Tensor'>" debug prints omitted)

Traceback (most recent call last):
  File "modelTrainer.py", line 261, in <module>
    x_r,z,z_dist = net.forward(data)
  File "modelTrainer.py", line 188, in forward
    z = self.encoder(h)
  File "/home/jbmai/DefectsDetection/NoveltyDetection/models/base.py", line 33, in __call__
    return super(BaseModule, self).__call__(*args, **kwargs)
  File "/home/jbmai/anaconda3/envs/torchenv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "modelTrainer.py", line 73, in forward
    o = self.fc(h)
  File "/home/jbmai/anaconda3/envs/torchenv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jbmai/anaconda3/envs/torchenv/lib/python3.7/site-packages/torch/nn/modules/container.py", line 92, in forward
    input = module(input)
  File "/home/jbmai/anaconda3/envs/torchenv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/jbmai/anaconda3/envs/torchenv/lib/python3.7/site-packages/torch/nn/modules/batchnorm.py", line 83, in forward
    exponential_average_factor, self.eps)
  File "/home/jbmai/anaconda3/envs/torchenv/lib/python3.7/site-packages/torch/nn/functional.py", line 1693, in batch_norm
    raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 256])
```
Hi @learnermaxRL
I would advise using Adam for optimization.
As for the latter error you reported, it is likely due to a singleton batch (i.e., a batch with only one sample) reaching BatchNorm1d. It probably comes from the DataLoader; try setting its drop_last flag to True.
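A minimal sketch of that change, reusing the `train_dataset` from the `load_dataset` function above:

```python
# dropping the last incomplete batch prevents a size-1 batch from reaching
# BatchNorm1d, which cannot normalize a single sample in training mode
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    num_workers=0,
    shuffle=True,
    drop_last=True  # discard the final batch if it has fewer than batch_size samples
)
```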
Thanks, that corrected it. However, I can see that the reconstructed images have negative values in the tensor; is that desirable?
A sample slice of x_r * 255:

```
 ...
 [ 19.986425   33.786083  109.08704  ]]

[[ 49.809772  -32.651962   -1.5576267]
 [ 53.66301   -72.02914    48.711018 ]
 [ 39.252117  -81.27754    75.4854   ]
 ...
 [ 21.410696  -71.10042    68.18309  ]
 [ -8.615957 -179.66095     8.810505 ]
 [ 44.986786   29.80011    93.024506 ]]

[[-68.59759    64.74513    51.421898 ]
 [-18.552599   29.491028   69.56346  ]
 [-49.379646   45.368095   29.86158  ]
 ...
 [-84.07668    41.966274  100.4433   ]
 [  1.8273218  58.350666   60.632793 ]
```
If so, how do I get an RGB image back from them?
Negative values are not undesirable per se, as long as they are a conscious choice. What is the range of your input images?
My input images are RGB images in the standard 0-255 range, fed through the PyTorch DataLoader:

```python
def load_dataset(data_path="/home/ji/Downloads/aug/"):
    # data_path = 'data/train/'
    # note: this transform instance is created but never used
    torchvision.transforms.Grayscale(num_output_channels=1)
    trainTransform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=(128, 128), interpolation=2),
        torchvision.transforms.ToTensor(),
    ])
    train_dataset = torchvision.datasets.ImageFolder(
        root=data_path,
        transform=trainTransform)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=16,
        num_workers=0,
        shuffle=True,
        drop_last=True
        # pin_memory=True
    )
    return train_loader
```
I would advise standardizing input images. E.g., try making each channel zero-mean and unit std.
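For instance, a per-channel standardization can be appended to the transform pipeline via `torchvision.transforms.Normalize`; the mean/std values below are placeholders and should be computed from your own dataset:

```python
trainTransform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(128, 128), interpolation=2),
    torchvision.transforms.ToTensor(),  # scales pixel values to [0, 1]
    # placeholder statistics; replace with the per-channel mean/std of your data
    torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.5, 0.5, 0.5]),
])
```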
That makes sense. However, standardization wouldn't guarantee a non-negative output, right?
Also, I checked: my input data is already between 0 and 1.
PyTorch's default image backend is Pillow, and when you use the `ToTensor()` transform, PyTorch automatically converts all images into the [0, 1] range.
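A quick sanity check of the range actually produced by the loader:

```python
# inspect one batch from the DataLoader defined above
imgs, _ = next(iter(load_dataset()))
print(imgs.min().item(), imgs.max().item())  # expected to lie within [0.0, 1.0]
```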
So shouldn't the reconstruction output be standardized before calculating the reconstruction loss, or should I perhaps use another activation function?
I would advise using a linear activation function for the reconstruction and providing the ground-truth image to the loss in the same range as the input.
Are you talking about the x_r from the decoder? Should I use a sigmoid on top of the decoder's last layer? I am trying to train the model to learn the reconstruction of the image itself, so the ground truth will be the same image:

```python
x_r, z, z_dist = net.forward(data)
# print(x_r.shape)
# print(data.shape)
loss = lossFunction.forward(data, x_r, z, z_dist)
loss.backward()
optimizer.step()
```
Is there anything else I need to do?
```python
net = LSACIFAR10(input_shape=[3, 128, 128], code_length=64, cpd_channels=100)
lossFunction = LSALoss(cpd_channels=100)
```
The sigmoid is not mandatory. I would advise not to use it.
The rest of the code seems fine. Would a [0-1] input deliver those reconstructions?
Sorry, I didn't understand that part. My input is already in [0, 1]; it's the reconstruction that has negative values.
Does the reconstruction loss go down?
How did you do the image reconstruction? That is, what was your input range for the CIFAR images, what was the decoder's activation function, and what was the range of the reconstructed output?
The loss has high values (~2500). Since I am using a sigmoid, learning is pretty slow; I suspect the gradients are very small because the pre-sigmoid outputs are large and saturate the activation. After 100 epochs on a dataset of 1400 images with batch size 64, the loss only decreased by approximately 50 units.
Can you guide me as to what you did?
- range of CIFAR-10 images: [0-1]
- decoder activation function: None
- range of the reconstruction: approximately [0-1]
I see, but how come the values are negative in my case? I mean, the weights aren't negative, my input is in [0, 1], and no non-linearity is applied on top of your code. What is going wrong here?
I saw you are using 128x128 images. The number of downsampling stages in the model is tuned for 32x32 images. You might therefore have a huge linear layer before the bottleneck of the autoencoder, with many parameters slowing down learning.
As for the negative values, it is really weird. I would try optimizing the reconstruction loss only (plain autoencoder) and see if the problem fades.
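A minimal sketch of that sanity check, swapping `LSALoss` for a plain MSE reconstruction objective (the rest of the training loop unchanged):

```python
import torch.nn.functional as F

# plain-autoencoder training step: ignore z and z_dist, optimize reconstruction only
for i, (data, _) in enumerate(load_dataset()):
    optimizer.zero_grad()
    x_r, z, z_dist = net(data)
    loss = F.mse_loss(x_r, data)  # reconstruction loss alone
    loss.backward()
    optimizer.step()
```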
Thanks, I'll let you know the progress; let me lighten up the model a bit. :)
Keep in mind that, if I am right, the best way to lighten up the model would be to add downsample and upsample blocks.
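To illustrate (an untested sketch, assuming the repo's `DownsampleBlock`/`UpsampleBlock` accept equal in/out channel counts): one extra downsampling stage in the encoder, a matching `deepest_shape`, and one extra upsampling stage in the decoder would shrink the bottleneck linear layers for 128x128 inputs:

```python
# encoder: four downsampling stages instead of three
self.conv = nn.Sequential(
    nn.Conv2d(in_channels=c, out_channels=32, kernel_size=3, bias=False),
    activation_fn,
    ResidualBlock(channel_in=32, channel_out=32, activation_fn=activation_fn),
    DownsampleBlock(channel_in=32, channel_out=64, activation_fn=activation_fn),
    DownsampleBlock(channel_in=64, channel_out=128, activation_fn=activation_fn),
    DownsampleBlock(channel_in=128, channel_out=256, activation_fn=activation_fn),
    DownsampleBlock(channel_in=256, channel_out=256, activation_fn=activation_fn),
)
self.deepest_shape = (256, h // 16, w // 16)  # 16x spatial reduction instead of 8x

# decoder: a matching extra upsampling stage in front
self.conv = nn.Sequential(
    UpsampleBlock(channel_in=256, channel_out=256, activation_fn=activation_fn),
    UpsampleBlock(channel_in=256, channel_out=128, activation_fn=activation_fn),
    UpsampleBlock(channel_in=128, channel_out=64, activation_fn=activation_fn),
    UpsampleBlock(channel_in=64, channel_out=32, activation_fn=activation_fn),
    ResidualBlock(channel_in=32, channel_out=32, activation_fn=activation_fn),
    nn.Conv2d(in_channels=32, out_channels=3, kernel_size=1, bias=False)
)
```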
Yeah, sure, will do. But how do I deal with reconstructions that contain negative values? Using a sigmoid is the last option I would prefer. Any help?
Did you encounter negative values during reconstruction? If so, how did you deal with them (apart from using an activation)? Is standardizing x_r at inference/validation time the right approach?