SOP
Question about how to use optimizer_loss
Hi!
Firstly, thanks a lot for your work :)
I am trying to use SOP in another project, but it throws an error. This is my setup code:
model1 = model1(num_classes=10)   # model1/model2 stand in for my two network classes
model2 = model2(num_classes=10)
model1 = model1.cuda()
model2 = model2.cuda()

optimizer = optim.SGD([{'params': model1.parameters()},
                       {'params': model2.parameters()}], lr=1e-3)

SOP1_loss = overparametrization_loss(num_examp=50000, num_classes=10, ratio_consistency=0.9, ratio_balance=0.1)
SOP2_loss = overparametrization_loss(num_examp=50000, num_classes=10, ratio_consistency=0.9, ratio_balance=0.1)

print(SOP1_loss.u)

optimizer_loss = optim.SGD([{'params': SOP1_loss.u, 'lr': 1, 'weight_decay': 0},
                            {'params': SOP1_loss.v, 'lr': 10, 'weight_decay': 0},
                            {'params': SOP2_loss.u, 'lr': 1, 'weight_decay': 0},
                            {'params': SOP2_loss.v, 'lr': 10, 'weight_decay': 0}])
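For context, my intention is then to step both optimizers in the training loop roughly like this (my own sketch of the intended usage; the loader and variable names are mine, and my loader also yields the sample index):

import torch.nn.functional as F

for index, images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    labels_one_hot = F.one_hot(labels, num_classes=10).float()  # forward() expects one-hot labels

    optimizer.zero_grad()
    optimizer_loss.zero_grad()

    loss = SOP1_loss(index, model1(images), labels_one_hot) \
         + SOP2_loss(index, model2(images), labels_one_hot)

    loss.backward()
    optimizer.step()        # updates model1 and model2
    optimizer_loss.step()   # updates the u and v over-parameters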
But it already fails while building optimizer_loss above: when I run the setup, it prints u and then raises the error below.
tensor([[ 1.3391e-08],
[ 2.0517e-09],
[-1.6879e-08],
...,
[ 2.4108e-09],
[ 1.3343e-09],
[-1.0457e-08]], device='cuda:0', grad_fn=<ToCopyBackward0>)
Traceback (most recent call last):
optimizer_loss = optim.SGD([{'params': SOP1_loss.u, 'lr': 1, 'weight_decay':0},
File "/.local/lib/python3.8/site-packages/torch/optim/sgd.py", line 27, in __init__
super().__init__(params, defaults)
File "/.local/lib/python3.8/site-packages/torch/optim/optimizer.py", line 192, in __init__
self.add_param_group(param_group)
File ".local/lib/python3.8/site-packages/torch/optim/optimizer.py", line 515, in add_param_group
raise ValueError("can't optimize a non-leaf Tensor")
ValueError: can't optimize a non-leaf Tensor
Does someone have an idea of what is going on? I think I am using it incorrectly.
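From what I understand, the grad_fn=<ToCopyBackward0> in the printout above means u is no longer a leaf tensor, and SGD only accepts leaf tensors. A tiny repro of that behaviour (just my guess at the cause, not the repo's code; it needs a CUDA device, as in my setup):

import torch
import torch.nn as nn

u = nn.Parameter(torch.empty(5, 1))
print(u.is_leaf)        # True: a freshly created nn.Parameter is a leaf tensor

u_moved = u.cuda()      # calling .cuda()/.to() on a Parameter returns a new, non-leaf tensor
print(u_moved.is_leaf)  # False: it now carries grad_fn=<ToCopyBackward0>

# optim.SGD([{'params': u_moved, 'lr': 1}]) would raise "ValueError: can't optimize a non-leaf Tensor"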
Hello, I have tried multiple modifications to the code but couldn't get it to do what I want. This is the code of overparametrization_loss I have:
import torch
import torch.nn as nn
import torch.nn.functional as F
# ConfigParser comes from the repo's config utilities; it supplies num_classes and the init mean/std

class overparametrization_loss(nn.Module):
    def __init__(self, num_examp, num_classes=10, ratio_consistency=0, ratio_balance=0):
        super(overparametrization_loss, self).__init__()
        self.num_classes = num_classes
        self.config = ConfigParser.get_instance()
        self.USE_CUDA = torch.cuda.is_available()
        self.num_examp = num_examp
        self.ratio_consistency = ratio_consistency
        self.ratio_balance = ratio_balance

        # over-parameterization variables: one u per example, one v per example and class
        self.u = nn.Parameter(torch.empty(num_examp, 1, dtype=torch.float32))
        self.v = nn.Parameter(torch.empty(num_examp, num_classes, dtype=torch.float32))
        self.init_param(mean=self.config['reparam_arch']['args']['mean'], std=self.config['reparam_arch']['args']['std'])

    def init_param(self, mean=0., std=1e-8):
        torch.nn.init.normal_(self.u, mean=mean, std=std)
        torch.nn.init.normal_(self.v, mean=mean, std=std)

    def forward(self, index, outputs, label):
        # label = torch.zeros(len(label), self.config['num_classes']).cuda().scatter_(1, label.view(-1,1), 1)
        if len(outputs) > len(index):
            # two augmented views were concatenated: split them and ensemble
            output, output2 = torch.chunk(outputs, 2)
            ensembled_output = 0.5 * (output + output2).detach()
        else:
            output = outputs
            ensembled_output = output.detach()

        eps = 1e-4

        U_square = self.u[index] ** 2 * label
        V_square = self.v[index] ** 2 * (1 - label)
        U_square = torch.clamp(U_square, 0, 1)
        V_square = torch.clamp(V_square, 0, 1)
        E = U_square - V_square
        self.E = E

        original_prediction = F.softmax(output, dim=1)
        prediction = torch.clamp(original_prediction + U_square - V_square.detach(), min=eps)
        prediction = F.normalize(prediction, p=1, eps=eps)
        prediction = torch.clamp(prediction, min=eps, max=1.0)

        label_one_hot = self.soft_to_hard(output.detach())

        MSE_loss = F.mse_loss((label_one_hot + U_square - V_square), label, reduction='sum') / len(label)
        loss = torch.mean(-torch.sum((label) * torch.log(prediction), dim=-1))
        loss += MSE_loss

        if self.ratio_balance > 0:
            # encourage the average prediction to stay close to the uniform prior
            avg_prediction = torch.mean(prediction, dim=0)
            prior_distr = 1.0 / self.num_classes * torch.ones_like(avg_prediction)
            avg_prediction = torch.clamp(avg_prediction, min=eps, max=1.0)
            balance_kl = torch.mean(-(prior_distr * torch.log(avg_prediction)).sum(dim=0))
            loss += self.ratio_balance * balance_kl

        if (len(outputs) > len(index)) and (self.ratio_consistency > 0):
            consistency_loss = self.consistency_loss(index, output, output2)
            loss += self.ratio_consistency * torch.mean(consistency_loss)

        return loss

    def consistency_loss(self, index, output1, output2):
        # KL divergence between the predictions of the two augmented views
        preds1 = F.softmax(output1, dim=1).detach()
        preds2 = F.log_softmax(output2, dim=1)
        loss_kldiv = F.kl_div(preds2, preds1, reduction='none')
        loss_kldiv = torch.sum(loss_kldiv, dim=1)
        return loss_kldiv

    def soft_to_hard(self, x):
        # convert logits to one-hot pseudo-labels via argmax
        with torch.no_grad():
            return (torch.zeros(len(x), self.config['num_classes'])).cuda().scatter_(1, (x.argmax(dim=1)).view(-1, 1), 1)
Can someone help me debug this, please? :)
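For reference, the workaround I am experimenting with is to move the whole loss module to the GPU instead of touching u and v individually, so that they stay leaf tensors; this is a sketch under my own assumptions, not something confirmed by the repo:

# Module.cuda() moves parameters in place, so SOP1_loss.u and SOP1_loss.v remain leaf tensors
SOP1_loss = overparametrization_loss(num_examp=50000, num_classes=10,
                                     ratio_consistency=0.9, ratio_balance=0.1).cuda()
SOP2_loss = overparametrization_loss(num_examp=50000, num_classes=10,
                                     ratio_consistency=0.9, ratio_balance=0.1).cuda()

print(SOP1_loss.u.is_leaf)  # expected: True

optimizer_loss = optim.SGD([{'params': SOP1_loss.u, 'lr': 1, 'weight_decay': 0},
                            {'params': SOP1_loss.v, 'lr': 10, 'weight_decay': 0},
                            {'params': SOP2_loss.u, 'lr': 1, 'weight_decay': 0},
                            {'params': SOP2_loss.v, 'lr': 10, 'weight_decay': 0}])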