LoRA
Fix Conv1d (Convnd) implementation
The current Conv1d (and Conv3d) layers do not work because the shape of (lora_B @ lora_A) is incompatible with the shape of conv.weight. I changed only lora_B's initialization: its shape now depends on the number of dimensions of conv.weight, so it works for the 1-D through n-D cases.
Before:

```python
self.lora_B = nn.Parameter(
    self.conv.weight.new_zeros((out_channels // self.conv.groups * kernel_size, r * kernel_size))
)
```

After:

```python
self.lora_B = nn.Parameter(
    self.conv.weight.new_zeros(
        (out_channels // self.conv.groups * kernel_size ** (self.conv.weight.dim() - 3),
         r * kernel_size)
    )
)
```
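As a quick sanity check (a sketch, not part of the diff; it assumes the `lora_A` shape used by loralib's `ConvLoRA`, i.e. `(r * kernel_size, in_channels * kernel_size)`), the exponent makes `lora_B @ lora_A` hold exactly as many elements as `conv.weight`, so the `.view(conv.weight.shape)` in the merge path succeeds for 1-D through 3-D:

```python
import torch.nn as nn

r, kernel_size = 4, 3
for conv_cls in (nn.Conv1d, nn.Conv2d, nn.Conv3d):
    conv = conv_cls(in_channels=8, out_channels=16, kernel_size=kernel_size)
    lora_A = conv.weight.new_zeros((r * kernel_size, 8 * kernel_size))
    # Fixed initialization: the kernel_size exponent tracks the number of
    # spatial dimensions (weight.dim() - 3 is 0, 1, 2 for Conv1d/2d/3d).
    lora_B = conv.weight.new_zeros(
        (16 // conv.groups * kernel_size ** (conv.weight.dim() - 3), r * kernel_size)
    )
    # .view() only succeeds when the element counts match exactly.
    delta = (lora_B @ lora_A).view(conv.weight.shape)
    assert delta.shape == conv.weight.shape
print("lora_B @ lora_A reshapes cleanly for Conv1d, Conv2d, and Conv3d")
```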
Fixes #115
Hi Nan (@sentient-codebot), great implementation! I'm wondering whether you have attempted nn.ConvTranspose1d, nn.ConvTranspose2d, or nn.ConvTranspose3d. This has been bugging me for quite a while. Would you mind sharing your implementation in case you did? Thanks!
Hi, I am trying to make LoRA applicable to ConvTranspose3d with this code:
```python
import math

import torch.nn as nn
import torch.nn.functional as F

from loralib.layers import LoRALayer  # base class defined in loralib/layers.py


class ConvTransposeLoRA(nn.Module, LoRALayer):
    def __init__(self, conv_module, in_channels, out_channels, kernel_size,
                 r=0, lora_alpha=1, lora_dropout=0., merge_weights=True, **kwargs):
        super(ConvTransposeLoRA, self).__init__()
        self.conv = conv_module(in_channels, out_channels, kernel_size, **kwargs)
        LoRALayer.__init__(self, r=r, lora_alpha=lora_alpha,
                           lora_dropout=lora_dropout, merge_weights=merge_weights)
        assert isinstance(kernel_size, int)
        # Actual trainable parameters
        if r > 0:
            self.lora_A = nn.Parameter(
                self.conv.weight.new_zeros((r * kernel_size, in_channels * kernel_size))
            )
            # Uses the exponent fix from this PR so that (lora_B @ lora_A) has
            # exactly as many elements as conv.weight (dim() is 5 for 3-D conv).
            self.lora_B = nn.Parameter(
                self.conv.weight.new_zeros(
                    (out_channels // self.conv.groups * kernel_size ** (self.conv.weight.dim() - 3),
                     r * kernel_size)
                )
            )
            self.scaling = self.lora_alpha / self.r
            # Freezing the pre-trained weight matrix
            self.conv.weight.requires_grad = False
        self.reset_parameters()
        self.merged = False

    def reset_parameters(self):
        self.conv.reset_parameters()
        if hasattr(self, 'lora_A'):
            # Initialize A the same way as the default for nn.Linear and B to zero
            nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
            nn.init.zeros_(self.lora_B)

    def train(self, mode=True):
        super(ConvTransposeLoRA, self).train(mode)
        if mode:
            if self.merge_weights and self.merged:
                if self.r > 0:
                    # Make sure that the weights are not merged
                    self.conv.weight.data -= (self.lora_B @ self.lora_A).view(self.conv.weight.shape) * self.scaling
                self.merged = False
        else:
            if self.merge_weights and not self.merged:
                if self.r > 0:
                    # Merge the weights and mark it
                    self.conv.weight.data += (self.lora_B @ self.lora_A).view(self.conv.weight.shape) * self.scaling
                self.merged = True

    def forward(self, x, output_size=None):
        if self.r > 0 and not self.merged:
            print(x.shape)  # debug
            num_spatial_dims = 3
            output_size = (33, 33, 33)  # hardcoded workaround, see the question below
            # _output_padding is a (private) instance method, so call it on self.conv
            output_padding = self.conv._output_padding(
                x, output_size,
                self.conv.stride, self.conv.padding,
                self.conv.kernel_size,  # type: ignore[arg-type]
                num_spatial_dims,
                self.conv.dilation)  # type: ignore[arg-type]
            return F.conv_transpose3d(
                x,
                self.conv.weight + (self.lora_B @ self.lora_A).view(self.conv.weight.shape) * self.scaling,
                self.conv.bias, self.conv.stride, self.conv.padding,
                output_padding, self.conv.groups, self.conv.dilation)
        return self.conv(x, output_size)
```
However, a problem occurs when output_size is None, and I cannot hardcode output_size because it fluctuates from call to call. Would you know how to solve this?
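For what it's worth, one possible way around the hardcoded size (a sketch, not from this thread; it relies on the private `_output_padding` method of `ConvTransposeNd`, whose signature changed in PyTorch 1.12 to take `num_spatial_dims`, so it assumes PyTorch >= 1.12): pass `output_size` straight through, since `_output_padding` falls back to the module's configured `output_padding` whenever it receives `None`:

```python
def forward(self, x, output_size=None):
    if self.r > 0 and not self.merged:
        # No hardcoded spatial size: when output_size is None, _output_padding
        # returns self.conv.output_padding; callers that need an exact output
        # shape can still pass output_size explicitly.
        output_padding = self.conv._output_padding(
            x, output_size,
            self.conv.stride, self.conv.padding, self.conv.kernel_size,
            3,  # num_spatial_dims for ConvTranspose3d
            self.conv.dilation)
        return F.conv_transpose3d(
            x,
            self.conv.weight + (self.lora_B @ self.lora_A).view(self.conv.weight.shape) * self.scaling,
            self.conv.bias, self.conv.stride, self.conv.padding,
            output_padding, self.conv.groups, self.conv.dilation)
    return self.conv(x, output_size)
```

Since this touches a private PyTorch API, pinning the PyTorch version (or vendoring the padding computation) may be the safer option.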