attention-cnn icon indicating copy to clipboard operation
attention-cnn copied to clipboard

Why not cut the picture directly into quarters instead of performing the following operation?

Open Pandaxia8 opened this issue 3 years ago • 3 comments

def downsample_concatenate(X, kernel):
    """Fold each non-overlapping `kernel` x `kernel` spatial patch into the channel axis.

    Input is channels-last, shape (b, h, w, c); output has shape
    (b, h // kernel, w // kernel, kernel * kernel * c), where the new channel
    axis is ordered (row offset in patch, column offset in patch, channel).
    """
    b, h, w, c = X.shape
    # Split each spatial axis into (patch index, offset inside the patch).
    patches = X.reshape(b, h // kernel, kernel, w // kernel, kernel, c)
    # Move both in-patch offsets next to the channel axis:
    # (b, h//k, w//k, row_offset, col_offset, c).
    patches = patches.permute(0, 1, 3, 2, 4, 5)
    # Merge (row_offset, col_offset, c) into a single channel dimension.
    return patches.reshape(b, h // kernel, w // kernel, kernel * kernel * c)

I don't understand why the picture isn't simply cut directly into quarters. Could you tell me what would happen if I replaced the code above with the following? Thank you! :)

def downsample_concatenate(X, kernel):
    """Naive variant: reshape (b, h, w, c) to (b, h//kernel, w//kernel, c*kernel*kernel).

    This merely packs `kernel` consecutive full rows of row-major memory into
    each output position; it does NOT gather 2-D kernel x kernel patches.
    """
    b, h, w, c = X.shape
    return X.reshape(b, h // kernel, w // kernel, c * kernel * kernel)

Pandaxia8 avatar Dec 02 '21 14:12 Pandaxia8

Hello,

You can check the explanation in issue #5.

Here is the difference:

import torch

def downsample_concatenate(X, kernel):
    """Gather each kernel x kernel patch of a channels-last (b, h, w, c) tensor.

    Returns shape (b, h // kernel, w // kernel, kernel * kernel * c); the
    output channel axis is ordered (patch row offset, patch column offset, c).
    """
    b, h, w, c = X.shape
    # Group `kernel` adjacent columns: (b, h, w//k, k*c).
    out = X.contiguous().view(b, h, w // kernel, kernel * c)
    # Swap the row axis with the column-group axis so that rows belonging to
    # the same patch become adjacent in memory: (b, w//k, h, k*c).
    out = out.transpose(1, 2).contiguous()
    # Group `kernel` adjacent rows into the channel axis: (b, w//k, h//k, k*k*c).
    out = out.view(b, w // kernel, h // kernel, kernel * kernel * c)
    # Swap back to (b, h//k, w//k, k*k*c).
    return out.transpose(1, 2).contiguous()

def downsample_concatenate2(X, kernel):
    """Plain row-major reshape to (b, h//kernel, w//kernel, c*kernel*kernel).

    Packs `kernel` consecutive full rows per output position rather than
    true 2-D patches.
    """
    b, h, w, c = X.shape
    target_shape = (b, h // kernel, w // kernel, c * kernel * kernel)
    return X.contiguous().view(target_shape)

# Demonstration: a 4x4 single-channel image numbered 0..15 row-major.
x = torch.arange(16).view(1, 4, 4, 1)
# Patch version: each output position holds one 2x2 spatial patch
# ([0,1] from row 0 plus [4,5] from row 1, etc.).
downsample_concatenate(x, 2)
# tensor([[[[ 0,  1,  4,  5],
#           [ 2,  3,  6,  7]],
# 
#          [[ 8,  9, 12, 13],
#           [10, 11, 14, 15]]]])

# Naive reshape: each output position holds a run of 4 consecutive
# row-major elements, i.e. a full row slice, not a 2x2 patch.
downsample_concatenate2(x, 2)
# tensor([[[[ 0,  1,  2,  3],
#           [ 4,  5,  6,  7]],
# 
#          [[ 8,  9, 10, 11],
#           [12, 13, 14, 15]]]])

jbcdnr avatar Dec 02 '21 14:12 jbcdnr

Another beautiful solution with einops:

einops.rearrange(x, "batch (h p_h) (w p_w) c -> batch h w (p_h p_w c)", p_h=kernel, p_w=kernel)

jbcdnr avatar Dec 02 '21 14:12 jbcdnr

Hello,

You can check the explanation in issue #5.

Here is the difference:

import torch

def downsample_concatenate(X, kernel):
    """Stack every kernel x kernel spatial patch into the channel dimension.

    X is channels-last (b, h, w, c); the result has shape
    (b, h // kernel, w // kernel, kernel * kernel * c) with the channel axis
    ordered (row offset in patch, column offset in patch, channel).
    """
    b, h, w, c = X.shape
    # Slide non-overlapping windows over both spatial axes:
    # (b, h//k, w//k, c, row_offset, col_offset).
    windows = X.unfold(1, kernel, kernel).unfold(2, kernel, kernel)
    # Reorder so channels come last: (b, h//k, w//k, row_offset, col_offset, c).
    windows = windows.permute(0, 1, 2, 4, 5, 3)
    # Flatten (row_offset, col_offset, c) into one channel axis.
    return windows.reshape(b, h // kernel, w // kernel, kernel * kernel * c)

def downsample_concatenate2(X, kernel):
    """Row-major reshape to (b, h//kernel, w//kernel, c*kernel*kernel).

    Each output position receives consecutive row-major elements (whole row
    runs), not a 2-D kernel x kernel patch.
    """
    b, h, w, c = X.shape
    return X.reshape(b, h // kernel, w // kernel, c * kernel * kernel)

# Demonstration: a 4x4 single-channel image numbered 0..15 row-major.
x = torch.arange(16).view(1, 4, 4, 1)
# Patch version groups true 2x2 neighborhoods into the channel axis.
downsample_concatenate(x, 2)
# tensor([[[[ 0,  1,  4,  5],
#           [ 2,  3,  6,  7]],
# 
#          [[ 8,  9, 12, 13],
#           [10, 11, 14, 15]]]])

# Naive reshape only packs consecutive row-major runs of 4 elements.
downsample_concatenate2(x, 2)
# tensor([[[[ 0,  1,  2,  3],
#           [ 4,  5,  6,  7]],
# 
#          [[ 8,  9, 10, 11],
#           [12, 13, 14, 15]]]])

I see. Thank you very much for your answer! : )

Pandaxia8 avatar Dec 02 '21 15:12 Pandaxia8