
U7: feature map visualisation

Open hariouat opened this issue 2 years ago • 3 comments

I added feature map visualisation to YOLOv7 like this:

1. in utils/plots.py, add the function feature_visualization:

def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
    """
    x:           Features to be visualized
    module_type: Module type
    stage:       Module stage within model
    n:           Maximum number of feature maps to plot
    save_dir:    Directory to save results
    """
    if 'Detect' not in module_type:
        batch, channels, height, width = x.shape  # batch, channels, height, width
        if height > 1 and width > 1:
            # f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"

            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
            n = min(n, channels)  # number of plots
            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # ceil(n/8) rows x 8 cols
            ax = ax.ravel()
            plt.subplots_adjust(wspace=0.05, hspace=0.05)
            for i in range(n):
                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
                ax[i].axis('off')

            print(f'Saving {f}... ({n}/{channels})')
            plt.savefig(f, dpi=300, bbox_inches='tight')
            plt.close()
            # np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy())  # npy save
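A quick standalone check of the function, as a minimal sketch run from the repository root; the tensor shape and the module_type string below are made up for illustration:

import torch
from utils.plots import feature_visualization

x = torch.randn(1, 64, 80, 80)  # fake activation: batch=1, 64 channels, 80x80 spatial
feature_visualization(x, 'models.common.Conv', stage=2, n=32)
# writes stage2_Conv_features.png with a grid of up to 32 channel maps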
2. in models/experimental.py, change the class Ensemble like this:

class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False, visualize=False):
        y = []
        for module in self:
            y.append(module(x, augment, visualize=visualize)[0])
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
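With this change the visualize flag also flows through an ensemble of weights. An illustrative sketch (the .pt file names are placeholders):

import torch
from models.experimental import attempt_load

device = torch.device('cpu')
model = attempt_load(['yolov7.pt', 'yolov7x.pt'], map_location=device)  # list of weights -> Ensemble
img = torch.zeros(1, 3, 640, 640, device=device)  # dummy input
pred = model(img, augment=False, visualize=True)[0]  # feature maps are dumped during the forward pass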

3. in models/common.py, change the class autoShape like this:

class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

def __init__(self, model):
    super(autoShape, self).__init__()
    self.model = model.eval()

def autoshape(self):
    print('autoShape already enabled, skipping... ')  # model already converted to model.autoshape()
    return self

@torch.no_grad()
def forward(self, imgs, size=640, augment=False, profile=False,visualize=False):
    # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
    #   filename:   imgs = 'data/samples/zidane.jpg'
    #   URI:             = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
    #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
    #   PIL:             = Image.open('image.jpg')  # HWC x(640,1280,3)
    #   numpy:           = np.zeros((640,1280,3))  # HWC
    #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
    #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

    t = [time_synchronized()]
    p = next(self.model.parameters())  # for device and type
    if isinstance(imgs, torch.Tensor):  # torch
        with amp.autocast(enabled=p.device.type != 'cpu'):
            return self.model(imgs.to(p.device).type_as(p), augment, profile,visualize)  # inference

    # Pre-process
    n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
    shape0, shape1, files = [], [], []  # image and inference shapes, filenames
    for i, im in enumerate(imgs):
        f = f'image{i}'  # filename
        if isinstance(im, str):  # filename or uri
            im, f = np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im)), im
        elif isinstance(im, Image.Image):  # PIL Image
            im, f = np.asarray(im), getattr(im, 'filename', f) or f
        files.append(Path(f).with_suffix('.jpg').name)
        if im.shape[0] < 5:  # image in CHW
            im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
        im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
        s = im.shape[:2]  # HWC
        shape0.append(s)  # image shape
        g = (size / max(s))  # gain
        shape1.append([y * g for y in s])
        imgs[i] = im  # update
    shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
    x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
    x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
    x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
    x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
    t.append(time_synchronized())

    with amp.autocast(enabled=p.device.type != 'cpu'):
        # Inference
        y = self.model(x, augment, profile,visualize)[0]  # forward
        t.append(time_synchronized())

        # Post-process
        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
        for i in range(n):
            scale_coords(shape1, y[i][:, :4], shape0[i])

        t.append(time_synchronized())
        return Detections(imgs, y, files, t, self.names, x.shape)

4. in models/yolo.py:

4.1. from utils.plots import feature_visualization

4.2. in class Model, change forward and forward_once:

def forward(self, x, augment=False, profile=False,visualize=False):
    if augment:
        img_size = x.shape[-2:]  # height, width
        s = [1, 0.83, 0.67]  # scales
        f = [None, 3, None]  # flips (2-ud, 3-lr)
        y = []  # outputs
        for si, fi in zip(s, f):
            xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
            yi = self.forward_once(xi)[0]  # forward
            # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
            yi[..., :4] /= si  # de-scale
            if fi == 2:
                yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
            elif fi == 3:
                yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
            y.append(yi)
        return torch.cat(y, 1), None  # augmented inference, train
    else:
        return self.forward_once(x, profile,visualize)  # single-scale inference, train

def forward_once(self, x, profile=False,visualize=False):
    y, dt = [], []  # outputs
    for m in self.model:
        if m.f != -1:  # if not from previous layer
            x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

        if not hasattr(self, 'traced'):
            self.traced=False

        if self.traced:
            if isinstance(m, Detect) or isinstance(m, IDetect) or isinstance(m, IAuxDetect) or isinstance(m, IKeypoint):
                break

        if profile:
            c = isinstance(m, (Detect, IDetect, IAuxDetect, IBin))
            o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPS
            for _ in range(10):
                m(x.copy() if c else x)
            t = time_synchronized()
            for _ in range(10):
                m(x.copy() if c else x)
            dt.append((time_synchronized() - t) * 100)
            print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))

        x = m(x)  # run
        
        y.append(x if m.i in self.save else None)  # save output
        
        if visualize and m.type == 'models.common.Multiply':
            feature_visualization(x, m.type, m.i, save_dir=visualize)

    if profile:
        print('%.1fms total' % sum(dt))
    return x
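The check above only fires for modules whose m.type string matches. To see which layer indices and type strings your model actually contains, a small sketch like this can help (the weight file name is illustrative):

from models.experimental import attempt_load

model = attempt_load('yolov7.pt', map_location='cpu')
for m in model.model:
    print(m.i, m.type)  # e.g. "0 models.common.Conv"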
5. in detect.py

5.1 in def detect(save_img=False):

    source, weights, view_img, save_txt, imgsz, trace, visualize = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace, opt.visualize

5.2

    # Warmup
    if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]):
        old_img_b = img.shape[0]
        old_img_h = img.shape[2]
        old_img_w = img.shape[3]
        for i in range(3):
            model(img, augment=opt.augment, visualize=opt.visualize)[0]

    # Inference
    t1 = time_synchronized()
    with torch.no_grad():   # Calculating gradients would cause a GPU memory leak
        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
        pred = model(img, augment=opt.augment, visualize=visualize)[0]
    t2 = time_synchronized()

5.3 add the command-line flag:

    parser.add_argument('--visualize', action='store_true', help='visualize features')
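With the flag in place, a run that dumps the feature maps could look like this (the weights and source paths are only examples):

    python detect.py --weights yolov7.pt --source inference/images/horses.jpg --visualize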

hariouat · Feb 17 '23

Hello, can this method visualize the feature maps of each layer in YOLOv7?

yangyahu-1994 · Jan 17 '24

Hello, can this method visualize the feature maps of each layer in YOLOv7?

Yes, you have to specify the name of the layer you want to visualise.
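For example, in Model.forward_once you would change the condition to match the layer(s) you care about; the type strings and indices below are only illustrative:

# match by module type string ...
if visualize and m.type in ('models.common.Conv', 'models.common.SPPCSPC'):
    feature_visualization(x, m.type, m.i, save_dir=visualize)

# ... or by layer index m.i (indices depend on the cfg you trained with)
if visualize and m.i in (24, 51):
    feature_visualization(x, m.type, m.i, save_dir=visualize)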

hariouat · Jan 17 '24

Hello, can this method visualize the feature maps of each layer in YOLOv7?

Yes, you have to specify the name of the layer you want to visualise.

Thanks. I redesigned the detection head as a decoupled head. Can this method visualize the feature maps of the classification and regression branches?

yangyahu-1994 · Jan 17 '24