dinov2 icon indicating copy to clipboard operation
dinov2 copied to clipboard

How can DINOv2 be used for image retrieval [image similarity search] in a large image dataset, as shown in the demo (https://dinov2.metademolab.com/demos?category=retrieval)?

Open aaiguy opened this issue 1 year ago • 1 comments

Hello, how can I train a nearest-neighbors model on embeddings extracted with the DINOv2 model from images stored in per-class folders, and then retrieve the most similar images for a query image, as shown in the website demo? I tried the approach below, using scikit-learn's NearestNeighbors:

import torch
from sklearn.neighbors import NearestNeighbors 
import pickle
from PIL import Image
import torchvision.transforms as T
import os 
# import hubconf
import tqdm
from tqdm import tqdm_notebook
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
print('device:',device)
# dinov2_vits14 = hubconf.dinov2_vits14()
# NOTE: the variable is named after the ViT-S/14 variant, but the entry point
# loaded here is 'dinov2_vitg14'. The name is kept because the rest of the
# script refers to it.
dinov2_vits14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitg14')
dinov2_vits14.to(device)
# The model is only used for feature extraction, so switch it to evaluation
# mode to make sure no training-only behaviour is active during inference.
dinov2_vits14.eval()
def extract_features(filename):
    """Compute the model embedding of a single image file.

    Args:
        filename: Path of the image to embed.

    Returns:
        The first entry of the model output for a one-image batch, as a
        NumPy array.
    """
    # Decode inside a context manager so the file handle is released, and
    # force three channels: grayscale, palette or CMYK files would otherwise
    # reach the model with the wrong number (or meaning) of channels.
    with Image.open(filename) as handle:
        img = handle.convert('RGB')

    # NOTE(review): mean/std of 0.5 are kept from the original code; DINOv2
    # presumably expects ImageNet statistics -- confirm against the model's
    # reference preprocessing, as this may affect retrieval quality.
    transform = T.Compose([
        T.Resize(224),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=[0.5], std=[0.5]),
    ])

    batch = transform(img).unsqueeze(0)

    with torch.no_grad():
        # Send the input to the same device the model was moved to instead of
        # hard-coding 'cuda', so the script also runs on CPU-only machines.
        features = dinov2_vits14(batch.to(device))[0]

    # A tensor must live in host memory before it can be turned into NumPy.
    return features.cpu().numpy()

# Image file suffixes accepted by get_file_list (compared case-insensitively).
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']


def get_file_list(root_dir):
    """Recursively collect the image files found under ``root_dir``.

    A file is kept when its name ends with one of ``extensions``; the
    comparison ignores case. Paths that do not exist when checked (for
    example dangling symlinks) are printed and left out.

    Args:
        root_dir: Directory to walk.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    # Match on the suffix rather than anywhere in the name, so that e.g.
    # "photo.jpg.txt" is not mistaken for an image.
    suffixes = tuple(ext.lower() for ext in extensions)
    file_list = []
    for root, _directories, filenames in os.walk(root_dir):
        for filename in filenames:
            if not filename.lower().endswith(suffixes):
                continue
            filepath = os.path.join(root, filename)
            if os.path.exists(filepath):
                file_list.append(filepath)
            else:
                print(filepath)
    return file_list
def extract_features(filename):
    """Return the model embedding of the image stored at ``filename``.

    Args:
        filename: Path of the image to embed.

    Returns:
        The first entry of the model output for a one-image batch, as a
        NumPy array.
    """
    # Convert to RGB so grayscale, palette and CMYK images all reach the
    # model as three colour channels; the context manager closes the file.
    with Image.open(filename) as handle:
        img = handle.convert('RGB')

    # NOTE(review): mean/std of 0.5 are kept from the original code; DINOv2
    # presumably expects ImageNet statistics -- confirm against the model's
    # reference preprocessing, as this may affect retrieval quality.
    transform = T.Compose([
        T.Resize(224),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=[0.5], std=[0.5]),
    ])

    batch = transform(img).unsqueeze(0)

    with torch.no_grad():
        # Follow the device chosen for the model rather than assuming CUDA,
        # otherwise this call fails on machines without a GPU.
        features = dinov2_vits14(batch.to(device))[0]

    return features.cpu().numpy()

# Image file suffixes accepted by get_file_list (compared case-insensitively).
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']


def get_file_list(root_dir):
    """Walk ``root_dir`` and return the paths of all image files below it.

    A file counts as an image when its name ends, ignoring case, with one of
    the suffixes in ``extensions``. Paths that do not exist when checked (for
    example dangling symlinks) are printed instead of being returned.

    Args:
        root_dir: Directory to walk.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    # A substring test would also accept names such as "photo.jpg.txt";
    # only the end of the file name decides.
    suffixes = tuple(ext.lower() for ext in extensions)
    file_list = []
    for root, _directories, filenames in os.walk(root_dir):
        for filename in filenames:
            if not filename.lower().endswith(suffixes):
                continue
            filepath = os.path.join(root, filename)
            if os.path.exists(filepath):
                file_list.append(filepath)
            else:
                print(filepath)
    return file_list

# Path to the root folder of your dataset.
root_dir = 'image_folder'
# Sort so that the order of the file names (and therefore of the features)
# is reproducible between runs.
filenames = sorted(get_file_list(root_dir))
print('Total files :', len(filenames))

# One embedding per image, in the same order as `filenames`, so that a
# neighbour index returned by the model maps straight back to a file name.
feature_list = [extract_features(path) for path in tqdm.tqdm(filenames)]

# Use context managers so every output file is flushed and closed.
with open('dino-all-feature-list.pickle', 'wb') as f:
    pickle.dump(feature_list, f)
with open('dino-all-filenames.pickle', 'wb') as f:
    pickle.dump(filenames, f)

neighbors = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='euclidean').fit(feature_list)
# Save the model to a file
with open('dino-all-neighbors2.pkl', 'wb') as f:
    pickle.dump(neighbors, f)

With the DINOv2-based model trained above, I get around 70% accuracy on the test data when retrieving images of the same class. Is there a better way to structure my approach in order to improve the accuracy?

Originally posted by @aaiguy in https://github.com/facebookresearch/dinov2/issues/2#issuecomment-1532493165

aaiguy avatar May 03 '23 10:05 aaiguy