deep_sort_pytorch
Refactor code in train.py
Hi @ZQPei, I'm currently working on a tracking-by-detection project closely related to DeepSort, and I've spent a lot of effort getting your repo to work for me. Would you mind rearranging the code in train.py? Here is my rewrite of train.py, where I moved each related block into its own function, like the following:
```python
import argparse
import os
import time

import torch
import torch.backends.cudnn as cudnn
import torchvision

from model import Net


def get_parser():
    parser = argparse.ArgumentParser(description='Train feature extractor for DeepSort')
    parser.add_argument('--data-dir', default='./Mars', type=str,
                        help='Path to data directory, e.g. ./Mars or ./Market1501')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--gpu-id', default=0, type=int)
    parser.add_argument('--learning-rate', default=0.1, type=float, help='initial learning rate')
    parser.add_argument('--interval', '-i', default=20, type=int)
    parser.add_argument('--resume', '-r', action='store_true')
    return parser


def setup_device(gpu_id, no_cuda):
    use_cuda = torch.cuda.is_available() and not no_cuda
    device = 'cuda:{}'.format(gpu_id) if use_cuda else 'cpu'
    if use_cuda:
        cudnn.benchmark = True
    return device


def load_data(data_dir):
    train_dir = os.path.join(data_dir, 'train')
    test_dir = os.path.join(data_dir, 'test')
    transform_train = torchvision.transforms.Compose([
        torchvision.transforms.RandomCrop((128, 64), padding=4),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # No random augmentation at test time; just resize to the input shape.
    transform_test = torchvision.transforms.Compose([
        torchvision.transforms.Resize((128, 64)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    trainloader = torch.utils.data.DataLoader(
        torchvision.datasets.ImageFolder(train_dir, transform=transform_train),
        batch_size=64, shuffle=True)
    testloader = torch.utils.data.DataLoader(
        torchvision.datasets.ImageFolder(test_dir, transform=transform_test),
        batch_size=64, shuffle=True)
    num_classes = len(trainloader.dataset.classes)
    return trainloader, testloader, num_classes


def define_net(num_classes, resume):
    start_epoch = 0
    best_acc = 0.
    net = Net(num_classes=num_classes)
    if resume:
        assert os.path.isfile('./checkpoint/ckpt.t7'), 'Error: no checkpoint file found!'
        print('Loading from ./checkpoint/ckpt.t7')
        checkpoint = torch.load('./checkpoint/ckpt.t7')
        net.load_state_dict(checkpoint['net_dict'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
    return net, best_acc, start_epoch


def setup_loss_optimizer(net, learning_rate):
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), learning_rate,
                                momentum=0.9, weight_decay=5e-4)
    return criterion, optimizer


def train(interval, epoch, net, trainloader, device, criterion, optimizer):
    print('\nEpoch: %d' % (epoch + 1))
    net.train()
    interval_loss = 0.   # loss accumulated since the last progress print
    train_loss = 0.      # loss accumulated over the whole epoch
    correct = 0
    total = 0
    start = time.time()
    for idx, (inputs, labels) in enumerate(trainloader):
        # forward
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # accumulate statistics
        interval_loss += loss.item()
        train_loss += loss.item()
        correct += outputs.max(dim=1)[1].eq(labels).sum().item()
        total += labels.size(0)
        # print progress every `interval` batches
        if (idx + 1) % interval == 0:
            end = time.time()
            print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
                100. * (idx + 1) / len(trainloader), end - start, interval_loss / interval,
                correct, total, 100. * correct / total))
            interval_loss = 0.
            start = time.time()
    return train_loss / len(trainloader), 1. - correct / total, net


def test(epoch, net, testloader, device, criterion, best_acc):
    net.eval()
    test_loss = 0.
    correct = 0
    total = 0
    start = time.time()
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(testloader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            correct += outputs.max(dim=1)[1].eq(labels).sum().item()
            total += labels.size(0)
    print('Testing ...')
    end = time.time()
    print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
        100. * (idx + 1) / len(testloader), end - start, test_loss / len(testloader),
        correct, total, 100. * correct / total))
    # save a checkpoint if the accuracy improved
    acc = 100. * correct / total
    if acc > best_acc:
        best_acc = acc
        print('Saving parameters to ./checkpoint/ckpt.t7')
        checkpoint = {
            'net_dict': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('./checkpoint'):
            os.mkdir('./checkpoint')
        torch.save(checkpoint, './checkpoint/ckpt.t7')
    return test_loss / len(testloader), 1. - correct / total, best_acc, net


def lr_decay(optimizer, factor=0.1):
    # scale the learning rate of every parameter group by `factor`
    for params in optimizer.param_groups:
        params['lr'] *= factor
        lr = params['lr']
    print('Learning rate adjusted to {}'.format(lr))
    return lr


def main():
    args = get_parser().parse_args()
    device = setup_device(args.gpu_id, args.no_cuda)
    trainloader, testloader, num_classes = load_data(args.data_dir)
    net, best_acc, start_epoch = define_net(num_classes, args.resume)
    net.to(device)
    criterion, optimizer = setup_loss_optimizer(net, args.learning_rate)
    for epoch in range(start_epoch, start_epoch + 40):
        train_loss, train_err, net = train(args.interval, epoch, net, trainloader, device, criterion, optimizer)
        test_loss, test_err, best_acc, net = test(epoch, net, testloader, device, criterion, best_acc)
        if (epoch + 1) % 20 == 0:
            lr_decay(optimizer)


if __name__ == '__main__':
    main()
```
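For reference, with the argument defaults above, the refactored script can then be run like this (the data path is just an example):

```
python train.py --data-dir ./Mars --gpu-id 0
```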
Also, I think we should write the training logs to a log directory so that the training process can be visualized with TensorBoard. Any suggestions? Thanks for reading!
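A minimal sketch of what that TensorBoard logging could look like with torch.utils.tensorboard, assuming the train/test helpers above; the ./runs directory and the tag names are illustrative choices, not part of the repo:

```python
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='./runs')  # hypothetical log directory
for epoch in range(start_epoch, start_epoch + 40):
    train_loss, train_err, net = train(args.interval, epoch, net, trainloader, device, criterion, optimizer)
    test_loss, test_err, best_acc, net = test(epoch, net, testloader, device, criterion, best_acc)
    # one scalar per curve; view with `tensorboard --logdir ./runs`
    writer.add_scalar('loss/train', train_loss, epoch)
    writer.add_scalar('loss/test', test_loss, epoch)
    writer.add_scalar('error/train', train_err, epoch)
    writer.add_scalar('error/test', test_err, epoch)
writer.close()
```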
Hi @pvtien96, you can open a pull request and I will merge it into the code. Thank you for your contribution to this repo!
I have three questions:
1. Why train the model without using YOLOv3 for detection?
2. I tried to train a model on the MARS-v160809 dataset. Training went fine, but I get very low accuracy at test time. What is the problem?
3. How do I train the model with the MARS-v160809 dataset?
@394781865 I'm not sure I fully understood your questions.
1. You can combine DeepSort with any detection model, such as YOLO and its variants (YOLOv2, YOLOv3, YOLOv4...), Faster R-CNN, SSD...
2. I didn't manage to test the model successfully.
3. I used to ask the same things. You should read through all the issues in this repo and work it out yourself. Hope this helps.
> I have three questions: (1) why train the model without using YOLOv3 for detection? (2) I tried to train on the MARS-v160809 dataset; training went fine, but test accuracy is very low. What is the problem? (3) How do I train the model with the MARS-v160809 dataset?
Hi, I encountered the same problem. I am training on the MARS dataset and get very low accuracy, and training is also extremely slow: after 3 days it has only run 9 epochs. Did you figure out the problem?
Hi @pvtien96,
I'm curious that you refactored the code above but did not mention or change the fact that torchvision.datasets.ImageFolder(train_dir, transform=transform_train) simply does not work with the Market1501 dataset structure. I created this custom Dataset loader:
```python
import os

import natsort
from PIL import Image
from torch.utils.data import Dataset


class Market1501DataSet(Dataset):
    def __init__(self, main_dir, transform):
        self.main_dir = main_dir
        self.transform = transform
        all_imgs = os.listdir(main_dir)
        self.total_imgs = natsort.natsorted(all_imgs)
        # __getitem__ must return an index into the list "classes";
        # the person id is the first four characters of the file name
        image_names = [x[0:4] for x in self.total_imgs]
        self.classes_dict = dict.fromkeys(image_names)
        for i, key in enumerate(self.classes_dict):
            self.classes_dict[key] = i
        self.classes = list(self.classes_dict)

    def __len__(self):
        return len(self.total_imgs)

    def __getitem__(self, idx):
        """Returns: tuple (image, target) where target is the index of the target category."""
        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        image = Image.open(img_loc).convert("RGB")
        tensor_image = self.transform(image)
        label = self.total_imgs[idx][0:4]
        index_of_label = self.classes_dict[label]
        return (tensor_image, index_of_label)
```
and then in train.py:

```python
# train_dir = os.path.join(root, "bounding_box_train")  # EDITed this change out after rearranging the Market1501 dataset structure
# test_dir = os.path.join(root, "bounding_box_test")    # EDITed this change out after rearranging the Market1501 dataset structure
train_dir = os.path.join(root, "train")
test_dir = os.path.join(root, "test")

market_train = Market1501DataSet(train_dir, transform=transform_train)
market_test = Market1501DataSet(test_dir, transform=transform_test)

trainloader = torch.utils.data.DataLoader(market_train, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(market_test, batch_size=64, shuffle=True)
```
What did you do?
(I'd like to discuss this with you and the author, because I'm not sure this is strictly correct: we would be training on 751 classes but then testing on 750 completely different classes that reuse the same index ids, so the network should be confused.)
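One way to make that mismatch concrete (a hypothetical check, not from the original post; it just compares the label spaces the two datasets expose):

```python
# With the stock Market1501 split, the train ids (751) and test ids (750,
# plus junk ids such as 0000 and -1 in the raw folder) do not overlap,
# so the classifier head is evaluated on identities it never saw.
train_ids = set(market_train.classes)
test_ids = set(market_test.classes)
print(len(train_ids), len(test_ids), len(train_ids & test_ids))
```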
EDIT: I rearranged the Market1501 dataset so that two new folders, train and test, each contain half the images of each person, so that we train on 1500 classes and test on the same 1500 classes.
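A hypothetical sketch of that rearrangement (the source folder names and the four-character identity prefix follow the posts above; the helper name, destination layout, and split rule are illustrative):

```python
import os
import shutil
from collections import defaultdict


def split_market1501_per_identity(src_dirs, dst_root):
    """Copy half of each identity's images into train/, the rest into test/."""
    by_id = defaultdict(list)
    for src in src_dirs:
        for name in sorted(os.listdir(src)):
            if name.endswith('.jpg'):
                by_id[name[0:4]].append(os.path.join(src, name))  # id = first 4 chars
    for subset in ('train', 'test'):
        os.makedirs(os.path.join(dst_root, subset), exist_ok=True)
    for pid, paths in by_id.items():
        half = (len(paths) + 1) // 2
        for i, path in enumerate(paths):
            subset = 'train' if i < half else 'test'
            shutil.copy(path, os.path.join(dst_root, subset))


split_market1501_per_identity(
    ['./Market1501/bounding_box_train', './Market1501/bounding_box_test'],
    './Market1501')
```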