1. Accelerate the association step.

2. Provide more trained models with different input resolutions.
Zhongdao 2020-01-29 21:45:07 +08:00
parent 7216bcaadf
commit c40826179b
12 changed files with 994 additions and 166 deletions

cfg/yolov3.cfg → cfg/yolov3_1088x608.cfg Executable file → Normal file

@ -1,26 +1,10 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=608
height=1088
width=1088
height=608
embedding_dim=512
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
@ -611,7 +595,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -712,7 +696,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -815,7 +799,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]

cfg/yolov3_576x320.cfg Normal file

@ -0,0 +1,817 @@
[net]
batch=16
subdivisions=1
width= 576
height=320
embedding_dim=512
channels=3
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=$embedding_dim
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 8,9,10,11
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,360, 170,420, 340, 320
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=$embedding_dim
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 4,5,6,7
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,320, 170,320, 340,320
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=$embedding_dim
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 0,1,2,3
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,320, 170,320, 340,320
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
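
Each of the three [yolo] heads above takes 4 of the 12 anchors via `mask`, predicts a single class, and is fed by a [route] that concatenates the 24-channel detection branch with the `$embedding_dim`-channel embedding branch. A quick sanity check of the grid and channel sizes this implies for a 576x320 input (a sketch; the strides 32/16/8 are the standard YOLOv3 scales and are not written in the cfg):

```python
# Sketch: grid and channel sizes implied by cfg/yolov3_576x320.cfg.
# Strides 32/16/8 are the usual YOLOv3 scales (assumption, not stated in the cfg itself).
width, height, emb_dim = 576, 320, 512
anchors_per_scale, num_classes = 4, 1                       # mask picks 4 of the 12 anchors; classes=1
det_channels = anchors_per_scale * (4 + 1 + num_classes)    # box(4) + conf(1) + class(1) -> filters=24
for stride in (32, 16, 8):
    gw, gh = width // stride, height // stride
    print(f"stride {stride}: {gw}x{gh} grid, {det_channels} det + {emb_dim} emb = {det_channels + emb_dim} channels")
# stride 32: 18x10 grid, 24 det + 512 emb = 536 channels
```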

cfg/yolov3_864x480.cfg

@ -1,26 +1,10 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=480
height=864
width=864
height=480
embedding_dim=512
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
@ -611,7 +595,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -712,7 +696,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -815,7 +799,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]

models.py

@ -74,7 +74,8 @@ def create_modules(module_defs):
nC = int(module_def['classes']) # number of classes
img_size = (int(hyperparams['width']),int(hyperparams['height']))
# Define detection layer
yolo_layer = YOLOLayer(anchors, nC, hyperparams['nID'], img_size, yolo_layer_count, cfg=hyperparams['cfg'])
yolo_layer = YOLOLayer(anchors, nC, int(hyperparams['nID']),
int(hyperparams['embedding_dim']), img_size, yolo_layer_count)
modules.add_module('yolo_%d' % i, yolo_layer)
yolo_layer_count += 1
@ -108,7 +109,7 @@ class Upsample(nn.Module):
class YOLOLayer(nn.Module):
def __init__(self, anchors, nC, nID, img_size, yolo_layer, cfg):
def __init__(self, anchors, nC, nID, nE, img_size, yolo_layer):
super(YOLOLayer, self).__init__()
self.layer = yolo_layer
nA = len(anchors)
@ -117,7 +118,7 @@ class YOLOLayer(nn.Module):
self.nC = nC # number of classes (80)
self.nID = nID # number of identities
self.img_size = 0
self.emb_dim = 512
self.emb_dim = nE
self.shift = [1, 3, 5]
self.SmoothL1Loss = nn.SmoothL1Loss()
@ -127,7 +128,9 @@ class YOLOLayer(nn.Module):
self.s_c = nn.Parameter(-4.15*torch.ones(1)) # -4.15
self.s_r = nn.Parameter(-4.85*torch.ones(1)) # -4.85
self.s_id = nn.Parameter(-2.3*torch.ones(1)) # -2.3
self.emb_scale = math.sqrt(2) * math.log(self.nID-1)
self.emb_scale = math.sqrt(2) * math.log(self.nID-1) if self.nID>1 else 1
def forward(self, p_cat, img_size, targets=None, classifier=None, test_emb=False):
@ -178,7 +181,7 @@ class YOLOLayer(nn.Module):
if test_emb:
if np.prod(embedding.shape)==0 or np.prod(tids.shape) == 0:
return torch.zeros(0, self. emb_dim+1).cuda()
return torch.zeros(0, self.emb_dim+1).cuda()
emb_and_gt = torch.cat([embedding, tids.float()], dim=1)
return emb_and_gt
@ -210,21 +213,23 @@ class YOLOLayer(nn.Module):
class Darknet(nn.Module):
"""YOLOv3 object detection model"""
def __init__(self, cfg_path, img_size=(1088, 608), nID=1591, test_emb=False):
def __init__(self, cfg_dict, nID=0, test_emb=False):
super(Darknet, self).__init__()
self.module_defs = parse_model_cfg(cfg_path)
self.module_defs[0]['cfg'] = cfg_path
if isinstance(cfg_dict, str):
cfg_dict = parse_model_cfg(cfg_dict)
self.module_defs = cfg_dict
self.module_defs[0]['nID'] = nID
self.img_size = [int(self.module_defs[0]['width']), int(self.module_defs[0]['height'])]
self.emb_dim = int(self.module_defs[0]['embedding_dim'])
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.img_size = img_size
self.loss_names = ['loss', 'box', 'conf', 'id', 'nT']
self.losses = OrderedDict()
for ln in self.loss_names:
self.losses[ln] = 0
self.emb_dim = 512
self.classifier = nn.Linear(self.emb_dim, nID)
self.test_emb=test_emb
self.test_emb = test_emb
self.classifier = nn.Linear(self.emb_dim, nID) if nID>0 else None
def forward(self, x, targets=None, targets_len=None):
@ -256,7 +261,8 @@ class Darknet(nn.Module):
for name, loss in zip(self.loss_names, losses):
self.losses[name] += loss
elif self.test_emb:
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
if targets is not None:
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
x = module[0](x, self.img_size, targets, self.classifier, self.test_emb)
else: # get detections
x = module[0](x, self.img_size)
@ -282,7 +288,8 @@ def shift_tensor_vertically(t, delta):
def create_grids(self, img_size, nGh, nGw):
self.stride = img_size[0]/nGw
assert self.stride == img_size[1] / nGh
assert self.stride == img_size[1] / nGh, \
"{} v.s. {}/{}".format(self.stride, img_size[1], nGh)
# build xy offsets
grid_x = torch.arange(nGw).repeat((nGh, 1)).view((1, 1, nGh, nGw)).float()
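
With this change the input geometry lives in the cfg: `Darknet` accepts either a cfg path or an already-parsed dict, reads `width`, `height`, and `embedding_dim` from the `[net]` block, and only builds the ID classifier when `nID > 0`. A minimal construction sketch against the renamed cfg:

```python
# Minimal sketch of the new constructor: no img_size argument, everything comes from the cfg.
from utils.parse_config import parse_model_cfg
from models import Darknet

cfg_dict = parse_model_cfg('cfg/yolov3_1088x608.cfg')
model = Darknet(cfg_dict, nID=0, test_emb=False)   # nID=0 -> no nn.Linear ID classifier (inference)
print(model.img_size, model.emb_dim)               # [1088, 608] and 512, both read from the [net] block
```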

test.py

@ -16,12 +16,10 @@ def test(
data_cfg,
weights,
batch_size=16,
img_size=416,
iou_thres=0.5,
conf_thres=0.3,
nms_thres=0.45,
print_interval=40,
nID=14455,
):
# Configure run
@ -32,9 +30,11 @@ def test(
nC = 1
test_path = data_cfg_dict['test']
dataset_root = data_cfg_dict['root']
cfg_dict = parse_model_cfg(cfg)
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
# Initialize model
model = Darknet(cfg, img_size, nID)
model = Darknet(cfg_dict, test_emb=False)
# Load weights
if weights.endswith('.pt'): # pytorch format
@ -149,12 +149,10 @@ def test_emb(
data_cfg,
weights,
batch_size=16,
img_size=416,
iou_thres=0.5,
conf_thres=0.3,
nms_thres=0.45,
print_interval=40,
nID=14455,
):
# Configure run
@ -163,9 +161,11 @@ def test_emb(
f.close()
test_paths = data_cfg_dict['test_emb']
dataset_root = data_cfg_dict['root']
cfg_dict = parse_model_cfg(cfg)
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
# Initialize model
model = Darknet(cfg, img_size, nID, test_emb=True)
model = Darknet(cfg_dict, test_emb=True)
# Load weights
if weights.endswith('.pt'): # pytorch format
@ -231,7 +231,6 @@ if __name__ == '__main__':
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
parser.add_argument('--print-interval', type=int, default=10, help='size of each image dimension')
parser.add_argument('--test-emb', action='store_true', help='test embedding')
opt = parser.parse_args()
@ -244,7 +243,6 @@ if __name__ == '__main__':
opt.data_cfg,
opt.weights,
opt.batch_size,
opt.img_size,
opt.iou_thres,
opt.conf_thres,
opt.nms_thres,
@ -256,7 +254,6 @@ if __name__ == '__main__':
opt.data_cfg,
opt.weights,
opt.batch_size,
opt.img_size,
opt.iou_thres,
opt.conf_thres,
opt.nms_thres,
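
`img_size` and `nID` are dropped from the test entry points; the resolution is read from the cfg and the model is built without an ID classifier. A hedged call sketch, mirroring how `train.py` invokes it after this commit (paths are the repo defaults, adjust to your setup):

```python
# Sketch: calling the updated test(); the input resolution now comes from the cfg, not an argument.
import test
mAP, R, P = test.test('cfg/yolov3_1088x608.cfg', 'cfg/ccmcpe.json',
                      weights='weights/latest.pt', batch_size=16, print_interval=40)
```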

track.py

@ -5,13 +5,14 @@ import logging
import argparse
import motmetrics as mm
import torch
from tracker.multitracker import JDETracker
from utils import visualization as vis
from utils.log import logger
from utils.timer import Timer
from utils.evaluation import Evaluator
from utils.parse_config import parse_model_cfg
import utils.datasets as datasets
import torch
from utils.utils import *
@ -84,6 +85,10 @@ def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',),
mkdir_if_missing(result_root)
data_type = 'mot'
# Read config
cfg_dict = parse_model_cfg(opt.cfg)
opt.img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
# run tracking
accs = []
n_frame = 0
@ -134,7 +139,6 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='track.py')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file')
parser.add_argument('--img-size', type=int, default=[1088, 608], nargs='+', help='pixels')
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.4, help='iou threshold for non-maximum suppression')
@ -162,6 +166,8 @@ if __name__ == '__main__':
MOT17-11-SDP
MOT17-13-SDP
'''
seqs_str = '''MOT17-02-SDP
'''
data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
else:
seqs_str = '''MOT16-01

tracker/matching.py

@ -1,8 +1,10 @@
import cv2
import torch
import torch.nn.functional as F
import numpy as np
import scipy
from scipy.spatial.distance import cdist
from sklearn.utils import linear_assignment_
import lap
from cython_bbox import bbox_overlaps as bbox_ious
from utils import kalman_filter
@ -25,32 +27,19 @@ def merge_matches(m1, m2, shape):
return match, unmatched_O, unmatched_Q
def _indices_to_matches(cost_matrix, indices, thresh):
matched_cost = cost_matrix[tuple(zip(*indices))]
matched_mask = (matched_cost <= thresh)
matches = indices[matched_mask]
unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
return matches, unmatched_a, unmatched_b
def linear_assignment(cost_matrix, thresh):
"""
Simple linear assignment
:type cost_matrix: np.ndarray
:type thresh: float
:return: matches, unmatched_a, unmatched_b
"""
if cost_matrix.size == 0:
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
cost_matrix[cost_matrix > thresh] = thresh + 1e-4
indices = linear_assignment_.linear_assignment(cost_matrix)
return _indices_to_matches(cost_matrix, indices, thresh)
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
def ious(atlbrs, btlbrs):
"""
@ -104,21 +93,9 @@ def embedding_distance(tracks, detections, metric='cosine'):
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
for i, track in enumerate(tracks):
cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
return cost_matrix
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features)) # Normalized features
def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = np.inf
return cost_matrix
@ -130,10 +107,7 @@ def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
track.mean, track.covariance, measurements, only_position, metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
#print(cost_matrix[row])
#print(gating_distance)
#print('-'*90)
cost_matrix[row] = lambda_ * cost_matrix[row] + (1-lambda_)* gating_distance
return cost_matrix
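
`linear_assignment` now calls `lap.lapjv` directly, with `cost_limit` taking over the old threshold logic, and `embedding_distance` builds the whole cost matrix with a single `cdist` call instead of a per-track loop. A tiny usage sketch of the lap-based assignment (assumes the `lap` package is installed):

```python
# Tiny sketch of the lap-based assignment used by matching.linear_assignment.
import numpy as np
import lap

cost = np.array([[0.1, 0.9],
                 [0.8, 0.2],
                 [0.9, 0.9]])                        # 3 tracks x 2 detections
_, x, y = lap.lapjv(cost, extend_cost=True, cost_limit=0.7)
matches = np.asarray([[ix, mx] for ix, mx in enumerate(x) if mx >= 0])
unmatched_tracks = np.where(x < 0)[0]
unmatched_dets = np.where(y < 0)[0]
print(matches)           # [[0 0] [1 1]]  (track 2 stays unmatched: all its costs exceed 0.7)
print(unmatched_tracks)  # [2]
```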

tracker/multitracker.py

@ -6,6 +6,7 @@ import os
import os.path as osp
import time
import torch
import torch.nn.functional as F
from utils.utils import *
from utils.log import logger
@ -16,6 +17,7 @@ from .basetrack import BaseTrack, TrackState
class STrack(BaseTrack):
shared_kalman = KalmanFilter()
def __init__(self, tlwh, score, temp_feat, buffer_size=30):
@ -41,7 +43,7 @@ class STrack(BaseTrack):
else:
self.smooth_feat = self.alpha *self.smooth_feat + (1-self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
mean_state = self.mean.copy()
@ -49,6 +51,19 @@ class STrack(BaseTrack):
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
@staticmethod
def multi_predict(stracks):
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i,st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
def activate(self, kalman_filter, frame_id):
"""Start a new tracklet"""
@ -97,7 +112,7 @@ class STrack(BaseTrack):
self.update_features(new_track.curr_feat)
@property
@jit
#@jit(nopython=True)
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
@ -110,7 +125,7 @@ class STrack(BaseTrack):
return ret
@property
@jit
#@jit(nopython=True)
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
@ -120,7 +135,7 @@ class STrack(BaseTrack):
return ret
@staticmethod
@jit
#@jit(nopython=True)
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
@ -134,14 +149,14 @@ class STrack(BaseTrack):
return self.tlwh_to_xyah(self.tlwh)
@staticmethod
@jit
#@jit(nopython=True)
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
@jit
#@jit(nopython=True)
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
@ -151,10 +166,11 @@ class STrack(BaseTrack):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
class JDETracker(object):
def __init__(self, opt, frame_rate=30):
self.opt = opt
self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
self.model = Darknet(opt.cfg)
# load_darknet_weights(self.model, opt.weights)
self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'], strict=False)
self.model.cuda().eval()
@ -183,17 +199,16 @@ class JDETracker(object):
pred = self.model(im_blob)
pred = pred[pred[:, :, 4] > self.opt.conf_thres]
if len(pred) > 0:
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres,
self.opt.nms_thres)[0]
scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy()
'''Detections'''
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
(tlbrs, f) in zip(dets[:, :5], dets[:, 6:])]
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for
(tlbrs, f) in zip(dets, embs)]
else:
detections = []
t2 = time.time()
# print('Forward: {} s'.format(t2-t1))
''' Add newly detected tracklets to tracked_stracks'''
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
@ -206,11 +221,8 @@ class JDETracker(object):
''' Step 2: First association, with embedding'''
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
# Predict the current location with KF
for strack in strack_pool:
strack.predict()
STrack.multi_predict(strack_pool)
dists = matching.embedding_distance(strack_pool, detections)
#dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
@ -271,13 +283,10 @@ class JDETracker(object):
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
t4 = time.time()
# print('Ramained match {} s'.format(t4-t3))
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
# self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack]
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
@ -292,8 +301,6 @@ class JDETracker(object):
logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
t5 = time.time()
# print('Final {} s'.format(t5-t4))
return output_stracks
def joint_stracks(tlista, tlistb):
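
On the tracker side, detections and embeddings are split (`dets`, `embs`) after NMS, and the per-track Kalman prediction loop is replaced by the batched `STrack.multi_predict`. The appearance model stays an exponentially smoothed, L2-normalized feature per track; a standalone numpy sketch of that update rule (the `alpha` value is an assumption, only the formula appears in the diff):

```python
# Standalone sketch of STrack's smoothed-feature update (alpha value is an assumption).
import numpy as np

def update_features(smooth_feat, feat, alpha=0.9):
    smooth_feat = feat if smooth_feat is None else alpha * smooth_feat + (1 - alpha) * feat
    return smooth_feat / np.linalg.norm(smooth_feat)   # normalization now runs on every update
```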

train.py

@ -13,7 +13,6 @@ from torchvision.transforms import transforms as T
def train(
cfg,
data_cfg,
img_size=(1088,608),
resume=False,
epochs=100,
batch_size=16,
@ -33,16 +32,19 @@ def train(
trainset_paths = data_config['train']
dataset_root = data_config['root']
f.close()
cfg_dict = parse_model_cfg(cfg)
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
transforms = T.Compose([T.ToTensor()])
# Get dataloader
transforms = T.Compose([T.ToTensor()])
dataset = JointDataset(dataset_root, trainset_paths, img_size, augment=True, transforms=transforms)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
num_workers=8, pin_memory=True, drop_last=True, collate_fn=collate_fn)
# Initialize model
model = Darknet(cfg, img_size, dataset.nID)
model = Darknet(cfg_dict, dataset.nID)
cutoff = -1 # backbone reaches to cutoff layer
start_epoch = 0
@ -87,14 +89,13 @@ def train(
p.requires_grad = False if 'batch_norm' in name else True
model_info(model)
t0 = time.time()
for epoch in range(epochs):
epoch += start_epoch
logger.info(('%8s%12s' + '%10s' * 6) % (
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
# Freeze darknet53.conv.74 for first epoch
if freeze_backbone and (epoch < 2):
@ -108,7 +109,7 @@ def train(
for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
if sum([len(x) for x in targets]) < 1: # if no targets continue
continue
# SGD burn-in
burnin = min(1000, len(dataloader))
if (epoch == 0) & (i <= burnin):
@ -154,8 +155,8 @@ def train(
# Calculate mAP
if epoch % opt.test_interval ==0:
with torch.no_grad():
mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
# Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
@ -166,9 +167,8 @@ if __name__ == '__main__':
parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--cfg', type=str, default='cfg/yolov3_1088x608.cfg', help='cfg file path')
parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='coco.data file path')
parser.add_argument('--img-size', type=int, default=[1088, 608], nargs='+', help='pixels')
parser.add_argument('--resume', action='store_true', help='resume training flag')
parser.add_argument('--print-interval', type=int, default=40, help='print interval')
parser.add_argument('--test-interval', type=int, default=9, help='test interval')
@ -181,7 +181,6 @@ if __name__ == '__main__':
train(
opt.cfg,
opt.data_cfg,
img_size=opt.img_size,
resume=opt.resume,
epochs=opt.epochs,
batch_size=opt.batch_size,

utils/evaluation.py

@ -2,7 +2,7 @@ import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
from utils.io import read_results, unzip_objs
@ -39,18 +39,20 @@ class Evaluator(object):
ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
ignore_tlwhs = unzip_objs(ignore_objs)[0]
# remove ignored results
keep = np.ones(len(trk_tlwhs), dtype=bool)
iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
if len(iou_distance) > 0:
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
# get distance matrix
iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)

utils/kalman_filter.py

@ -1,4 +1,5 @@
# vim: expandtab:ts=4:sw=4
import numba
import numpy as np
import scipy.linalg
@ -116,7 +117,7 @@ class KalmanFilter(object):
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
mean = np.dot(self._motion_mat, mean)
mean = np.dot(mean, self._motion_mat.T)
covariance = np.linalg.multi_dot((
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
@ -150,6 +151,48 @@ class KalmanFilter(object):
covariance = np.linalg.multi_dot((
self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def multi_predict(self, mean, covariance):
"""Run Kalman filter prediction step (Vectorized version).
Parameters
----------
mean : ndarray
The Nx8 dimensional mean matrix of the object states at the previous
time step.
covariance : ndarray
The Nx8x8 dimensional covariance matrics of the object states at the
previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted
state. Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[:, 3],
self._std_weight_position * mean[:, 3],
1e-2 * np.ones_like(mean[:, 3]),
self._std_weight_position * mean[:, 3]]
std_vel = [
self._std_weight_velocity * mean[:, 3],
self._std_weight_velocity * mean[:, 3],
1e-5 * np.ones_like(mean[:, 3]),
self._std_weight_velocity * mean[:, 3]]
sqr = np.square(np.r_[std_pos, std_vel]).T
motion_cov = []
for i in range(len(mean)):
motion_cov.append(np.diag(sqr[i]))
motion_cov = np.asarray(motion_cov)
mean = np.dot(mean, self._motion_mat.T)
left = np.dot(self._motion_mat, covariance).transpose((1,0,2))
covariance = np.dot(left, self._motion_mat.T) + motion_cov
return mean, covariance
def update(self, mean, covariance, measurement):
"""Run Kalman filter correction step.
@ -186,7 +229,7 @@ class KalmanFilter(object):
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements,
only_position=False):
only_position=False, metric='maha'):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
@ -219,11 +262,17 @@ class KalmanFilter(object):
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
if metric == 'gaussian':
return np.sum(d * d, axis=1)
elif metric == 'maha':
cholesky_factor = np.linalg.cholesky(covariance)
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
else:
raise ValueError('invalid distance metric')
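
`multi_predict` vectorizes the Kalman prediction over all tracks at once, and `gating_distance` gains a `metric` argument ('maha' keeps the Mahalanobis gating used by `fuse_motion`, 'gaussian' is plain squared Euclidean). A usage sketch, assuming the file also keeps the standard Deep SORT style `initiate(measurement)`:

```python
# Sketch: batched prediction for N tracks with the new multi_predict.
# initiate() is assumed to exist as in the Deep SORT KalmanFilter this file is based on.
import numpy as np
from utils.kalman_filter import KalmanFilter

kf = KalmanFilter()
m1, c1 = kf.initiate(np.array([100., 200., 0.5, 80.]))   # (center x, center y, aspect ratio, height)
m2, c2 = kf.initiate(np.array([300., 150., 0.4, 60.]))
means, covs = np.stack([m1, m2]), np.stack([c1, c2])     # N x 8 means, N x 8 x 8 covariances
means, covs = kf.multi_predict(means, covs)              # one call instead of a per-track loop
```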

utils/parse_config.py

@ -14,7 +14,9 @@ def parse_model_cfg(path):
else:
key, value = line.split("=")
value = value.strip()
module_defs[-1][key.rstrip()] = value.strip()
if value[0] == '$':
value = module_defs[0].get(value.strip('$'), None)
module_defs[-1][key.rstrip()] = value
return module_defs
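
The parser now resolves `$name` values against the `[net]` block, which is what lets the cfgs above write `filters=$embedding_dim` once and set the dimension in a single place. A minimal behaviour sketch (toy cfg written to a temp file, the file name is made up):

```python
# Sketch of the new $variable substitution in parse_model_cfg (toy cfg, made-up path).
import os, tempfile
from utils.parse_config import parse_model_cfg

cfg_text = """[net]
width=576
height=320
embedding_dim=512

[convolutional]
size=3
filters=$embedding_dim
"""
path = os.path.join(tempfile.mkdtemp(), 'toy.cfg')
with open(path, 'w') as f:
    f.write(cfg_text)

defs = parse_model_cfg(path)
print(defs[1]['filters'])   # '512', pulled from the [net] block
```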