1. Accelerate the association step.

2. Provide more trained models with different input resolutions.
Zhongdao 2020-01-29 21:45:07 +08:00
parent 7216bcaadf
commit c40826179b
12 changed files with 994 additions and 166 deletions

cfg/yolov3.cfg → cfg/yolov3_1088x608.cfg Executable file → Normal file

@ -1,26 +1,10 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=608
height=1088
width=1088
height=608
embedding_dim=512
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
@ -611,7 +595,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -712,7 +696,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -815,7 +799,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]

cfg/yolov3_576x320.cfg Normal file

@ -0,0 +1,817 @@
[net]
batch=16
subdivisions=1
width= 576
height=320
embedding_dim=512
channels=3
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=$embedding_dim
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 8,9,10,11
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,360, 170,420, 340, 320
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=$embedding_dim
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 4,5,6,7
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,320, 170,320, 340,320
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=$embedding_dim
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 0,1,2,3
anchors = 6,16, 8,23, 11,32, 16,45, 21,64, 30,90, 43,128, 60,180, 85,255, 120,320, 170,320, 340,320
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
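
Each of the three [yolo] heads above takes 4 of the 12 anchors via `mask`, predicts a single class, and is fed by a [route] that concatenates the 24-channel detection branch with the `$embedding_dim`-channel embedding branch. A quick sanity check of the grid and channel sizes this implies for a 576x320 input (a sketch; the strides 32/16/8 are the standard YOLOv3 scales and are not written in the cfg):

```python
# Sketch: grid and channel sizes implied by cfg/yolov3_576x320.cfg.
# Strides 32/16/8 are the usual YOLOv3 scales (assumption, not stated in the cfg itself).
width, height, emb_dim = 576, 320, 512
anchors_per_scale, num_classes = 4, 1                       # mask picks 4 of the 12 anchors; classes=1
det_channels = anchors_per_scale * (4 + 1 + num_classes)    # box(4) + conf(1) + class(1) -> filters=24
for stride in (32, 16, 8):
    gw, gh = width // stride, height // stride
    print(f"stride {stride}: {gw}x{gh} grid, {det_channels} det + {emb_dim} emb = {det_channels + emb_dim} channels")
# stride 32: 18x10 grid, 24 det + 512 emb = 536 channels
```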

cfg/yolov3_864x480.cfg

@ -1,26 +1,10 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=480
height=864
width=864
height=480
embedding_dim=512
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
@ -611,7 +595,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -712,7 +696,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]
@ -815,7 +799,7 @@ layers = -3
size=3
stride=1
pad=1
filters=512
filters=$embedding_dim
activation=linear
[route]

models.py

@ -74,7 +74,8 @@ def create_modules(module_defs):
nC = int(module_def['classes']) # number of classes
img_size = (int(hyperparams['width']),int(hyperparams['height']))
# Define detection layer
yolo_layer = YOLOLayer(anchors, nC, hyperparams['nID'], img_size, yolo_layer_count, cfg=hyperparams['cfg'])
yolo_layer = YOLOLayer(anchors, nC, int(hyperparams['nID']),
int(hyperparams['embedding_dim']), img_size, yolo_layer_count)
modules.add_module('yolo_%d' % i, yolo_layer)
yolo_layer_count += 1
@ -108,7 +109,7 @@ class Upsample(nn.Module):
class YOLOLayer(nn.Module):
def __init__(self, anchors, nC, nID, img_size, yolo_layer, cfg):
def __init__(self, anchors, nC, nID, nE, img_size, yolo_layer):
super(YOLOLayer, self).__init__()
self.layer = yolo_layer
nA = len(anchors)
@ -117,7 +118,7 @@ class YOLOLayer(nn.Module):
self.nC = nC # number of classes (80)
self.nID = nID # number of identities
self.img_size = 0
self.emb_dim = 512
self.emb_dim = nE
self.shift = [1, 3, 5]
self.SmoothL1Loss = nn.SmoothL1Loss()
@ -127,7 +128,9 @@ class YOLOLayer(nn.Module):
self.s_c = nn.Parameter(-4.15*torch.ones(1)) # -4.15
self.s_r = nn.Parameter(-4.85*torch.ones(1)) # -4.85
self.s_id = nn.Parameter(-2.3*torch.ones(1)) # -2.3
self.emb_scale = math.sqrt(2) * math.log(self.nID-1)
self.emb_scale = math.sqrt(2) * math.log(self.nID-1) if self.nID>1 else 1
def forward(self, p_cat, img_size, targets=None, classifier=None, test_emb=False):
@ -178,7 +181,7 @@ class YOLOLayer(nn.Module):
if test_emb:
if np.prod(embedding.shape)==0 or np.prod(tids.shape) == 0:
return torch.zeros(0, self. emb_dim+1).cuda()
return torch.zeros(0, self.emb_dim+1).cuda()
emb_and_gt = torch.cat([embedding, tids.float()], dim=1)
return emb_and_gt
@ -210,21 +213,23 @@ class YOLOLayer(nn.Module):
class Darknet(nn.Module):
"""YOLOv3 object detection model"""
def __init__(self, cfg_path, img_size=(1088, 608), nID=1591, test_emb=False):
def __init__(self, cfg_dict, nID=0, test_emb=False):
super(Darknet, self).__init__()
self.module_defs = parse_model_cfg(cfg_path)
self.module_defs[0]['cfg'] = cfg_path
if isinstance(cfg_dict, str):
cfg_dict = parse_model_cfg(cfg_dict)
self.module_defs = cfg_dict
self.module_defs[0]['nID'] = nID
self.img_size = [int(self.module_defs[0]['width']), int(self.module_defs[0]['height'])]
self.emb_dim = int(self.module_defs[0]['embedding_dim'])
self.hyperparams, self.module_list = create_modules(self.module_defs)
self.img_size = img_size
self.loss_names = ['loss', 'box', 'conf', 'id', 'nT']
self.losses = OrderedDict()
for ln in self.loss_names:
self.losses[ln] = 0
self.emb_dim = 512
self.classifier = nn.Linear(self.emb_dim, nID)
self.test_emb=test_emb
self.test_emb = test_emb
self.classifier = nn.Linear(self.emb_dim, nID) if nID>0 else None
def forward(self, x, targets=None, targets_len=None):
@ -256,7 +261,8 @@ class Darknet(nn.Module):
for name, loss in zip(self.loss_names, losses):
self.losses[name] += loss
elif self.test_emb:
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
if targets is not None:
targets = [targets[i][:int(l)] for i,l in enumerate(targets_len)]
x = module[0](x, self.img_size, targets, self.classifier, self.test_emb)
else: # get detections
x = module[0](x, self.img_size)
@ -282,7 +288,8 @@ def shift_tensor_vertically(t, delta):
def create_grids(self, img_size, nGh, nGw):
self.stride = img_size[0]/nGw
assert self.stride == img_size[1] / nGh
assert self.stride == img_size[1] / nGh, \
"{} v.s. {}/{}".format(self.stride, img_size[1], nGh)
# build xy offsets
grid_x = torch.arange(nGw).repeat((nGh, 1)).view((1, 1, nGh, nGw)).float()
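
With this change the input geometry lives in the cfg: `Darknet` accepts either a cfg path or an already-parsed dict, reads `width`, `height`, and `embedding_dim` from the `[net]` block, and only builds the ID classifier when `nID > 0`. A minimal construction sketch against the renamed cfg:

```python
# Minimal sketch of the new constructor: no img_size argument, everything comes from the cfg.
from utils.parse_config import parse_model_cfg
from models import Darknet

cfg_dict = parse_model_cfg('cfg/yolov3_1088x608.cfg')
model = Darknet(cfg_dict, nID=0, test_emb=False)   # nID=0 -> no nn.Linear ID classifier (inference)
print(model.img_size, model.emb_dim)               # [1088, 608] and 512, both read from the [net] block
```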

test.py

@ -16,12 +16,10 @@ def test(
data_cfg,
weights,
batch_size=16,
img_size=416,
iou_thres=0.5,
conf_thres=0.3,
nms_thres=0.45,
print_interval=40,
nID=14455,
):
# Configure run
@ -32,9 +30,11 @@ def test(
nC = 1
test_path = data_cfg_dict['test']
dataset_root = data_cfg_dict['root']
cfg_dict = parse_model_cfg(cfg)
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
# Initialize model
model = Darknet(cfg, img_size, nID)
model = Darknet(cfg_dict, test_emb=False)
# Load weights
if weights.endswith('.pt'): # pytorch format
@ -149,12 +149,10 @@ def test_emb(
data_cfg,
weights,
batch_size=16,
img_size=416,
iou_thres=0.5,
conf_thres=0.3,
nms_thres=0.45,
print_interval=40,
nID=14455,
):
# Configure run
@ -163,9 +161,11 @@ def test_emb(
f.close()
test_paths = data_cfg_dict['test_emb']
dataset_root = data_cfg_dict['root']
cfg_dict = parse_model_cfg(cfg)
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
# Initialize model
model = Darknet(cfg, img_size, nID, test_emb=True)
model = Darknet(cfg_dict, test_emb=True)
# Load weights
if weights.endswith('.pt'): # pytorch format
@ -231,7 +231,6 @@ if __name__ == '__main__':
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
parser.add_argument('--print-interval', type=int, default=10, help='size of each image dimension')
parser.add_argument('--test-emb', action='store_true', help='test embedding')
opt = parser.parse_args()
@ -244,7 +243,6 @@ if __name__ == '__main__':
opt.data_cfg,
opt.weights,
opt.batch_size,
opt.img_size,
opt.iou_thres,
opt.conf_thres,
opt.nms_thres,
@ -256,7 +254,6 @@ if __name__ == '__main__':
opt.data_cfg,
opt.weights,
opt.batch_size,
opt.img_size,
opt.iou_thres,
opt.conf_thres,
opt.nms_thres,
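
`img_size` and `nID` are dropped from the test entry points; the resolution is read from the cfg and the model is built without an ID classifier. A hedged call sketch, mirroring how `train.py` invokes it after this commit (paths are the repo defaults, adjust to your setup):

```python
# Sketch: calling the updated test(); the input resolution now comes from the cfg, not an argument.
import test
mAP, R, P = test.test('cfg/yolov3_1088x608.cfg', 'cfg/ccmcpe.json',
                      weights='weights/latest.pt', batch_size=16, print_interval=40)
```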

track.py

@ -5,13 +5,14 @@ import logging
import argparse
import motmetrics as mm
import torch
from tracker.multitracker import JDETracker
from utils import visualization as vis
from utils.log import logger
from utils.timer import Timer
from utils.evaluation import Evaluator
from utils.parse_config import parse_model_cfg
import utils.datasets as datasets
import torch
from utils.utils import *
@ -84,6 +85,10 @@ def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',),
mkdir_if_missing(result_root)
data_type = 'mot'
# Read config
cfg_dict = parse_model_cfg(opt.cfg)
opt.img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
# run tracking
accs = []
n_frame = 0
@ -134,7 +139,6 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='track.py')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file')
parser.add_argument('--img-size', type=int, default=[1088, 608], nargs='+', help='pixels')
parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
parser.add_argument('--nms-thres', type=float, default=0.4, help='iou threshold for non-maximum suppression')
@ -162,6 +166,8 @@ if __name__ == '__main__':
MOT17-11-SDP
MOT17-13-SDP
'''
seqs_str = '''MOT17-02-SDP
'''
data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
else:
seqs_str = '''MOT16-01

tracker/matching.py

@ -1,8 +1,10 @@
import cv2
import torch
import torch.nn.functional as F
import numpy as np
import scipy
from scipy.spatial.distance import cdist
from sklearn.utils import linear_assignment_
import lap
from cython_bbox import bbox_overlaps as bbox_ious
from utils import kalman_filter
@ -25,32 +27,19 @@ def merge_matches(m1, m2, shape):
return match, unmatched_O, unmatched_Q
def _indices_to_matches(cost_matrix, indices, thresh):
matched_cost = cost_matrix[tuple(zip(*indices))]
matched_mask = (matched_cost <= thresh)
matches = indices[matched_mask]
unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
return matches, unmatched_a, unmatched_b
def linear_assignment(cost_matrix, thresh):
"""
Simple linear assignment
:type cost_matrix: np.ndarray
:type thresh: float
:return: matches, unmatched_a, unmatched_b
"""
if cost_matrix.size == 0:
return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
cost_matrix[cost_matrix > thresh] = thresh + 1e-4
indices = linear_assignment_.linear_assignment(cost_matrix)
return _indices_to_matches(cost_matrix, indices, thresh)
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
def ious(atlbrs, btlbrs):
"""
@ -104,21 +93,9 @@ def embedding_distance(tracks, detections, metric='cosine'):
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
for i, track in enumerate(tracks):
cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
return cost_matrix
track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features)) # Normalized features
def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = np.inf
return cost_matrix
@ -130,10 +107,7 @@ def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda
measurements = np.asarray([det.to_xyah() for det in detections])
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean, track.covariance, measurements, only_position)
track.mean, track.covariance, measurements, only_position, metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
#print(cost_matrix[row])
#print(gating_distance)
#print('-'*90)
cost_matrix[row] = lambda_ * cost_matrix[row] + (1-lambda_)* gating_distance
return cost_matrix
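
`linear_assignment` now calls `lap.lapjv` directly, with `cost_limit` taking over the old threshold logic, and `embedding_distance` builds the whole cost matrix with a single `cdist` call instead of a per-track loop. A tiny usage sketch of the lap-based assignment (assumes the `lap` package is installed):

```python
# Tiny sketch of the lap-based assignment used by matching.linear_assignment.
import numpy as np
import lap

cost = np.array([[0.1, 0.9],
                 [0.8, 0.2],
                 [0.9, 0.9]])                        # 3 tracks x 2 detections
_, x, y = lap.lapjv(cost, extend_cost=True, cost_limit=0.7)
matches = np.asarray([[ix, mx] for ix, mx in enumerate(x) if mx >= 0])
unmatched_tracks = np.where(x < 0)[0]
unmatched_dets = np.where(y < 0)[0]
print(matches)           # [[0 0] [1 1]]  (track 2 stays unmatched: all its costs exceed 0.7)
print(unmatched_tracks)  # [2]
```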

tracker/multitracker.py

@ -6,6 +6,7 @@ import os
import os.path as osp
import time
import torch
import torch.nn.functional as F
from utils.utils import *
from utils.log import logger
@ -16,6 +17,7 @@ from .basetrack import BaseTrack, TrackState
class STrack(BaseTrack):
shared_kalman = KalmanFilter()
def __init__(self, tlwh, score, temp_feat, buffer_size=30):
@ -41,7 +43,7 @@ class STrack(BaseTrack):
else:
self.smooth_feat = self.alpha *self.smooth_feat + (1-self.alpha) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
mean_state = self.mean.copy()
@ -49,6 +51,19 @@ class STrack(BaseTrack):
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)
@staticmethod
def multi_predict(stracks):
if len(stracks) > 0:
multi_mean = np.asarray([st.mean.copy() for st in stracks])
multi_covariance = np.asarray([st.covariance for st in stracks])
for i,st in enumerate(stracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
stracks[i].mean = mean
stracks[i].covariance = cov
def activate(self, kalman_filter, frame_id):
"""Start a new tracklet"""
@ -97,7 +112,7 @@ class STrack(BaseTrack):
self.update_features(new_track.curr_feat)
@property
@jit
#@jit(nopython=True)
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
@ -110,7 +125,7 @@ class STrack(BaseTrack):
return ret
@property
@jit
#@jit(nopython=True)
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
@ -120,7 +135,7 @@ class STrack(BaseTrack):
return ret
@staticmethod
@jit
#@jit(nopython=True)
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
@ -134,14 +149,14 @@ class STrack(BaseTrack):
return self.tlwh_to_xyah(self.tlwh)
@staticmethod
@jit
#@jit(nopython=True)
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
@jit
#@jit(nopython=True)
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
@ -151,10 +166,11 @@ class STrack(BaseTrack):
return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
class JDETracker(object):
def __init__(self, opt, frame_rate=30):
self.opt = opt
self.model = Darknet(opt.cfg, opt.img_size, nID=14455)
self.model = Darknet(opt.cfg)
# load_darknet_weights(self.model, opt.weights)
self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'], strict=False)
self.model.cuda().eval()
@ -183,17 +199,16 @@ class JDETracker(object):
pred = self.model(im_blob)
pred = pred[pred[:, :, 4] > self.opt.conf_thres]
if len(pred) > 0:
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres, self.opt.nms_thres)[0].cpu()
dets = non_max_suppression(pred.unsqueeze(0), self.opt.conf_thres,
self.opt.nms_thres)[0]
scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round()
dets, embs = dets[:, :5].cpu().numpy(), dets[:, 6:].cpu().numpy()
'''Detections'''
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f.numpy(), 30) for
(tlbrs, f) in zip(dets[:, :5], dets[:, 6:])]
detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for
(tlbrs, f) in zip(dets, embs)]
else:
detections = []
t2 = time.time()
# print('Forward: {} s'.format(t2-t1))
''' Add newly detected tracklets to tracked_stracks'''
unconfirmed = []
tracked_stracks = [] # type: list[STrack]
@ -206,11 +221,8 @@ class JDETracker(object):
''' Step 2: First association, with embedding'''
strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
# Predict the current location with KF
for strack in strack_pool:
strack.predict()
STrack.multi_predict(strack_pool)
dists = matching.embedding_distance(strack_pool, detections)
#dists = matching.gate_cost_matrix(self.kalman_filter, dists, strack_pool, detections)
dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections)
matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7)
@ -271,13 +283,10 @@ class JDETracker(object):
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_stracks.append(track)
t4 = time.time()
# print('Ramained match {} s'.format(t4-t3))
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
# self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack]
self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
self.lost_stracks.extend(lost_stracks)
self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
@ -292,8 +301,6 @@ class JDETracker(object):
logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks]))
logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks]))
logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks]))
t5 = time.time()
# print('Final {} s'.format(t5-t4))
return output_stracks
def joint_stracks(tlista, tlistb):
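
On the tracker side, detections and embeddings are split (`dets`, `embs`) after NMS, and the per-track Kalman prediction loop is replaced by the batched `STrack.multi_predict`. The appearance model stays an exponentially smoothed, L2-normalized feature per track; a standalone numpy sketch of that update rule (the `alpha` value is an assumption, only the formula appears in the diff):

```python
# Standalone sketch of STrack's smoothed-feature update (alpha value is an assumption).
import numpy as np

def update_features(smooth_feat, feat, alpha=0.9):
    smooth_feat = feat if smooth_feat is None else alpha * smooth_feat + (1 - alpha) * feat
    return smooth_feat / np.linalg.norm(smooth_feat)   # normalization now runs on every update
```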

train.py

@ -13,7 +13,6 @@ from torchvision.transforms import transforms as T
def train(
cfg,
data_cfg,
img_size=(1088,608),
resume=False,
epochs=100,
batch_size=16,
@ -33,16 +32,19 @@ def train(
trainset_paths = data_config['train']
dataset_root = data_config['root']
f.close()
cfg_dict = parse_model_cfg(cfg)
img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]
transforms = T.Compose([T.ToTensor()])
# Get dataloader
transforms = T.Compose([T.ToTensor()])
dataset = JointDataset(dataset_root, trainset_paths, img_size, augment=True, transforms=transforms)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
num_workers=8, pin_memory=True, drop_last=True, collate_fn=collate_fn)
# Initialize model
model = Darknet(cfg, img_size, dataset.nID)
model = Darknet(cfg_dict, dataset.nID)
cutoff = -1 # backbone reaches to cutoff layer
start_epoch = 0
@ -87,14 +89,13 @@ def train(
p.requires_grad = False if 'batch_norm' in name else True
model_info(model)
t0 = time.time()
for epoch in range(epochs):
epoch += start_epoch
logger.info(('%8s%12s' + '%10s' * 6) % (
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
# Freeze darknet53.conv.74 for first epoch
if freeze_backbone and (epoch < 2):
@ -108,7 +109,7 @@ def train(
for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
if sum([len(x) for x in targets]) < 1: # if no targets continue
continue
# SGD burn-in
burnin = min(1000, len(dataloader))
if (epoch == 0) & (i <= burnin):
@ -154,8 +155,8 @@ def train(
# Calculate mAP
if epoch % opt.test_interval ==0:
with torch.no_grad():
mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
# Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
@ -166,9 +167,8 @@ if __name__ == '__main__':
parser.add_argument('--epochs', type=int, default=30, help='number of epochs')
parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
parser.add_argument('--accumulated-batches', type=int, default=1, help='number of batches before optimizer step')
parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('--cfg', type=str, default='cfg/yolov3_1088x608.cfg', help='cfg file path')
parser.add_argument('--data-cfg', type=str, default='cfg/ccmcpe.json', help='coco.data file path')
parser.add_argument('--img-size', type=int, default=[1088, 608], nargs='+', help='pixels')
parser.add_argument('--resume', action='store_true', help='resume training flag')
parser.add_argument('--print-interval', type=int, default=40, help='print interval')
parser.add_argument('--test-interval', type=int, default=9, help='test interval')
@ -181,7 +181,6 @@ if __name__ == '__main__':
train(
opt.cfg,
opt.data_cfg,
img_size=opt.img_size,
resume=opt.resume,
epochs=opt.epochs,
batch_size=opt.batch_size,

utils/evaluation.py

@ -2,7 +2,7 @@ import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
from utils.io import read_results, unzip_objs
@ -39,18 +39,20 @@ class Evaluator(object):
ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
ignore_tlwhs = unzip_objs(ignore_objs)[0]
# remove ignored results
keep = np.ones(len(trk_tlwhs), dtype=bool)
iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
if len(iou_distance) > 0:
match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
match_ious = iou_distance[match_is, match_js]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
match_js = np.asarray(match_js, dtype=int)
match_js = match_js[np.logical_not(np.isnan(match_ious))]
keep[match_js] = False
trk_tlwhs = trk_tlwhs[keep]
trk_ids = trk_ids[keep]
# get distance matrix
iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)

utils/kalman_filter.py

@ -1,4 +1,5 @@
# vim: expandtab:ts=4:sw=4
import numba
import numpy as np
import scipy.linalg
@ -116,7 +117,7 @@ class KalmanFilter(object):
self._std_weight_velocity * mean[3]]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
mean = np.dot(self._motion_mat, mean)
mean = np.dot(mean, self._motion_mat.T)
covariance = np.linalg.multi_dot((
self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
@ -150,6 +151,48 @@ class KalmanFilter(object):
covariance = np.linalg.multi_dot((
self._update_mat, covariance, self._update_mat.T))
return mean, covariance + innovation_cov
def multi_predict(self, mean, covariance):
"""Run Kalman filter prediction step (Vectorized version).
Parameters
----------
mean : ndarray
The Nx8 dimensional mean matrix of the object states at the previous
time step.
covariance : ndarray
The Nx8x8 dimensional covariance matrics of the object states at the
previous time step.
Returns
-------
(ndarray, ndarray)
Returns the mean vector and covariance matrix of the predicted
state. Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[:, 3],
self._std_weight_position * mean[:, 3],
1e-2 * np.ones_like(mean[:, 3]),
self._std_weight_position * mean[:, 3]]
std_vel = [
self._std_weight_velocity * mean[:, 3],
self._std_weight_velocity * mean[:, 3],
1e-5 * np.ones_like(mean[:, 3]),
self._std_weight_velocity * mean[:, 3]]
sqr = np.square(np.r_[std_pos, std_vel]).T
motion_cov = []
for i in range(len(mean)):
motion_cov.append(np.diag(sqr[i]))
motion_cov = np.asarray(motion_cov)
mean = np.dot(mean, self._motion_mat.T)
left = np.dot(self._motion_mat, covariance).transpose((1,0,2))
covariance = np.dot(left, self._motion_mat.T) + motion_cov
return mean, covariance
def update(self, mean, covariance, measurement):
"""Run Kalman filter correction step.
@ -186,7 +229,7 @@ class KalmanFilter(object):
return new_mean, new_covariance
def gating_distance(self, mean, covariance, measurements,
only_position=False):
only_position=False, metric='maha'):
"""Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
@ -219,11 +262,17 @@ class KalmanFilter(object):
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
cholesky_factor = np.linalg.cholesky(covariance)
d = measurements - mean
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
if metric == 'gaussian':
return np.sum(d * d, axis=1)
elif metric == 'maha':
cholesky_factor = np.linalg.cholesky(covariance)
z = scipy.linalg.solve_triangular(
cholesky_factor, d.T, lower=True, check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
else:
raise ValueError('invalid distance metric')
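
`multi_predict` vectorizes the Kalman prediction over all tracks at once, and `gating_distance` gains a `metric` argument ('maha' keeps the Mahalanobis gating used by `fuse_motion`, 'gaussian' is plain squared Euclidean). A usage sketch, assuming the file also keeps the standard Deep SORT style `initiate(measurement)`:

```python
# Sketch: batched prediction for N tracks with the new multi_predict.
# initiate() is assumed to exist as in the Deep SORT KalmanFilter this file is based on.
import numpy as np
from utils.kalman_filter import KalmanFilter

kf = KalmanFilter()
m1, c1 = kf.initiate(np.array([100., 200., 0.5, 80.]))   # (center x, center y, aspect ratio, height)
m2, c2 = kf.initiate(np.array([300., 150., 0.4, 60.]))
means, covs = np.stack([m1, m2]), np.stack([c1, c2])     # N x 8 means, N x 8 x 8 covariances
means, covs = kf.multi_predict(means, covs)              # one call instead of a per-track loop
```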

utils/parse_config.py

@ -14,7 +14,9 @@ def parse_model_cfg(path):
else:
key, value = line.split("=")
value = value.strip()
module_defs[-1][key.rstrip()] = value.strip()
if value[0] == '$':
value = module_defs[0].get(value.strip('$'), None)
module_defs[-1][key.rstrip()] = value
return module_defs
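
The parser now resolves `$name` values against the `[net]` block, which is what lets the cfgs above write `filters=$embedding_dim` once and set the dimension in a single place. A minimal behaviour sketch (toy cfg written to a temp file, the file name is made up):

```python
# Sketch of the new $variable substitution in parse_model_cfg (toy cfg, made-up path).
import os, tempfile
from utils.parse_config import parse_model_cfg

cfg_text = """[net]
width=576
height=320
embedding_dim=512

[convolutional]
size=3
filters=$embedding_dim
"""
path = os.path.join(tempfile.mkdtemp(), 'toy.cfg')
with open(path, 'w') as f:
    f.write(cfg_text)

defs = parse_model_cfg(path)
print(defs[1]['filters'])   # '512', pulled from the [net] block
```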