replace maskrcnn-benchmark nms with torchvision nms

2020-01-09 22:48:17 +08:00 · 2020-01-09 22:48:17 +08:00 · be116014d6
commit be116014d6
parent 1cb8cee836
46 changed files with 112372 additions and 111016 deletions
--- a/cfg/ccmcpe.json
+++ b/cfg/ccmcpe.json
@ -7,7 +7,10 @@
        "citypersons":"./data/citypersons.train",
        "cuhksysu":"./data/cuhksysu.train",
        "prw":"./data/prw.train",
-        "eth":"./data/eth.train"
+        "eth":"./data/eth.train",
        "03":"./data/mot16-03.test",
        "01":"./data/mot16-01.test",
        "14":"./data/mot16-14.test"
    },
    "test_emb":
    {
@ -17,7 +20,6 @@
    },
    "test":
    {
        "mot19":"./data/mot19.train",
        "caltech":"./data/caltech.val",
        "citypersons":"./data/citypersons.val"
    }
--- a/cfg/yolov3_864x480.cfg
+++ b/cfg/yolov3_864x480.cfg
@ -0,0 +1,833 @@
 [net]
 # Testing
 #batch=1
 #subdivisions=1
 # Training
 batch=16
 subdivisions=1
 width=480
 height=864
 channels=3
 momentum=0.9
 decay=0.0005
 angle=0
 saturation = 1.5
 exposure = 1.5
 hue=.1
 learning_rate=0.001
 burn_in=1000
 max_batches = 500200
 policy=steps
 steps=400000,450000
 scales=.1,.1
 [convolutional]
 batch_normalize=1
 filters=32
 size=3
 stride=1
 pad=1
 activation=leaky
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=32
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=64
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 # Downsample
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=2
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 [shortcut]
 from=-3
 activation=linear
 ######################
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=1024
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=1024
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=1024
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=24
 activation=linear
 ######### embedding ###########
 [route]
 layers = -3
 [convolutional]
 size=3
 stride=1
 pad=1
 filters=512
 activation=linear
 [route]
 layers = -3, -1
 ###############################
 [yolo]
 mask = 8,9,10,11
 anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508   
 classes=1
 num=12
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
 [route]
 layers = -7
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [upsample]
 stride=2
 [route]
 layers = -1, 61
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=512
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=512
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=512
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=24
 activation=linear
 ######### embedding ###########
 [route]
 layers = -3
 [convolutional]
 size=3
 stride=1
 pad=1
 filters=512
 activation=linear
 [route]
 layers = -3, -1
 ###############################
 [yolo]
 mask = 4,5,6,7 
 anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508   
 classes=1
 num=12
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
 [route]
 layers = -7
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [upsample]
 stride=2
 [route]
 layers = -1, 36
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=256
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=256
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=256
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=24
 activation=linear
 ######### embedding ###########
 [route]
 layers = -3
 [convolutional]
 size=3
 stride=1
 pad=1
 filters=512
 activation=linear
 [route]
 layers = -3, -1
 ###############################
 [yolo]
 mask = 0,1,2,3
 anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508   
 classes=1
 num=12
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
--- a/data/mot16-01.test
+++ b/data/mot16-01.test
@ -0,0 +1,23 @@
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000041.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000201.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000221.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000061.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000021.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000261.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000241.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000001.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000421.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000401.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000381.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000181.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000441.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000161.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000321.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000301.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000141.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000101.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000341.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000361.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000121.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000281.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000081.jpg
--- a/data/mot16-03.test
+++ b/data/mot16-03.test
@ -0,0 +1,52 @@
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000391.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000811.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001471.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001021.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000061.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001261.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000021.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000871.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000631.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000241.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001411.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000781.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001201.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000001.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000451.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000661.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000421.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001441.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000211.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000991.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001051.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000181.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000601.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001381.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000011.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000841.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001231.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000031.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000271.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000931.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000301.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000751.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000481.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000511.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001291.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001141.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000091.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000361.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000121.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000571.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001321.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001111.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000151.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001081.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001351.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000901.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000331.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000721.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001171.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000961.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000541.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000691.jpg
--- a/data/mot16-14.test
+++ b/data/mot16-14.test
@ -0,0 +1,38 @@
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000201.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000041.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000581.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000221.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000061.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000021.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000241.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000261.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000001.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000661.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000421.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000401.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000381.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000641.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000601.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000181.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000441.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000461.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000621.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000161.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000321.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000301.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000481.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000141.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000681.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000101.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000341.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000361.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000121.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000741.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000501.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000281.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000521.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000721.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000561.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000541.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000081.jpg
 /home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000701.jpg
--- a/models.py
+++ b/models.py
@ -10,7 +10,7 @@ import time
 import math
 batch_norm=SyncBN #nn.BatchNorm2d
-
+#batch_norm=nn.BatchNorm2d
 def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
@ -34,7 +34,13 @@ def create_modules(module_defs):
                                                        padding=pad,
                                                        bias=not bn))
            if bn:
-                modules.add_module('batch_norm_%d' % i, batch_norm(filters))
+                after_bn = batch_norm(filters)
                modules.add_module('batch_norm_%d' % i, after_bn)
                # BN is uniformly initialized by default in pytorch 1.0.1. 
                # In pytorch>1.2.0, BN weights are initialized with constant 1,
                # but we find with the uniform initialization the model converges faster.
                nn.init.uniform_(after_bn.weight) 
                nn.init.zeros_(after_bn.bias)
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,75 @@
 ###################################################################
 # File Name: setup.py
 # Author: Zhongdao Wang
 # mail: wcd17@mails.tsinghua.edu.cn
 # Created Time: Thu 19 Dec 2019 07:29:02 PM CST
 ###################################################################
 from __future__ import print_function
 from __future__ import division
 from __future__ import absolute_import
 import os
 import glob
 import torch
 from setuptools import find_packages
 from setuptools import setup
 from torch.utils.cpp_extension import CUDA_HOME
 from torch.utils.cpp_extension import CppExtension
 from torch.utils.cpp_extension import CUDAExtension
 def get_extensions():
    """Describe the C++ extension module that provides the ``nms`` op.

    Every ``*.cpp`` file found in ``utils/nms`` (resolved relative to this
    file) is compiled into one extension named ``"nms"``. The CUDA build is
    currently switched off, so ``*.cu`` sources are located but never added.

    Returns:
        A one-element list holding the configured extension object, ready to
        be passed to ``setup(ext_modules=...)``.
    """
    root = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(root, "utils", "nms")

    def find(pattern):
        # Paths come back already prefixed with the absolute extensions_dir.
        return glob.glob(os.path.join(extensions_dir, pattern))

    cpp_files = find("*.cpp")
    cuda_files = find("*.cu")

    builder = CppExtension
    sources = list(cpp_files)
    macros = []
    compile_args = {"cxx": []}

    # The CUDA path is deliberately disabled; the original guard was:
    #if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
    cuda_enabled = False
    if cuda_enabled:
        builder = CUDAExtension
        sources.extend(cuda_files)
        macros.append(("WITH_CUDA", None))
        compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    # Joining an absolute path onto extensions_dir leaves it unchanged.
    sources = [os.path.join(extensions_dir, path) for path in sources]

    return [
        builder(
            "nms",
            sources,
            include_dirs=[extensions_dir],
            define_macros=macros,
            extra_compile_args=compile_args,
        )
    ]
 # Debug output: print the extension objects that are about to be built.
 # NOTE(review): get_extensions() is called again below, so the source
 # directory is globbed twice per invocation.
 print(get_extensions())
 # Register the "nms" extension with setuptools; torch's BuildExtension is
 # installed as the build_ext command that compiles it.
 setup(
    name="nms",
    version="0.1",
    author="fmassa",
    url="https://github.com/facebookresearch/maskrcnn-benchmark",
    description="GPU supported NMS",
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
 )
--- a/track.py
+++ b/track.py
@ -147,13 +147,22 @@ if __name__ == '__main__':
    print(opt, end='\n\n')
    if not opt.test_mot16:
-        seqs_str = '''KITTI-13
+        #seqs_str = '''KITTI-13
-                      KITTI-17
+        #              KITTI-17
-                      ADL-Rundle-6
+        #              ADL-Rundle-6
-                      PETS09-S2L1
+        #              PETS09-S2L1
-                      TUD-Campus
+        #              TUD-Campus
-                      TUD-Stadtmitte'''
+        #              TUD-Stadtmitte'''
-        data_root = '/home/wangzd/datasets/MOT/MOT15/train'
+        #data_root = '/home/wangzd/datasets/MOT/MOT15/train'
        seqs_str = '''MOT17-02-SDP
                      MOT17-04-SDP
                      MOT17-05-SDP
                      MOT17-09-SDP
                      MOT17-10-SDP
                      MOT17-11-SDP
                      MOT17-13-SDP
                    '''
        data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
    else:
        seqs_str = '''MOT16-01
                     MOT16-03
--- a/tracker/matching.py
+++ b/tracker/matching.py
@ -4,7 +4,7 @@ import scipy
 from scipy.spatial.distance import cdist
 from sklearn.utils import linear_assignment_
-from utils.cython_bbox import bbox_ious
+from cython_bbox import bbox_overlaps as bbox_ious
 from utils import kalman_filter
 import time
--- a/train.py
+++ b/train.py
@ -94,8 +94,6 @@ def train(
        logger.info(('%8s%12s' + '%10s' * 6) % (
            'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
        # Update scheduler (automatic)
        scheduler.step()
        # Freeze darknet53.conv.74 for first epoch
@ -146,7 +144,6 @@ def train(
            if i % opt.print_interval == 0:
                logger.info(s)
        # Save latest checkpoint
        checkpoint = {'epoch': epoch,
                      'model': model.module.state_dict(),
@ -161,6 +158,8 @@ def train(
                test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
        # Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
        scheduler.step()
 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
--- a/utils/_C.cpython-36m-x86_64-linux-gnu.so
+++ b/utils/_C.cpython-36m-x86_64-linux-gnu.so
--- a/utils/cython_bbox.cpython-36m-x86_64-linux-gnu.so
+++ b/utils/cython_bbox.cpython-36m-x86_64-linux-gnu.so
--- a/utils/datasets.py
+++ b/utils/datasets.py
@ -194,6 +194,7 @@ class LoadImagesAndLabels:  # for training
        if self.augment:
            img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
        plotFlag = False
        if plotFlag:
            import matplotlib
@ -392,12 +393,10 @@ class JointDataset(LoadImagesAndLabels):  # for training
    def __getitem__(self, files_index):
        for i, c in enumerate(self.cds):
            if files_index >= c: 
                ds = list(self.label_files.keys())[i]
                start_index = c
        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]
--- a/utils/nms.py
+++ b/utils/nms.py
@ -1,7 +0,0 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 # from ._utils import _C
 from utils import _C
 nms = _C.nms
 # nms.__doc__ = """
 # This function performs Non-maximum suppresion"""
--- a/utils/nms/nms.h
+++ b/utils/nms/nms.h
@ -0,0 +1,32 @@
 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 #pragma once
 #include <torch/extension.h>
 at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores, const float threshold);
 #ifdef WITH_CUDA
 at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
 #endif
 // Entry point for non-maximum suppression: routes to the CUDA or the CPU
 // implementation depending on the device of `dets`.
 //
 //   dets      - box tensor; on the CUDA path it is concatenated with the
 //               scores along dim 1 before being handed to nms_cuda
 //               (presumably N x 4 boxes -- confirm against callers)
 //   scores    - one score per box (unsqueezed to a column on the CUDA path)
 //   threshold - overlap threshold forwarded unchanged to the backend
 //
 // Returns whatever nms_cuda / nms_cpu return. An empty CUDA input yields an
 // empty int64 tensor on the CPU. Raises through AT_ERROR when `dets` is a
 // CUDA tensor but the extension was compiled without WITH_CUDA.
 at::Tensor nms(const at::Tensor& dets,
               const at::Tensor& scores,
               const float threshold) {
  if (dets.type().is_cuda()) {
 #ifdef WITH_CUDA
    // TODO raise error if not compiled with CUDA
    // Nothing to suppress: short-circuit before building the N x 5 tensor.
    if (dets.numel() == 0)
      return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
    // nms_cuda expects boxes and scores packed into a single tensor.
    auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
    return nms_cuda(b, threshold);
 #else
    AT_ERROR("Not compiled with GPU support");
 #endif
  }
  // CPU tensors (and the unreachable fall-through after AT_ERROR) end up here.
  at::Tensor result = nms_cpu(dets, scores, threshold);
  return result;
 }
 // Python binding: exposes the dispatcher above as `nms` on the extension
 // module whose name is supplied by the build via TORCH_EXTENSION_NAME.
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
    m.def("nms", &nms, "non-maximum suppression");
 }
--- a/utils/nms/nms_cpu.cpp
+++ b/utils/nms/nms_cpu.cpp
@ -0,0 +1,74 @@
 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 #include "nms.h"
 // Greedy non-maximum suppression on the CPU for element type `scalar_t`.
 //
 //   dets      - CPU tensor; columns 0..3 are read as x1, y1, x2, y2
 //   scores    - CPU tensor of the same type as `dets`, one score per box
 //   threshold - a box is suppressed when its IoU with a kept, higher-scoring
 //               box is >= threshold
 //
 // Returns the indices of the boxes that were not suppressed, as produced by
 // at::nonzero (i.e. in box-index order, not score order). An empty `dets`
 // yields an empty int64 CPU tensor. Fails an AT_ASSERTM when either input is
 // a CUDA tensor or when the two tensor types differ.
 template <typename scalar_t>
 at::Tensor nms_cpu_kernel(const at::Tensor& dets,
                          const at::Tensor& scores,
                          const float threshold) {
  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
  AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
  AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  }
  // Pull each coordinate column into its own contiguous buffer so the raw
  // pointers below can be indexed directly by box number.
  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();
  // The +1 means a box with x1 == x2 still has width 1.
  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
  // Box indices ordered from highest to lowest score.
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
  auto ndets = dets.size(0);
  // One flag per box: 1 once the box has been suppressed.
  at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
  auto suppressed = suppressed_t.data<uint8_t>();
  auto order = order_t.data<int64_t>();
  auto x1 = x1_t.data<scalar_t>();
  auto y1 = y1_t.data<scalar_t>();
  auto x2 = x2_t.data<scalar_t>();
  auto y2 = y2_t.data<scalar_t>();
  auto areas = areas_t.data<scalar_t>();
  // Visit boxes in descending score order; every surviving box suppresses
  // the lower-scoring boxes that overlap it too much.
  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    if (suppressed[i] == 1)
      continue;
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];
    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1)
        continue;
      // Intersection rectangle of box i and box j.
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);
      // Clamp to zero so disjoint boxes contribute no intersection area.
      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
      auto inter = w * h;
      // Intersection over union.
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr >= threshold)
        suppressed[j] = 1;
   }
  }
  return at::nonzero(suppressed_t == 0).squeeze(1);
 }
 // CPU front end: instantiates nms_cpu_kernel for the floating-point element
 // type of `dets` via AT_DISPATCH_FLOATING_TYPES and returns its result.
 // Parameters are forwarded unchanged to the kernel.
 at::Tensor nms_cpu(const at::Tensor& dets,
               const at::Tensor& scores,
               const float threshold) {
  at::Tensor result;
  // Inside the lambda, `scalar_t` is the type alias supplied by the macro.
  AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
    result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
  });
  return result;
 }
--- a/utils/nms/nms_kernel.cu
+++ b/utils/nms/nms_kernel.cu
@ -0,0 +1,131 @@
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>
 #include <THC/THC.h>
 #include <THC/THCDeviceUtils.cuh>
 #include <vector>
 #include <iostream>
 int const threadsPerBlock = sizeof(unsigned long long) * 8;
 // Intersection-over-union of two boxes on the device.
 // `a` and `b` each point at (at least) four floats, read as
 // [0]=left, [1]=top, [2]=right, [3]=bottom. Widths and heights use a +1
 // offset, so a box with left == right has width 1.
 __device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  // Clamp to zero so non-overlapping boxes give an empty intersection.
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
 }
 // Computes the pairwise "overlaps too much" bitmask used by nms_cuda.
 //
 // Boxes are processed in tiles of threadsPerBlock. blockIdx.y picks the tile
 // of "row" boxes and blockIdx.x the tile of "column" boxes; each thread owns
 // one row box and compares it against every column box of the tile.
 //
 //   n_boxes            - number of boxes in dev_boxes
 //   nms_overlap_thresh - a bit is set when IoU is strictly greater than this
 //   dev_boxes          - device array holding 5 floats per box
 //   dev_mask           - output; one unsigned long long per (box, column
 //                        tile), bit i set when the box overlaps column box i
 __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;
  // if (row_start > col_start) return;
  // The last tile in each direction may be only partially filled.
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
  // Stage the column tile in shared memory; each thread copies one box.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // All column boxes must be loaded before any thread starts comparing.
  __syncthreads();
  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal tile, compare only against boxes that come after this
    // one, which also skips the comparison of a box with itself.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    // Row-major layout: one mask word per column tile for every box.
    const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
 }
 // boxes is a N x 5 tensor
 //
 // CUDA non-maximum suppression. Column 4 of `boxes` is used as the score;
 // the remaining columns are the box coordinates consumed by nms_kernel.
 // The data is accessed as float, so `boxes` must be a float32 CUDA tensor.
 //
 // Steps: sort boxes by descending score, compute the pairwise overlap
 // bitmask on the GPU, copy it to the host, then greedily keep each box that
 // no previously kept box has marked for removal.
 //
 // Returns the original indices of the kept boxes, sorted ascending, on the
 // same device as the sort-order tensor.
 at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
  using scalar_t = float;
  AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
  auto scores = boxes.select(1, 4);
  // order_t[k] is the original index of the k-th highest-scoring box.
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
  auto boxes_sorted = boxes.index_select(0, order_t);
  int boxes_num = boxes.size(0);
  // Number of mask words needed per box.
  const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
  scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
  THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
  unsigned long long* mask_dev = NULL;
  //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
  //                      boxes_num * col_blocks * sizeof(unsigned long long)));
  // NOTE(review): the checked call above is commented out; the result of
  // this allocation is not verified before use.
  mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
  // One thread block per (row tile, column tile) pair.
  dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
              THCCeilDiv(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // Bring the full overlap mask back to the host for the greedy pass.
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  THCudaCheck(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));
  // remv accumulates, one bit per box, every box suppressed so far.
  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
  at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
  int64_t* keep_out = keep.data<int64_t>();
  int num_to_keep = 0;
  // Boxes are in descending score order, so walking i upward is the greedy
  // order: keep box i unless an earlier kept box already removed it.
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;
    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Fold the mask row of box i into remv; words before nblock only
      // describe boxes that have already been visited.
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  THCudaFree(state, mask_dev);
  // TODO improve this part
  // Map positions in the sorted order back to original box indices, then
  // sort those indices ascending.
  return std::get<0>(order_t.index({
                       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
                         order_t.device(), keep.scalar_type())
                     }).sort(0, false));
 }
--- a/utils/utils.py
+++ b/utils/utils.py
@ -9,11 +9,8 @@ import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn.functional as F
-
+from torchvision.ops import nms
-import maskrcnn_benchmark.layers.nms as nms
+#import maskrcnn_benchmark.layers.nms as nms
 # Set printoptions
 torch.set_printoptions(linewidth=1320, precision=5, profile='long')
 np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
 def mkdir_if_missing(d):
    if not osp.exists(d):
@ -424,12 +421,17 @@ def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.05, method=1):
            np.uint8(method))
    return keep
-def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
+def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'):
    """
    Removes detections with lower object confidence score than 'conf_thres'
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    Args:
        prediction,
        conf_thres,
        nms_thres,
        method = 'standard', 'fast', 'soft_linear' or 'soft_gaussian'
    """
    output = [None for _ in range(len(prediction))]
@ -453,11 +455,18 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
        # Non-maximum suppression
-        if method == -1:
+        if method == 'standard':
            nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
-        else:
+        elif method == 'soft_linear':
            dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
-            nms_indices = soft_nms(dets, Nt=nms_thres, method=method)
+            nms_indices = soft_nms(dets, Nt=nms_thres, method=0)
        elif method == 'soft_gaussian':
            dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
            nms_indices = soft_nms(dets, Nt=nms_thres, method=1)
        elif method == 'fast':
            nms_indices = fast_nms(pred[:, :4], pred[:, 4], iou_thres=nms_thres, conf_thres=conf_thres)
        else:
            raise ValueError('Invalid NMS type!')
        det_max = pred[nms_indices]        
        if len(det_max) > 0:
@ -466,6 +475,87 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
    return output
 def fast_nms(boxes, scores, iou_thres:float=0.5, top_k:int=200, second_threshold:bool=False, conf_thres:float=0.5):
    '''
    Vectorized, approximated, fast NMS, adopted from YOLACT:
    https://github.com/dbolya/yolact/blob/master/layers/functions/detection.py
    The original version is for multi-class NMS, here we simplify the code for single-class NMS

    Args:
        boxes: tensor of boxes, indexed as boxes[i, :]; passed to `jaccard`
               as (x1, y1, x2, y2) corner coordinates.
        scores: 1-D tensor with one confidence score per box.
        iou_thres: a box is dropped when its IoU with any higher-scoring
                   box exceeds this value.
        top_k: only the `top_k` highest-scoring boxes are considered.
        second_threshold: if True, additionally drop boxes whose score is
                          not strictly greater than `conf_thres`.
        conf_thres: score threshold, used only when `second_threshold` is True.

    Returns:
        Indices into the original `boxes` of the kept detections, ordered
        by descending score.
    '''
    scores, idx = scores.sort(0, descending=True)
    idx = idx[:top_k].contiguous()
    scores = scores[:top_k]

    # With no candidates there is nothing to suppress, and the column-wise
    # max below cannot reduce over an empty dimension.
    if idx.numel() == 0:
        return idx

    boxes = boxes[idx, :]

    # Pairwise IoU; keeping only the strict upper triangle leaves entry
    # [i, j] non-zero only when box i outranks box j.
    iou = jaccard(boxes, boxes)
    iou.triu_(diagonal=1)

    # For every box, the largest overlap with any higher-scoring box.
    # Suppressed boxes still suppress others, hence "approximated".
    iou_max, _ = iou.max(dim=0)
    keep = (iou_max <= iou_thres)

    if second_threshold:
        # Free function: the threshold is the `conf_thres` argument, there
        # is no `self` to read it from.
        keep &= (scores > conf_thres)

    return idx[keep]
@torch.jit.script
 def intersect(box_a, box_b):
    """Pairwise intersection area between two batched sets of boxes.

    Each box is read as (x1, y1, x2, y2): columns 0:2 are taken as the
    minimum corner and columns 2:4 as the maximum corner. Both corner
    tensors are broadcast to [n, A, B, 2] with `expand`, so every box in
    box_a is paired with every box in box_b.

    Args:
      box_a: (tensor) bounding boxes, Shape: [n,A,4].
      box_b: (tensor) bounding boxes, Shape: [n,B,4].
    Return:
      (tensor) intersection area, Shape: [n,A,B].
    """
    batch = box_a.size(0)
    num_a = box_a.size(1)
    num_b = box_b.size(1)

    # [n,A,2] -> [n,A,1,2] -> [n,A,B,2]
    a_max_corner = box_a[:, :, 2:].unsqueeze(2).expand(batch, num_a, num_b, 2)
    a_min_corner = box_a[:, :, :2].unsqueeze(2).expand(batch, num_a, num_b, 2)
    # [n,B,2] -> [n,1,B,2] -> [n,A,B,2]
    b_max_corner = box_b[:, :, 2:].unsqueeze(1).expand(batch, num_a, num_b, 2)
    b_min_corner = box_b[:, :, :2].unsqueeze(1).expand(batch, num_a, num_b, 2)

    # The overlap rectangle spans from the larger of the two minimum
    # corners to the smaller of the two maximum corners.
    upper = torch.min(a_max_corner, b_max_corner)
    lower = torch.max(a_min_corner, b_min_corner)

    # Disjoint boxes give a negative extent; clamp it to zero.
    extent = torch.clamp((upper - lower), min=0)
    return extent[:, :, :, 0] * extent[:, :, :, 1]
 def jaccard(box_a, box_b, iscrowd:bool=False):
    """Compute the jaccard overlap (intersection over union) of two box sets.

    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)

    When iscrowd=True the intersection is divided by area(A) only, so the
    crowd boxes should be passed as box_b.

    Args:
        box_a: (tensor) boxes as (x1, y1, x2, y2), Shape: [A,4] or [n,A,4]
        box_b: (tensor) boxes as (x1, y1, x2, y2), Shape: [B,4] or [n,B,4]
        iscrowd: divide by area(A) instead of the union.
    Return:
        jaccard overlap: (tensor) Shape: [A,B] when box_a is 2-D,
        otherwise [n,A,B]
    """
    batched = box_a.dim() != 2
    if not batched:
        # Add a leading batch axis so both inputs match what intersect() takes.
        box_a = box_a.unsqueeze(0)
        box_b = box_b.unsqueeze(0)

    overlap = intersect(box_a, box_b)

    width_a = box_a[:, :, 2] - box_a[:, :, 0]
    height_a = box_a[:, :, 3] - box_a[:, :, 1]
    area_a = (width_a * height_a).unsqueeze(2).expand_as(overlap)  # [n,A,B]

    width_b = box_b[:, :, 2] - box_b[:, :, 0]
    height_b = box_b[:, :, 3] - box_b[:, :, 1]
    area_b = (width_b * height_b).unsqueeze(1).expand_as(overlap)  # [n,A,B]

    if iscrowd:
        ratio = overlap / area_a
    else:
        ratio = overlap / (area_a + area_b - overlap)

    # Drop the batch axis again if it was added above.
    if batched:
        return ratio
    return ratio.squeeze(0)
 def return_torch_unique_index(u, uv):
    n = uv.shape[1]  # number of columns