replace maskrcnn-benchmark nms with torchvision nms

Zhongdao 2020-01-09 22:48:17 +08:00
parent 1cb8cee836
commit be116014d6
46 changed files with 112372 additions and 111016 deletions

View File

@@ -7,7 +7,10 @@
"citypersons":"./data/citypersons.train",
"cuhksysu":"./data/cuhksysu.train",
"prw":"./data/prw.train",
"eth":"./data/eth.train"
"eth":"./data/eth.train",
"03":"./data/mot16-03.test",
"01":"./data/mot16-01.test",
"14":"./data/mot16-14.test"
},
"test_emb":
{
@@ -17,7 +20,6 @@
},
"test":
{
"mot19":"./data/mot19.train",
"caltech":"./data/caltech.val",
"citypersons":"./data/citypersons.val"
}
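Each entry in this config points to a list file of image paths; the three mot16-*.test files added below contain one absolute image path per line, and the .train files presumably follow the same layout. An illustrative way such a list could be read (sketch only, not repository code):

with open('./data/mot16-01.test') as f:
    img_paths = [line.strip() for line in f if line.strip()]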

833
cfg/yolov3_864x480.cfg Normal file
View File

@@ -0,0 +1,833 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=480
height=864
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 8,9,10,11
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 4,5,6,7
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 0,1,2,3
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
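Note on the head width: each [yolo] layer in this cfg uses 4 of the 12 anchors (mask = 8,9,10,11 / 4,5,6,7 / 0,1,2,3) and classes=1, so the 1x1 convolution in front of it presumably needs filters = anchors_per_scale * (4 box + 1 objectness + 1 class) = 4 * 6 = 24, which matches the filters=24 layers above. A one-line check (illustrative only):

n_anchors_per_scale, n_classes = 4, 1
assert n_anchors_per_scale * (4 + 1 + n_classes) == 24   # filters before each [yolo] block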

23
data/mot16-01.test Normal file
View File

@@ -0,0 +1,23 @@
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000041.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000221.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000401.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000161.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000101.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000341.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000281.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000081.jpg

52
data/mot16-03.test Normal file
View File

@@ -0,0 +1,52 @@
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000391.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000811.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001471.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000871.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000631.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001411.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000781.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000451.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000661.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000211.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000991.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001051.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000601.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000011.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000841.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001231.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000031.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000271.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000931.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000751.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000481.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000511.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001291.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000091.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000571.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001111.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000151.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001081.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001351.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000901.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000331.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000721.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001171.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000961.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000541.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000691.jpg

38
data/mot16-14.test Normal file
View File

@@ -0,0 +1,38 @@
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000041.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000581.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000221.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000661.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000401.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000641.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000601.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000461.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000621.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000161.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000481.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000681.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000101.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000341.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000741.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000501.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000281.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000521.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000721.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000561.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000541.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000081.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000701.jpg

View File

@@ -10,7 +10,7 @@ import time
import math
batch_norm=SyncBN #nn.BatchNorm2d
#batch_norm=nn.BatchNorm2d
def create_modules(module_defs):
"""
Constructs module list of layer blocks from module configuration in module_defs
@@ -34,7 +34,13 @@ def create_modules(module_defs):
padding=pad,
bias=not bn))
if bn:
modules.add_module('batch_norm_%d' % i, batch_norm(filters))
after_bn = batch_norm(filters)
modules.add_module('batch_norm_%d' % i, after_bn)
# BN is uniformly initialized by default in pytorch 1.0.1.
# In pytorch>1.2.0, BN weights are initialized with constant 1,
# but we find with the uniform initialization the model converges faster.
nn.init.uniform_(after_bn.weight)
nn.init.zeros_(after_bn.bias)
if module_def['activation'] == 'leaky':
modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))
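As a side note, a minimal sketch of applying the same uniform BatchNorm initialization to a whole model (the helper name is hypothetical and not part of this repository):

import torch.nn as nn

def init_batchnorm_uniform(model: nn.Module) -> None:
    # Mirror the pytorch 1.0.1 default mentioned in the comment above:
    # weight ~ U(0, 1) and bias = 0, rather than the constant-1 weight of newer releases.
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            nn.init.uniform_(m.weight)
            nn.init.zeros_(m.bias)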

75
setup.py Normal file
View File

@@ -0,0 +1,75 @@
###################################################################
# File Name: setup.py
# Author: Zhongdao Wang
# mail: wcd17@mails.tsinghua.edu.cn
# Created Time: Thu 19 Dec 2019 07:29:02 PM CST
###################################################################
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os
import glob
import torch
from setuptools import find_packages
from setuptools import setup
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "utils", "nms")
main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
source_cpu = glob.glob(os.path.join(extensions_dir, "*.cpp"))
source_cuda = glob.glob(os.path.join(extensions_dir, "*.cu"))
sources = main_file
extension = CppExtension
extra_compile_args = {"cxx": []}
define_macros = []
#if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
if False:
extension = CUDAExtension
sources += source_cuda
define_macros += [("WITH_CUDA", None)]
extra_compile_args["nvcc"] = [
"-DCUDA_HAS_FP16=1",
"-D__CUDA_NO_HALF_OPERATORS__",
"-D__CUDA_NO_HALF_CONVERSIONS__",
"-D__CUDA_NO_HALF2_OPERATORS__",
]
sources = [os.path.join(extensions_dir, s) for s in sources]
include_dirs = [extensions_dir]
ext_modules = [
extension(
"nms",
sources,
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
)
]
return ext_modules
print(get_extensions())
setup(
name="nms",
version="0.1",
author="fmassa",
url="https://github.com/facebookresearch/maskrcnn-benchmark",
description="GPU supported NMS",
ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
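A rough build-and-use sketch for this extension (commands and tensors are illustrative; it assumes the build is run from the repository root, and with the CUDA branch disabled by the `if False:` above only the CPU kernel is compiled):

# python setup.py build_ext --inplace
import torch
import nms as nms_ext   # the "nms" module produced by this setup.py

boxes  = torch.tensor([[0., 0., 10., 10.],
                       [1., 1., 11., 11.]])
scores = torch.tensor([0.9, 0.8])
keep = nms_ext.nms(boxes, scores, 0.5)   # LongTensor of indices of the kept boxes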

View File

@@ -147,13 +147,22 @@ if __name__ == '__main__':
print(opt, end='\n\n')
if not opt.test_mot16:
seqs_str = '''KITTI-13
KITTI-17
ADL-Rundle-6
PETS09-S2L1
TUD-Campus
TUD-Stadtmitte'''
data_root = '/home/wangzd/datasets/MOT/MOT15/train'
#seqs_str = '''KITTI-13
# KITTI-17
# ADL-Rundle-6
# PETS09-S2L1
# TUD-Campus
# TUD-Stadtmitte'''
#data_root = '/home/wangzd/datasets/MOT/MOT15/train'
seqs_str = '''MOT17-02-SDP
MOT17-04-SDP
MOT17-05-SDP
MOT17-09-SDP
MOT17-10-SDP
MOT17-11-SDP
MOT17-13-SDP
'''
data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
else:
seqs_str = '''MOT16-01
MOT16-03

View File

@@ -4,7 +4,7 @@ import scipy
from scipy.spatial.distance import cdist
from sklearn.utils import linear_assignment_
from utils.cython_bbox import bbox_ious
from cython_bbox import bbox_overlaps as bbox_ious
from utils import kalman_filter
import time
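The swapped import above points to the standalone cython_bbox package (installable with `pip install cython_bbox`); a small illustrative call, assuming float64 boxes in (x1, y1, x2, y2) order:

import numpy as np
from cython_bbox import bbox_overlaps as bbox_ious

atlbrs = np.ascontiguousarray([[0., 0., 10., 10.]], dtype=np.float64)
btlbrs = np.ascontiguousarray([[5., 5., 15., 15.]], dtype=np.float64)
ious = bbox_ious(atlbrs, btlbrs)   # pairwise IoU matrix of shape (len(atlbrs), len(btlbrs))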

View File

@@ -94,8 +94,6 @@ def train(
logger.info(('%8s%12s' + '%10s' * 6) % (
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
# Update scheduler (automatic)
scheduler.step()
# Freeze darknet53.conv.74 for first epoch
@@ -146,7 +144,6 @@ def train(
if i % opt.print_interval == 0:
logger.info(s)
# Save latest checkpoint
checkpoint = {'epoch': epoch,
'model': model.module.state_dict(),
@@ -161,6 +158,8 @@
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
# Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
scheduler.step()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
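The relocated scheduler.step() above follows the PyTorch >= 1.1.0 convention that the LR scheduler is stepped only after the optimizer; schematically (generic sketch with placeholder names, not repository code):

for epoch in range(start_epoch, epochs):
    for batch in dataloader:
        loss = compute_loss(model(batch))   # placeholder forward/loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()                        # once per epoch, after the optimizer updates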

View File

@@ -194,6 +194,7 @@ class LoadImagesAndLabels: # for training
if self.augment:
img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
plotFlag = False
if plotFlag:
import matplotlib
@@ -392,12 +393,10 @@ class JointDataset(LoadImagesAndLabels): # for training
def __getitem__(self, files_index):
for i, c in enumerate(self.cds):
if files_index >= c:
ds = list(self.label_files.keys())[i]
start_index = c
img_path = self.img_files[ds][files_index - start_index]
label_path = self.label_files[ds][files_index - start_index]
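The lookup above maps a global sample index to a (sub-dataset, local index) pair using the cumulative start offsets stored in self.cds; with made-up numbers (illustrative only):

cds   = [0, 1000, 3500]                  # hypothetical start index of each sub-dataset
names = ['mot17', 'caltech', 'citypersons']
files_index = 2750
for i, c in enumerate(cds):
    if files_index >= c:                 # keeps updating until the last start offset <= index
        ds, start_index = names[i], c
local_index = files_index - start_index  # -> ds == 'caltech', local_index == 1750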

View File

@@ -1,7 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from utils import _C
nms = _C.nms
# nms.__doc__ = """
# This function performs Non-maximum suppression"""

32
utils/nms/nms.h Normal file
View File

@@ -0,0 +1,32 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include <torch/extension.h>
at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores, const float threshold);
#ifdef WITH_CUDA
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
#endif
at::Tensor nms(const at::Tensor& dets,
const at::Tensor& scores,
const float threshold) {
if (dets.type().is_cuda()) {
#ifdef WITH_CUDA
// TODO raise error if not compiled with CUDA
if (dets.numel() == 0)
return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
return nms_cuda(b, threshold);
#else
AT_ERROR("Not compiled with GPU support");
#endif
}
at::Tensor result = nms_cpu(dets, scores, threshold);
return result;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
m.def("nms", &nms, "non-maximum suppression");
}

74
utils/nms/nms_cpu.cpp Normal file
View File

@@ -0,0 +1,74 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "nms.h"
template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets,
const at::Tensor& scores,
const float threshold) {
AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
if (dets.numel() == 0) {
return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
}
auto x1_t = dets.select(1, 0).contiguous();
auto y1_t = dets.select(1, 1).contiguous();
auto x2_t = dets.select(1, 2).contiguous();
auto y2_t = dets.select(1, 3).contiguous();
at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
auto ndets = dets.size(0);
at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
auto suppressed = suppressed_t.data<uint8_t>();
auto order = order_t.data<int64_t>();
auto x1 = x1_t.data<scalar_t>();
auto y1 = y1_t.data<scalar_t>();
auto x2 = x2_t.data<scalar_t>();
auto y2 = y2_t.data<scalar_t>();
auto areas = areas_t.data<scalar_t>();
for (int64_t _i = 0; _i < ndets; _i++) {
auto i = order[_i];
if (suppressed[i] == 1)
continue;
auto ix1 = x1[i];
auto iy1 = y1[i];
auto ix2 = x2[i];
auto iy2 = y2[i];
auto iarea = areas[i];
for (int64_t _j = _i + 1; _j < ndets; _j++) {
auto j = order[_j];
if (suppressed[j] == 1)
continue;
auto xx1 = std::max(ix1, x1[j]);
auto yy1 = std::max(iy1, y1[j]);
auto xx2 = std::min(ix2, x2[j]);
auto yy2 = std::min(iy2, y2[j]);
auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
auto inter = w * h;
auto ovr = inter / (iarea + areas[j] - inter);
if (ovr >= threshold)
suppressed[j] = 1;
}
}
return at::nonzero(suppressed_t == 0).squeeze(1);
}
at::Tensor nms_cpu(const at::Tensor& dets,
const at::Tensor& scores,
const float threshold) {
at::Tensor result;
AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
});
return result;
}
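For readers more comfortable with Python, a reference-only sketch of the same greedy scheme as nms_cpu_kernel (it assumes dets is an [N, 4] CPU float tensor in x1, y1, x2, y2 order and scores an [N] tensor, matching the C++ signature; not part of the repository):

import torch

def nms_cpu_reference(dets, scores, threshold):
    x1, y1, x2, y2 = dets.unbind(1)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)        # same +1 pixel convention as the C++ kernel
    order = scores.sort(descending=True).indices
    suppressed = torch.zeros(dets.size(0), dtype=torch.bool)
    for _i in range(order.numel()):
        i = order[_i]
        if suppressed[i]:
            continue
        for _j in range(_i + 1, order.numel()):
            j = order[_j]
            if suppressed[j]:
                continue
            xx1, yy1 = torch.max(x1[i], x1[j]), torch.max(y1[i], y1[j])
            xx2, yy2 = torch.min(x2[i], x2[j]), torch.min(y2[i], y2[j])
            w = (xx2 - xx1 + 1).clamp(min=0)
            h = (yy2 - yy1 + 1).clamp(min=0)
            inter = w * h
            iou = inter / (areas[i] + areas[j] - inter)
            if iou >= threshold:
                suppressed[j] = True
    return (~suppressed).nonzero().squeeze(1)    # indices of the kept boxes, like the C++ code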

131
utils/nms/nms_kernel.cu Normal file
View File

@@ -0,0 +1,131 @@
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <THC/THCDeviceUtils.cuh>
#include <vector>
#include <iostream>
int const threadsPerBlock = sizeof(unsigned long long) * 8;
__device__ inline float devIoU(float const * const a, float const * const b) {
float left = max(a[0], b[0]), right = min(a[2], b[2]);
float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
float interS = width * height;
float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
return interS / (Sa + Sb - interS);
}
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
const float *dev_boxes, unsigned long long *dev_mask) {
const int row_start = blockIdx.y;
const int col_start = blockIdx.x;
// if (row_start > col_start) return;
const int row_size =
min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
const int col_size =
min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
__shared__ float block_boxes[threadsPerBlock * 5];
if (threadIdx.x < col_size) {
block_boxes[threadIdx.x * 5 + 0] =
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
block_boxes[threadIdx.x * 5 + 1] =
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
block_boxes[threadIdx.x * 5 + 2] =
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
block_boxes[threadIdx.x * 5 + 3] =
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
block_boxes[threadIdx.x * 5 + 4] =
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
}
__syncthreads();
if (threadIdx.x < row_size) {
const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
const float *cur_box = dev_boxes + cur_box_idx * 5;
int i = 0;
unsigned long long t = 0;
int start = 0;
if (row_start == col_start) {
start = threadIdx.x + 1;
}
for (i = start; i < col_size; i++) {
if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
t |= 1ULL << i;
}
}
const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
dev_mask[cur_box_idx * col_blocks + col_start] = t;
}
}
// boxes is a N x 5 tensor
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
using scalar_t = float;
AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
auto scores = boxes.select(1, 4);
auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
auto boxes_sorted = boxes.index_select(0, order_t);
int boxes_num = boxes.size(0);
const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
unsigned long long* mask_dev = NULL;
//THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
// boxes_num * col_blocks * sizeof(unsigned long long)));
mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
THCCeilDiv(boxes_num, threadsPerBlock));
dim3 threads(threadsPerBlock);
nms_kernel<<<blocks, threads>>>(boxes_num,
nms_overlap_thresh,
boxes_dev,
mask_dev);
std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
THCudaCheck(cudaMemcpy(&mask_host[0],
mask_dev,
sizeof(unsigned long long) * boxes_num * col_blocks,
cudaMemcpyDeviceToHost));
std::vector<unsigned long long> remv(col_blocks);
memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
int64_t* keep_out = keep.data<int64_t>();
int num_to_keep = 0;
for (int i = 0; i < boxes_num; i++) {
int nblock = i / threadsPerBlock;
int inblock = i % threadsPerBlock;
if (!(remv[nblock] & (1ULL << inblock))) {
keep_out[num_to_keep++] = i;
unsigned long long *p = &mask_host[0] + i * col_blocks;
for (int j = nblock; j < col_blocks; j++) {
remv[j] |= p[j];
}
}
}
THCudaFree(state, mask_dev);
// TODO improve this part
return std::get<0>(order_t.index({
keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
order_t.device(), keep.scalar_type())
}).sort(0, false));
}
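The host-side loop at the end of nms_cuda can be read as a bitmask reduction; a reference-only Python sketch (mask[i][b] stands for the 64-bit word that box i produced against column-block b in nms_kernel; names are illustrative, not repository code):

def reduce_masks(mask, n_boxes, block=64):
    col_blocks = (n_boxes + block - 1) // block
    remv = [0] * col_blocks                      # accumulated suppression bits per column block
    keep = []
    for i in range(n_boxes):                     # boxes are pre-sorted by score
        nblock, inblock = divmod(i, block)
        if not (remv[nblock] >> inblock) & 1:    # box i not suppressed by any earlier kept box
            keep.append(i)
            for b in range(nblock, col_blocks):  # propagate everything box i suppresses
                remv[b] |= mask[i][b]
    return keep                                  # indices into the score-sorted order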

View File

@@ -9,11 +9,8 @@ import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import maskrcnn_benchmark.layers.nms as nms
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
from torchvision.ops import nms
#import maskrcnn_benchmark.layers.nms as nms
def mkdir_if_missing(d):
if not osp.exists(d):
@@ -424,12 +421,17 @@ def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.05, method=1):
np.uint8(method))
return keep
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'):
"""
Removes detections with object confidence score lower than 'conf_thres' and
applies Non-Maximum Suppression to further filter detections.
Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_score, class_pred)
Args:
prediction: raw model output (one tensor of candidate detections per image)
conf_thres: object confidence threshold
nms_thres: IoU threshold used for suppression
method: 'standard', 'fast', 'soft_linear' or 'soft_gaussian'
"""
output = [None for _ in range(len(prediction))]
@@ -453,11 +455,18 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
# Non-maximum suppression
if method == -1:
if method == 'standard':
nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
else:
elif method == 'soft_linear':
dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
nms_indices = soft_nms(dets, Nt=nms_thres, method=method)
nms_indices = soft_nms(dets, Nt=nms_thres, method=0)
elif method == 'soft_gaussian':
dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
nms_indices = soft_nms(dets, Nt=nms_thres, method=1)
elif method == 'fast':
nms_indices = fast_nms(pred[:, :4], pred[:, 4], iou_thres=nms_thres, conf_thres=conf_thres)
else:
raise ValueError('Invalid NMS type!')
det_max = pred[nms_indices]
if len(det_max) > 0:
@@ -466,6 +475,87 @@
return output
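# ---------------------------------------------------------------------------
# Illustrative usage of the reworked `method` argument (sketch only, not part
# of the repository file; `prediction` stands for the raw model output):
#   out = non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard')       # torchvision.ops.nms
#   out = non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='fast')           # fast_nms below
#   out = non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='soft_gaussian')  # Cython soft_nms
# ---------------------------------------------------------------------------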
def fast_nms(boxes, scores, iou_thres:float=0.5, top_k:int=200, second_threshold:bool=False, conf_thres:float=0.5):
'''
Vectorized, approximate, fast NMS, adapted from YOLACT:
https://github.com/dbolya/yolact/blob/master/layers/functions/detection.py
The original version is for multi-class NMS; here we simplify the code to single-class NMS.
'''
scores, idx = scores.sort(0, descending=True)
idx = idx[:top_k].contiguous()
scores = scores[:top_k]
num_dets = idx.size()
boxes = boxes[idx, :]
iou = jaccard(boxes, boxes)
iou.triu_(diagonal=1)
iou_max, _ = iou.max(dim=0)
keep = (iou_max <= iou_thres)
if second_threshold:
keep *= (scores > conf_thres)
return idx[keep]
@torch.jit.script
def intersect(box_a, box_b):
""" We resize both tensors to [A,B,2] without new malloc:
[A,2] -> [A,1,2] -> [A,B,2]
[B,2] -> [1,B,2] -> [A,B,2]
Then we compute the area of intersect between box_a and box_b.
Args:
box_a: (tensor) bounding boxes, Shape: [n,A,4].
box_b: (tensor) bounding boxes, Shape: [n,B,4].
Return:
(tensor) intersection area, Shape: [n,A,B].
"""
n = box_a.size(0)
A = box_a.size(1)
B = box_b.size(1)
max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2),
box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2))
min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2),
box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2))
inter = torch.clamp((max_xy - min_xy), min=0)
return inter[:, :, :, 0] * inter[:, :, :, 1]
def jaccard(box_a, box_b, iscrowd:bool=False):
"""Compute the jaccard overlap of two sets of boxes. The jaccard overlap
is simply the intersection over union of two boxes. Here we operate on
ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b.
E.g.:
A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
Args:
box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
Return:
jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
"""
use_batch = True
if box_a.dim() == 2:
use_batch = False
box_a = box_a[None, ...]
box_b = box_b[None, ...]
inter = intersect(box_a, box_b)
area_a = ((box_a[:, :, 2]-box_a[:, :, 0]) *
(box_a[:, :, 3]-box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B]
area_b = ((box_b[:, :, 2]-box_b[:, :, 0]) *
(box_b[:, :, 3]-box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B]
union = area_a + area_b - inter
out = inter / area_a if iscrowd else inter / union
return out if use_batch else out.squeeze(0)
def return_torch_unique_index(u, uv):
n = uv.shape[1] # number of columns