replace maskrcnn-benchmark nms with torchvision nms

This commit is contained in:
Zhongdao 2020-01-09 22:48:17 +08:00
parent 1cb8cee836
commit be116014d6
46 changed files with 112372 additions and 111016 deletions

View file

@ -7,7 +7,10 @@
"citypersons":"./data/citypersons.train", "citypersons":"./data/citypersons.train",
"cuhksysu":"./data/cuhksysu.train", "cuhksysu":"./data/cuhksysu.train",
"prw":"./data/prw.train", "prw":"./data/prw.train",
"eth":"./data/eth.train" "eth":"./data/eth.train",
"03":"./data/mot16-03.test",
"01":"./data/mot16-01.test",
"14":"./data/mot16-14.test"
}, },
"test_emb": "test_emb":
{ {
@ -17,7 +20,6 @@
}, },
"test": "test":
{ {
"mot19":"./data/mot19.train",
"caltech":"./data/caltech.val", "caltech":"./data/caltech.val",
"citypersons":"./data/citypersons.val" "citypersons":"./data/citypersons.val"
} }

833
cfg/yolov3_864x480.cfg Normal file
View file

@ -0,0 +1,833 @@
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
# NOTE(review): the filename says 864x480 but width/height below read
# 480/864 — confirm which axis is which before relying on this resolution.
width=480
height=864
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 8,9,10,11
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143,429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 4,5,6,7
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143,429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -7
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
######### embedding ###########
[route]
layers = -3
[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear
[route]
layers = -3, -1
###############################
[yolo]
mask = 0,1,2,3
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143,429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

23
data/mot16-01.test Normal file
View file

@ -0,0 +1,23 @@
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000041.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000221.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000401.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000161.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000101.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000341.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000281.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000081.jpg

52
data/mot16-03.test Normal file
View file

@ -0,0 +1,52 @@
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000391.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000811.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001471.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000871.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000631.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001411.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000781.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000451.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000661.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000211.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000991.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001051.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000601.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000011.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000841.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001231.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000031.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000271.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000931.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000751.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000481.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000511.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001291.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000091.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000571.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001111.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000151.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001081.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001351.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000901.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000331.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000721.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001171.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000961.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000541.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000691.jpg

38
data/mot16-14.test Normal file
View file

@ -0,0 +1,38 @@
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000041.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000581.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000221.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000661.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000401.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000641.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000601.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000461.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000621.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000161.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000481.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000681.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000101.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000341.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000741.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000501.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000281.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000521.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000721.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000561.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000541.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000081.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000701.jpg

View file

@ -10,7 +10,7 @@ import time
import math import math
batch_norm=SyncBN #nn.BatchNorm2d batch_norm=SyncBN #nn.BatchNorm2d
#batch_norm=nn.BatchNorm2d
def create_modules(module_defs): def create_modules(module_defs):
""" """
Constructs module list of layer blocks from module configuration in module_defs Constructs module list of layer blocks from module configuration in module_defs
@ -34,7 +34,13 @@ def create_modules(module_defs):
padding=pad, padding=pad,
bias=not bn)) bias=not bn))
if bn: if bn:
modules.add_module('batch_norm_%d' % i, batch_norm(filters)) after_bn = batch_norm(filters)
modules.add_module('batch_norm_%d' % i, after_bn)
# BN is uniformly initialized by default in pytorch 1.0.1.
# In pytorch>1.2.0, BN weights are initialized with constant 1,
# but we find with the uniform initialization the model converges faster.
nn.init.uniform_(after_bn.weight)
nn.init.zeros_(after_bn.bias)
if module_def['activation'] == 'leaky': if module_def['activation'] == 'leaky':
modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1)) modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

75
setup.py Normal file
View file

@ -0,0 +1,75 @@
###################################################################
# File Name: setup.py
# Author: Zhongdao Wang
# mail: wcd17@mails.tsinghua.edu.cn
# Created Time: Thu 19 Dec 2019 07:29:02 PM CST
###################################################################
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import os
import glob
import torch
from setuptools import find_packages
from setuptools import setup
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
def get_extensions():
    """Collect the C++/CUDA extension modules for the custom NMS op.

    Returns:
        A single-element list containing a setuptools ``Extension`` that
        compiles the sources under ``utils/nms``.  The CUDA path is
        currently hard-disabled (``if False``); the original auto-detect
        condition is kept in the comment below for when it is re-enabled.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(this_dir, "utils", "nms")

    # Host-side sources; CUDA kernels are appended only when the CUDA
    # branch below is enabled.  (The original code globbed *.cpp twice
    # into `main_file` and an unused `source_cpu`; merged into one.)
    sources = glob.glob(os.path.join(extensions_dir, "*.cpp"))
    source_cuda = glob.glob(os.path.join(extensions_dir, "*.cu"))

    extension = CppExtension
    extra_compile_args = {"cxx": []}
    define_macros = []

    # CUDA build intentionally disabled; to restore auto-detection use:
    # if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
    if False:
        extension = CUDAExtension
        sources += source_cuda
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    # glob already returns paths rooted at extensions_dir; the join is a
    # no-op for absolute entries and kept for parity with the original.
    sources = [os.path.join(extensions_dir, s) for s in sources]
    include_dirs = [extensions_dir]

    ext_modules = [
        extension(
            "nms",
            sources,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
    return ext_modules
# NOTE: a stray `print(get_extensions())` debug line was removed here — it
# ran the extension scan a second time on every setup.py invocation.
setup(
    name="nms",
    version="0.1",
    author="fmassa",
    url="https://github.com/facebookresearch/maskrcnn-benchmark",
    description="GPU supported NMS",
    ext_modules=get_extensions(),
    # BuildExtension injects the correct compiler/ABI flags for torch.
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)

View file

@ -147,13 +147,22 @@ if __name__ == '__main__':
print(opt, end='\n\n') print(opt, end='\n\n')
if not opt.test_mot16: if not opt.test_mot16:
seqs_str = '''KITTI-13 #seqs_str = '''KITTI-13
KITTI-17 # KITTI-17
ADL-Rundle-6 # ADL-Rundle-6
PETS09-S2L1 # PETS09-S2L1
TUD-Campus # TUD-Campus
TUD-Stadtmitte''' # TUD-Stadtmitte'''
data_root = '/home/wangzd/datasets/MOT/MOT15/train' #data_root = '/home/wangzd/datasets/MOT/MOT15/train'
seqs_str = '''MOT17-02-SDP
MOT17-04-SDP
MOT17-05-SDP
MOT17-09-SDP
MOT17-10-SDP
MOT17-11-SDP
MOT17-13-SDP
'''
data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
else: else:
seqs_str = '''MOT16-01 seqs_str = '''MOT16-01
MOT16-03 MOT16-03

View file

@ -4,7 +4,7 @@ import scipy
from scipy.spatial.distance import cdist from scipy.spatial.distance import cdist
from sklearn.utils import linear_assignment_ from sklearn.utils import linear_assignment_
from utils.cython_bbox import bbox_ious from cython_bbox import bbox_overlaps as bbox_ious
from utils import kalman_filter from utils import kalman_filter
import time import time

View file

@ -94,8 +94,6 @@ def train(
logger.info(('%8s%12s' + '%10s' * 6) % ( logger.info(('%8s%12s' + '%10s' * 6) % (
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time')) 'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
# Update scheduler (automatic)
scheduler.step()
# Freeze darknet53.conv.74 for first epoch # Freeze darknet53.conv.74 for first epoch
@ -146,7 +144,6 @@ def train(
if i % opt.print_interval == 0: if i % opt.print_interval == 0:
logger.info(s) logger.info(s)
# Save latest checkpoint # Save latest checkpoint
checkpoint = {'epoch': epoch, checkpoint = {'epoch': epoch,
'model': model.module.state_dict(), 'model': model.module.state_dict(),
@ -161,6 +158,8 @@ def train(
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID) test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
# Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
scheduler.step()
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()

View file

@ -194,6 +194,7 @@ class LoadImagesAndLabels: # for training
if self.augment: if self.augment:
img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20)) img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
plotFlag = False plotFlag = False
if plotFlag: if plotFlag:
import matplotlib import matplotlib
@ -392,12 +393,10 @@ class JointDataset(LoadImagesAndLabels): # for training
def __getitem__(self, files_index): def __getitem__(self, files_index):
for i, c in enumerate(self.cds): for i, c in enumerate(self.cds):
if files_index >= c: if files_index >= c:
ds = list(self.label_files.keys())[i] ds = list(self.label_files.keys())[i]
start_index = c start_index = c
img_path = self.img_files[ds][files_index - start_index] img_path = self.img_files[ds][files_index - start_index]
label_path = self.label_files[ds][files_index - start_index] label_path = self.label_files[ds][files_index - start_index]

View file

@ -1,7 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from utils import _C
nms = _C.nms
# nms.__doc__ = """
# This function performs Non-maximum suppresion"""

32
utils/nms/nms.h Normal file
View file

@ -0,0 +1,32 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include <torch/extension.h>
at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores, const float threshold);
#ifdef WITH_CUDA
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
#endif
// Device-dispatching NMS entry point.
//   dets:      (N, 4) float boxes as (x1, y1, x2, y2)
//   scores:    (N,) confidence per box
//   threshold: IoU above which a lower-scored box is suppressed
// Returns int64 indices of the kept boxes.
// NOTE(review): `type().is_cuda()` is the deprecated pre-1.5 API — works
// here but newer torch prefers `dets.is_cuda()`.
at::Tensor nms(const at::Tensor& dets,
               const at::Tensor& scores,
               const float threshold) {
  if (dets.type().is_cuda()) {
#ifdef WITH_CUDA
    // TODO raise error if not compiled with CUDA
    if (dets.numel() == 0)
      return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
    // The CUDA kernel expects a single (N, 5) tensor: boxes + score column.
    auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
    return nms_cuda(b, threshold);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // CPU fallback keeps dets and scores as separate tensors.
  at::Tensor result = nms_cpu(dets, scores, threshold);
  return result;
}
// Python binding: exposes `nms` under the module name chosen at build time
// via TORCH_EXTENSION_NAME ("nms" in setup.py).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
  m.def("nms", &nms, "non-maximum suppression");
}

74
utils/nms/nms_cpu.cpp Normal file
View file

@ -0,0 +1,74 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "nms.h"
// Serial greedy NMS over one box set.
// Walks boxes in descending score order; each surviving box suppresses every
// later box whose IoU with it is >= threshold.
//   dets:   (N, 4) CPU boxes as (x1, y1, x2, y2); scores: (N,) CPU.
// Returns int64 indices (into dets) of the boxes that were never suppressed.
template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets,
                          const at::Tensor& scores,
                          const float threshold) {
  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
  AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
  AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");

  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  }

  // Column views made contiguous so the raw-pointer loops below are valid.
  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();

  // +1: coordinates are treated as inclusive pixel ranges.
  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);

  // Indices of boxes sorted by descending score.
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  // 1 == suppressed; starts all-zero (all boxes alive).
  at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));

  // NOTE(review): `.data<T>()` is the deprecated accessor (now `data_ptr<T>()`).
  auto suppressed = suppressed_t.data<uint8_t>();
  auto order = order_t.data<int64_t>();
  auto x1 = x1_t.data<scalar_t>();
  auto y1 = y1_t.data<scalar_t>();
  auto x2 = x2_t.data<scalar_t>();
  auto y2 = y2_t.data<scalar_t>();
  auto areas = areas_t.data<scalar_t>();

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];          // _i-th best box by score
    if (suppressed[i] == 1)
      continue;                  // already killed by a better box
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    // Suppress every lower-scored box overlapping box i too much.
    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1)
        continue;
      // Intersection rectangle of boxes i and j.
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);  // IoU
      if (ovr >= threshold)
        suppressed[j] = 1;
    }
  }
  // Kept boxes are those never marked suppressed.
  return at::nonzero(suppressed_t == 0).squeeze(1);
}
// Type-dispatching wrapper: instantiates nms_cpu_kernel for the floating
// dtype of `dets` (float or double) and returns its result.
at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold) {
  at::Tensor result;
  AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
    result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
  });
  return result;
}

131
utils/nms/nms_kernel.cu Normal file
View file

@ -0,0 +1,131 @@
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <THC/THC.h>
#include <THC/THCDeviceUtils.cuh>
#include <vector>
#include <iostream>
// One suppression bitmask word covers this many boxes (64 on typical targets).
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// IoU of two boxes laid out as (x1, y1, x2, y2).  The +1 terms match the
// inclusive-pixel box convention used by the CPU kernel.
__device__ inline float devIoU(float const * const a, float const * const b) {
  float ix1 = max(a[0], b[0]);
  float iy1 = max(a[1], b[1]);
  float ix2 = min(a[2], b[2]);
  float iy2 = min(a[3], b[3]);
  float iw = max(ix2 - ix1 + 1, 0.f);
  float ih = max(iy2 - iy1 + 1, 0.f);
  float inter = iw * ih;
  float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return inter / (area_a + area_b - inter);
}
// Pairwise-overlap kernel.  The grid tiles the N boxes into
// threadsPerBlock-sized chunks on both axes: block (row_start, col_start)
// compares each box of the row tile against every box of the column tile.
// For row box i, dev_mask[i * col_blocks + col_start] is a 64-bit mask whose
// bit k is set when column-tile box k overlaps i above nms_overlap_thresh.
// dev_boxes holds N rows of 5 floats: (x1, y1, x2, y2, score), score-sorted.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // Tile sizes, clipped at the ragged final tile.
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile's boxes in shared memory (one box per thread).
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal tile, only compare against later boxes so a box never
    // flags itself (or an earlier, higher-scored box).
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
// boxes is a N x 5 tensor
// boxes is a N x 5 tensor
// Host-side driver: sorts boxes by score (column 4), launches nms_kernel to
// build the pairwise suppression bitmask on the GPU, then performs the greedy
// keep/suppress scan over that mask on the CPU.
// Returns int64 indices into the ORIGINAL (unsorted) `boxes`, sorted ascending.
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
  using scalar_t = float;
  AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
  auto scores = boxes.select(1, 4);
  // order_t maps sorted position -> original row index.
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
  auto boxes_sorted = boxes.index_select(0, order_t);

  int boxes_num = boxes.size(0);

  // One 64-bit mask word per (box, column tile) pair.
  const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);

  scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();

  THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState

  unsigned long long* mask_dev = NULL;
  //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
  //                      boxes_num * col_blocks * sizeof(unsigned long long)));
  mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));

  dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
              THCCeilDiv(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  // Copy the full suppression mask back to the host for the serial scan.
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  THCudaCheck(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // remv accumulates, per column tile, the bits of already-suppressed boxes.
  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
  int64_t* keep_out = keep.data<int64_t>();

  // Greedy pass in score order: keep box i unless a kept box suppressed it,
  // then OR its mask into remv so it suppresses later boxes.
  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  THCudaFree(state, mask_dev);
  // TODO improve this part
  // Map kept sorted-positions back to original row indices, then sort them.
  return std::get<0>(order_t.index({
                       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
                         order_t.device(), keep.scalar_type())
                     }).sort(0, false));
}

View file

@ -9,11 +9,8 @@ import matplotlib.pyplot as plt
import numpy as np import numpy as np
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from torchvision.ops import nms
import maskrcnn_benchmark.layers.nms as nms #import maskrcnn_benchmark.layers.nms as nms
# Set printoptions
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
def mkdir_if_missing(d): def mkdir_if_missing(d):
if not osp.exists(d): if not osp.exists(d):
@ -424,12 +421,17 @@ def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.05, method=1):
np.uint8(method)) np.uint8(method))
return keep return keep
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1): def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'):
""" """
Removes detections with lower object confidence score than 'conf_thres' Removes detections with lower object confidence score than 'conf_thres'
Non-Maximum Suppression to further filter detections. Non-Maximum Suppression to further filter detections.
Returns detections with shape: Returns detections with shape:
(x1, y1, x2, y2, object_conf, class_score, class_pred) (x1, y1, x2, y2, object_conf, class_score, class_pred)
Args:
prediction,
conf_thres,
nms_thres,
method = 'standard', 'fast', 'soft_linear' or 'soft_gaussian'
""" """
output = [None for _ in range(len(prediction))] output = [None for _ in range(len(prediction))]
@ -453,11 +455,18 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
# Non-maximum suppression # Non-maximum suppression
if method == -1: if method == 'standard':
nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres) nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
else: elif method == 'soft_linear':
dets = pred[:, :5].clone().contiguous().data.cpu().numpy() dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
nms_indices = soft_nms(dets, Nt=nms_thres, method=method) nms_indices = soft_nms(dets, Nt=nms_thres, method=0)
elif method == 'soft_gaussian':
dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
nms_indices = soft_nms(dets, Nt=nms_thres, method=1)
elif method == 'fast':
nms_indices = fast_nms(pred[:, :4], pred[:, 4], iou_thres=nms_thres, conf_thres=conf_thres)
else:
raise ValueError('Invalid NMS type!')
det_max = pred[nms_indices] det_max = pred[nms_indices]
if len(det_max) > 0: if len(det_max) > 0:
@ -466,6 +475,87 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
return output return output
def fast_nms(boxes, scores, iou_thres:float=0.5, top_k:int=200, second_threshold:bool=False, conf_thres:float=0.5):
    '''
    Vectorized, approximated, fast NMS, adopted from YOLACT:
    https://github.com/dbolya/yolact/blob/master/layers/functions/detection.py
    The original version is for multi-class NMS, here we simplify the code for single-class NMS

    Args:
        boxes: (tensor) detection boxes as (x1, y1, x2, y2), shape [N, 4]
        scores: (tensor) per-box confidence scores, shape [N]
        iou_thres: boxes overlapping a higher-scoring box by more than this IoU are dropped
        top_k: at most this many highest-scoring boxes are considered at all
        second_threshold: if True, additionally require scores > conf_thres
        conf_thres: confidence threshold applied when second_threshold is True
    Return:
        (tensor) indices into the original `boxes`/`scores` of the kept detections
    '''
    scores, idx = scores.sort(0, descending=True)
    idx = idx[:top_k].contiguous()
    scores = scores[:top_k]
    boxes = boxes[idx, :]
    iou = jaccard(boxes, boxes)
    # Keep only the upper triangle so each box is compared exclusively against
    # boxes with strictly higher scores (approximation: suppressed boxes still
    # suppress others, which is what makes this "fast" NMS).
    iou.triu_(diagonal=1)
    iou_max, _ = iou.max(dim=0)
    keep = (iou_max <= iou_thres)
    if second_threshold:
        # Bug fix: original read `self.conf_thresh`, which is undefined in this
        # free function; the `conf_thres` argument is the intended threshold.
        keep &= (scores > conf_thres)
    return idx[keep]
@torch.jit.script
def intersect(box_a, box_b):
    """Compute pairwise intersection areas between two batches of boxes.

    Corner tensors are expanded (views only, no new allocation) to a common
    [n, A, B, 2] shape, and the overlap rectangle is clamped at zero width /
    height when two boxes do not intersect.

    Args:
        box_a: (tensor) bounding boxes, Shape: [n,A,4].
        box_b: (tensor) bounding boxes, Shape: [n,B,4].
    Return:
        (tensor) intersection area, Shape: [n,A,B].
    """
    batch = box_a.size(0)
    num_a = box_a.size(1)
    num_b = box_b.size(1)
    # Overlap rectangle: max of the lower-left corners, min of the upper-right.
    upper_right = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(batch, num_a, num_b, 2),
                            box_b[:, :, 2:].unsqueeze(1).expand(batch, num_a, num_b, 2))
    lower_left = torch.max(box_a[:, :, :2].unsqueeze(2).expand(batch, num_a, num_b, 2),
                           box_b[:, :, :2].unsqueeze(1).expand(batch, num_a, num_b, 2))
    wh = (upper_right - lower_left).clamp(min=0)
    return wh[:, :, :, 0] * wh[:, :, :, 1]
def jaccard(box_a, box_b, iscrowd:bool=False):
    """Compute the jaccard overlap (IoU) of two sets of boxes.

    The jaccard overlap is simply the intersection over union of two boxes:
        IoU(A, B) = area(A ∩ B) / (area(A) + area(B) - area(A ∩ B))
    Here we operate on ground truth boxes and default boxes. If iscrowd=True,
    put the crowd in box_b; the overlap is then normalised by area(A) only.

    Args:
        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
    Return:
        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
    """
    # Promote unbatched [N,4] inputs to a batch of one; remember to squeeze back.
    batched = box_a.dim() != 2
    if not batched:
        box_a = box_a.unsqueeze(0)
        box_b = box_b.unsqueeze(0)
    inter = intersect(box_a, box_b)
    width_a = box_a[:, :, 2] - box_a[:, :, 0]
    height_a = box_a[:, :, 3] - box_a[:, :, 1]
    width_b = box_b[:, :, 2] - box_b[:, :, 0]
    height_b = box_b[:, :, 3] - box_b[:, :, 1]
    area_a = (width_a * height_a).unsqueeze(2).expand_as(inter)  # [n,A,B]
    area_b = (width_b * height_b).unsqueeze(1).expand_as(inter)  # [n,A,B]
    if iscrowd:
        overlap = inter / area_a
    else:
        overlap = inter / (area_a + area_b - inter)
    return overlap if batched else overlap.squeeze(0)
def return_torch_unique_index(u, uv): def return_torch_unique_index(u, uv):
n = uv.shape[1] # number of columns n = uv.shape[1] # number of columns