replace maskrcnn-benchmark nms with torchvision nms
parent 1cb8cee836
commit be116014d6
46 changed files with 112372 additions and 111016 deletions
@@ -7,7 +7,10 @@
         "citypersons":"./data/citypersons.train",
         "cuhksysu":"./data/cuhksysu.train",
         "prw":"./data/prw.train",
-        "eth":"./data/eth.train"
+        "eth":"./data/eth.train",
+        "03":"./data/mot16-03.test",
+        "01":"./data/mot16-01.test",
+        "14":"./data/mot16-14.test"
     },
     "test_emb":
     {
@@ -17,7 +20,6 @@
     },
     "test":
     {
-        "mot19":"./data/mot19.train",
         "caltech":"./data/caltech.val",
         "citypersons":"./data/citypersons.val"
     }
cfg/yolov3_864x480.cfg (new file, 833 lines)
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=16
subdivisions=1
width=480
height=864
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from=-3
activation=linear

######################

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear

######### embedding ###########
[route]
layers = -3

[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear

[route]
layers = -3, -1
###############################

[yolo]
mask = 8,9,10,11
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -7

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 61

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear

######### embedding ###########
[route]
layers = -3

[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear

[route]
layers = -3, -1
###############################

[yolo]
mask = 4,5,6,7
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -7

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 36

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear

######### embedding ###########
[route]
layers = -3

[convolutional]
size=3
stride=1
pad=1
filters=512
activation=linear

[route]
layers = -3, -1
###############################

[yolo]
mask = 0,1,2,3
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
classes=1
num=12
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
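The head dimensions in this config are internally consistent: num=12 anchors are split across the three [yolo] layers, each selecting 4 anchors via its mask, and with classes=1 each anchor needs 4 box offsets, 1 objectness score, and 1 class score, which is why the 1x1 prediction convolutions all use filters=24. A quick sanity check (illustrative Python, not part of the commit):

anchors_per_scale = 12 // 3             # num=12 anchors over 3 [yolo] layers
preds_per_anchor = 4 + 1 + 1            # box (x, y, w, h) + objectness + classes=1
assert anchors_per_scale * preds_per_anchor == 24   # matches filters=24 above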
data/mot16-01.test (new file, 23 lines)
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000041.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000221.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000401.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000161.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000101.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000341.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000281.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000081.jpg
data/mot16-03.test (new file, 52 lines)
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000391.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000811.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001471.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000871.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000631.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001411.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000781.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000451.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000661.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000211.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000991.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001051.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000601.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000011.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000841.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001231.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000031.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000271.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000931.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000751.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000481.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000511.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001291.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000091.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000571.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001111.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000151.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001081.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001351.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000901.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000331.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000721.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001171.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000961.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000541.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000691.jpg
data/mot16-14.test (new file, 38 lines)
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000201.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000041.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000581.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000221.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000061.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000021.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000241.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000261.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000001.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000661.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000421.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000401.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000381.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000641.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000601.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000181.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000441.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000461.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000621.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000161.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000321.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000301.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000481.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000141.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000681.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000101.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000341.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000361.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000121.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000741.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000501.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000281.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000521.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000721.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000561.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000541.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000081.jpg
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000701.jpg
models.py (10 changed lines)
@@ -10,7 +10,7 @@ import time
 import math
 
-batch_norm=SyncBN #nn.BatchNorm2d
 
+#batch_norm=nn.BatchNorm2d
 def create_modules(module_defs):
     """
     Constructs module list of layer blocks from module configuration in module_defs
@@ -34,7 +34,13 @@ def create_modules(module_defs):
                                            padding=pad,
                                            bias=not bn))
         if bn:
-            modules.add_module('batch_norm_%d' % i, batch_norm(filters))
+            after_bn = batch_norm(filters)
+            modules.add_module('batch_norm_%d' % i, after_bn)
+            # BN is uniformly initialized by default in pytorch 1.0.1.
+            # In pytorch>1.2.0, BN weights are initialized with constant 1,
+            # but we find with the uniform initialization the model converges faster.
+            nn.init.uniform_(after_bn.weight)
+            nn.init.zeros_(after_bn.bias)
        if module_def['activation'] == 'leaky':
            modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))
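For context, the initialization added above reproduces the older PyTorch default for BatchNorm weights on newer releases. In isolation (a minimal sketch, not part of the diff):

import torch.nn as nn

bn = nn.BatchNorm2d(64)
nn.init.uniform_(bn.weight)  # weight ~ U(0, 1), the pre-1.2 default the comment refers to
nn.init.zeros_(bn.bias)      # bias = 0, unchanged across versions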
setup.py (new file, 75 lines)
###################################################################
# File Name: setup.py
# Author: Zhongdao Wang
# mail: wcd17@mails.tsinghua.edu.cn
# Created Time: Thu 19 Dec 2019 07:29:02 PM CST
###################################################################

from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import os
import glob

import torch
from setuptools import find_packages
from setuptools import setup
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension


def get_extensions():
    this_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(this_dir, "utils", "nms")

    main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
    source_cpu = glob.glob(os.path.join(extensions_dir, "*.cpp"))
    source_cuda = glob.glob(os.path.join(extensions_dir, "*.cu"))

    sources = main_file
    extension = CppExtension

    extra_compile_args = {"cxx": []}
    define_macros = []

    #if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
    if False:
        extension = CUDAExtension
        sources += source_cuda
        define_macros += [("WITH_CUDA", None)]
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    sources = [os.path.join(extensions_dir, s) for s in sources]

    include_dirs = [extensions_dir]

    ext_modules = [
        extension(
            "nms",
            sources,
            include_dirs=include_dirs,
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]

    return ext_modules

print(get_extensions())
setup(
    name="nms",
    version="0.1",
    author="fmassa",
    url="https://github.com/facebookresearch/maskrcnn-benchmark",
    description="GPU supported NMS",
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
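Building this with `python setup.py build_ext --inplace` should produce an importable `nms` module whose single binding matches the PYBIND11_MODULE in utils/nms/nms.h below. A hypothetical usage sketch (box and score values are made up):

import torch
import nms  # the extension compiled by this setup.py (assumes an in-place build)

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.]])
scores = torch.tensor([0.9, 0.8])
keep = nms.nms(boxes, scores, 0.5)  # indices of boxes surviving suppression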
track.py (23 changed lines)
@@ -147,13 +147,22 @@ if __name__ == '__main__':
    print(opt, end='\n\n')
 
    if not opt.test_mot16:
-        seqs_str = '''KITTI-13
-                      KITTI-17
-                      ADL-Rundle-6
-                      PETS09-S2L1
-                      TUD-Campus
-                      TUD-Stadtmitte'''
-        data_root = '/home/wangzd/datasets/MOT/MOT15/train'
+        #seqs_str = '''KITTI-13
+        #              KITTI-17
+        #              ADL-Rundle-6
+        #              PETS09-S2L1
+        #              TUD-Campus
+        #              TUD-Stadtmitte'''
+        #data_root = '/home/wangzd/datasets/MOT/MOT15/train'
+        seqs_str = '''MOT17-02-SDP
+                      MOT17-04-SDP
+                      MOT17-05-SDP
+                      MOT17-09-SDP
+                      MOT17-10-SDP
+                      MOT17-11-SDP
+                      MOT17-13-SDP
+                   '''
+        data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
    else:
        seqs_str = '''MOT16-01
                      MOT16-03
@@ -4,7 +4,7 @@ import scipy
 from scipy.spatial.distance import cdist
 from sklearn.utils import linear_assignment_
 
-from utils.cython_bbox import bbox_ious
+from cython_bbox import bbox_overlaps as bbox_ious
 from utils import kalman_filter
 import time
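The standalone cython_bbox package's bbox_overlaps expects contiguous float64 arrays, so call sites presumably look something like the following sketch (toy box values; the real code passes the tracker's box lists):

import numpy as np
from cython_bbox import bbox_overlaps as bbox_ious

atlbrs = np.array([[0., 0., 10., 10.]])   # Nx4 boxes, (x1, y1, x2, y2)
btlbrs = np.array([[1., 1., 11., 11.]])   # Mx4 boxes
ious = bbox_ious(                         # returns an NxM IoU matrix
    np.ascontiguousarray(atlbrs, dtype=np.float64),
    np.ascontiguousarray(btlbrs, dtype=np.float64),
)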
train.py (5 changed lines)
@@ -94,8 +94,6 @@ def train(
    logger.info(('%8s%12s' + '%10s' * 6) % (
        'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
 
-    # Update scheduler (automatic)
-    scheduler.step()
 
 
    # Freeze darknet53.conv.74 for first epoch
@@ -146,7 +144,6 @@ def train(
            if i % opt.print_interval == 0:
                logger.info(s)
-
 
        # Save latest checkpoint
        checkpoint = {'epoch': epoch,
                      'model': model.module.state_dict(),
@@ -161,6 +158,8 @@ def train(
            test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
 
 
+        # Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
+        scheduler.step()
 
 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
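Moving scheduler.step() to the end of the epoch follows the PyTorch >= 1.1.0 contract that the learning-rate scheduler steps only after the optimizer has. A runnable illustration of the ordering (toy model, not the repo's actual loop):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(3):
    for _ in range(5):  # stand-in for the real dataloader
        optimizer.zero_grad()
        loss = model(torch.randn(8, 4)).sum()
        loss.backward()
        optimizer.step()
    scheduler.step()  # stepped once per epoch, after optimizer.step()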
Binary file not shown.
@@ -194,6 +194,7 @@ class LoadImagesAndLabels:  # for training
        if self.augment:
            img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
+
 
        plotFlag = False
        if plotFlag:
            import matplotlib
@@ -392,12 +393,10 @@ class JointDataset(LoadImagesAndLabels):  # for training
 
 
    def __getitem__(self, files_index):
-
        for i, c in enumerate(self.cds):
            if files_index >= c:
                ds = list(self.label_files.keys())[i]
                start_index = c
-
        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]
@@ -1,7 +0,0 @@
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
-# from ._utils import _C
-from utils import _C
-
-nms = _C.nms
-# nms.__doc__ = """
-# This function performs Non-maximum suppresion"""
utils/nms/nms.h (new file, 32 lines)
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include <torch/extension.h>

at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores, const float threshold);

#ifdef WITH_CUDA
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
#endif


at::Tensor nms(const at::Tensor& dets,
               const at::Tensor& scores,
               const float threshold) {

  if (dets.type().is_cuda()) {
#ifdef WITH_CUDA
    // TODO raise error if not compiled with CUDA
    if (dets.numel() == 0)
      return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
    auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
    return nms_cuda(b, threshold);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  at::Tensor result = nms_cpu(dets, scores, threshold);
  return result;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
  m.def("nms", &nms, "non-maximum suppression");
}
utils/nms/nms_cpu.cpp (new file, 74 lines)
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "nms.h"

template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets,
                          const at::Tensor& scores,
                          const float threshold) {
  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
  AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
  AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");

  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  }

  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();

  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));

  auto suppressed = suppressed_t.data<uint8_t>();
  auto order = order_t.data<int64_t>();
  auto x1 = x1_t.data<scalar_t>();
  auto y1 = y1_t.data<scalar_t>();
  auto x2 = x2_t.data<scalar_t>();
  auto y2 = y2_t.data<scalar_t>();
  auto areas = areas_t.data<scalar_t>();

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    if (suppressed[i] == 1)
      continue;
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1)
        continue;
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr >= threshold)
        suppressed[j] = 1;
    }
  }
  return at::nonzero(suppressed_t == 0).squeeze(1);
}

at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold) {
  at::Tensor result;
  AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
    result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
  });
  return result;
}
utils/nms/nms_kernel.cu (new file, 131 lines)
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>

#include <THC/THC.h>
#include <THC/THCDeviceUtils.cuh>

#include <vector>
#include <iostream>


int const threadsPerBlock = sizeof(unsigned long long) * 8;

__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}

__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

// boxes is a N x 5 tensor
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
  using scalar_t = float;
  AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
  auto scores = boxes.select(1, 4);
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
  auto boxes_sorted = boxes.index_select(0, order_t);

  int boxes_num = boxes.size(0);

  const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);

  scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();

  THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState

  unsigned long long* mask_dev = NULL;
  //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
  //                      boxes_num * col_blocks * sizeof(unsigned long long)));

  mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));

  dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
              THCCeilDiv(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  THCudaCheck(cudaMemcpy(&mask_host[0],
                         mask_dev,
                         sizeof(unsigned long long) * boxes_num * col_blocks,
                         cudaMemcpyDeviceToHost));

  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
  int64_t* keep_out = keep.data<int64_t>();

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  THCudaFree(state, mask_dev);
  // TODO improve this part
  return std::get<0>(order_t.index({
                       keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
                         order_t.device(), keep.scalar_type())
                     }).sort(0, false));
}
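The host loop at the end of nms_kernel.cu decodes the per-block bitmasks: each score-sorted box owns a row of col_blocks 64-bit words (threadsPerBlock = sizeof(unsigned long long) * 8 = 64), bit j of word b flags overlap with box 64*b + j, a box is kept iff no higher-scoring kept box has set its bit, and each kept box folds its overlap row into the running suppression mask. The same reduction in plain Python (a toy model; `mask` plays the role of mask_host):

def reduce_masks(mask, n_boxes, threads_per_block=64):
    # mask[i][b]: 64-bit word whose bit j flags IoU-overlap of box i
    # with box (threads_per_block * b + j); boxes are pre-sorted by score.
    col_blocks = (n_boxes + threads_per_block - 1) // threads_per_block
    remv = [0] * col_blocks
    keep = []
    for i in range(n_boxes):
        nblock, inblock = divmod(i, threads_per_block)
        if not (remv[nblock] >> inblock) & 1:
            keep.append(i)
            for j in range(nblock, col_blocks):
                remv[j] |= mask[i][j]  # fold i's overlap row into the suppression set
    return keep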
utils/utils.py (108 changed lines)
@@ -9,11 +9,8 @@ import matplotlib.pyplot as plt
 import numpy as np
 import torch
 import torch.nn.functional as F
 
-import maskrcnn_benchmark.layers.nms as nms
 # Set printoptions
 torch.set_printoptions(linewidth=1320, precision=5, profile='long')
 np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
+from torchvision.ops import nms
+#import maskrcnn_benchmark.layers.nms as nms
 
 def mkdir_if_missing(d):
     if not osp.exists(d):
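torchvision.ops.nms takes (x1, y1, x2, y2) boxes and a score vector and returns the indices of the kept boxes in decreasing score order, which is what makes it a drop-in replacement for the compiled maskrcnn-benchmark kernel. For example:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],     # IoU ~0.68 with the first box
                      [50., 50., 60., 60.]])  # disjoint from both
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, 0.5)  # tensor([0, 2]): the overlapping runner-up is suppressed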
@@ -424,12 +421,17 @@ def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.05, method=1):
                       np.uint8(method))
     return keep
 
-def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
+def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'):
     """
     Removes detections with lower object confidence score than 'conf_thres'
     Non-Maximum Suppression to further filter detections.
     Returns detections with shape:
         (x1, y1, x2, y2, object_conf, class_score, class_pred)
+    Args:
+        prediction,
+        conf_thres,
+        nms_thres,
+        method = 'standard', 'fast', 'soft_linear' or 'soft_gaussian'
     """
 
     output = [None for _ in range(len(prediction))]
@@ -453,11 +455,18 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
 
 
         # Non-maximum suppression
-        if method == -1:
+        if method == 'standard':
             nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
-        else:
+        elif method == 'soft_linear':
             dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
-            nms_indices = soft_nms(dets, Nt=nms_thres, method=method)
+            nms_indices = soft_nms(dets, Nt=nms_thres, method=0)
+        elif method == 'soft_gaussian':
+            dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
+            nms_indices = soft_nms(dets, Nt=nms_thres, method=1)
+        elif method == 'fast':
+            nms_indices = fast_nms(pred[:, :4], pred[:, 4], iou_thres=nms_thres, conf_thres=conf_thres)
+        else:
+            raise ValueError('Invalid NMS type!')
         det_max = pred[nms_indices]
 
         if len(det_max) > 0:
@@ -466,6 +475,87 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'):
 
     return output
 
+def fast_nms(boxes, scores, iou_thres:float=0.5, top_k:int=200, second_threshold:bool=False, conf_thres:float=0.5):
+    '''
+    Vectorized, approximated, fast NMS, adopted from YOLACT:
+    https://github.com/dbolya/yolact/blob/master/layers/functions/detection.py
+    The original version is for multi-class NMS, here we simplify the code for single-class NMS
+    '''
+    scores, idx = scores.sort(0, descending=True)
+
+    idx = idx[:top_k].contiguous()
+    scores = scores[:top_k]
+    num_dets = idx.size()
+
+    boxes = boxes[idx, :]
+
+    iou = jaccard(boxes, boxes)
+    iou.triu_(diagonal=1)
+    iou_max, _ = iou.max(dim=0)
+
+    keep = (iou_max <= iou_thres)
+
+    if second_threshold:
+        keep *= (scores > conf_thres)
+
+    return idx[keep]
+
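Fast NMS computes the full pairwise IoU matrix once, zeroes its lower triangle, and keeps a box iff its maximum IoU against any higher-scoring box is below the threshold. Unlike exact NMS it can discard a box whose only high-IoU rival was itself discarded; that is a deliberate approximation traded for full vectorization. Usage mirrors the 'fast' branch above (random stand-in data):

import torch

pred = torch.rand(100, 5)  # columns: x1, y1, x2, y2, score (illustrative values only)
kept = fast_nms(pred[:, :4], pred[:, 4], iou_thres=0.5, conf_thres=0.5)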
+
+@torch.jit.script
+def intersect(box_a, box_b):
+    """ We resize both tensors to [A,B,2] without new malloc:
+    [A,2] -> [A,1,2] -> [A,B,2]
+    [B,2] -> [1,B,2] -> [A,B,2]
+    Then we compute the area of intersect between box_a and box_b.
+    Args:
+      box_a: (tensor) bounding boxes, Shape: [n,A,4].
+      box_b: (tensor) bounding boxes, Shape: [n,B,4].
+    Return:
+      (tensor) intersection area, Shape: [n,A,B].
+    """
+    n = box_a.size(0)
+    A = box_a.size(1)
+    B = box_b.size(1)
+    max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2),
+                       box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2))
+    min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2),
+                       box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2))
+    inter = torch.clamp((max_xy - min_xy), min=0)
+    return inter[:, :, :, 0] * inter[:, :, :, 1]
+
+
+def jaccard(box_a, box_b, iscrowd:bool=False):
+    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
+    is simply the intersection over union of two boxes. Here we operate on
+    ground truth boxes and default boxes. If iscrowd=True, put the crowd in box_b.
+    E.g.:
+        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
+    Args:
+        box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
+        box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
+    Return:
+        jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
+    """
+    use_batch = True
+    if box_a.dim() == 2:
+        use_batch = False
+        box_a = box_a[None, ...]
+        box_b = box_b[None, ...]
+
+    inter = intersect(box_a, box_b)
+    area_a = ((box_a[:, :, 2]-box_a[:, :, 0]) *
+              (box_a[:, :, 3]-box_a[:, :, 1])).unsqueeze(2).expand_as(inter)  # [A,B]
+    area_b = ((box_b[:, :, 2]-box_b[:, :, 0]) *
+              (box_b[:, :, 3]-box_b[:, :, 1])).unsqueeze(1).expand_as(inter)  # [A,B]
+    union = area_a + area_b - inter
+
+    out = inter / area_a if iscrowd else inter / union
+    return out if use_batch else out.squeeze(0)
+
+
 def return_torch_unique_index(u, uv):
     n = uv.shape[1]  # number of columns