replace maskrcnn-benchmark nms with torchvision nms
This commit is contained in:
parent
1cb8cee836
commit
be116014d6
46 changed files with 112372 additions and 111016 deletions
|
@ -7,7 +7,10 @@
|
||||||
"citypersons":"./data/citypersons.train",
|
"citypersons":"./data/citypersons.train",
|
||||||
"cuhksysu":"./data/cuhksysu.train",
|
"cuhksysu":"./data/cuhksysu.train",
|
||||||
"prw":"./data/prw.train",
|
"prw":"./data/prw.train",
|
||||||
"eth":"./data/eth.train"
|
"eth":"./data/eth.train",
|
||||||
|
"03":"./data/mot16-03.test",
|
||||||
|
"01":"./data/mot16-01.test",
|
||||||
|
"14":"./data/mot16-14.test"
|
||||||
},
|
},
|
||||||
"test_emb":
|
"test_emb":
|
||||||
{
|
{
|
||||||
|
@ -17,7 +20,6 @@
|
||||||
},
|
},
|
||||||
"test":
|
"test":
|
||||||
{
|
{
|
||||||
"mot19":"./data/mot19.train",
|
|
||||||
"caltech":"./data/caltech.val",
|
"caltech":"./data/caltech.val",
|
||||||
"citypersons":"./data/citypersons.val"
|
"citypersons":"./data/citypersons.val"
|
||||||
}
|
}
|
||||||
|
|
833
cfg/yolov3_864x480.cfg
Normal file
833
cfg/yolov3_864x480.cfg
Normal file
|
@ -0,0 +1,833 @@
|
||||||
|
[net]
|
||||||
|
# Testing
|
||||||
|
#batch=1
|
||||||
|
#subdivisions=1
|
||||||
|
# Training
|
||||||
|
batch=16
|
||||||
|
subdivisions=1
|
||||||
|
width=480
|
||||||
|
height=864
|
||||||
|
channels=3
|
||||||
|
momentum=0.9
|
||||||
|
decay=0.0005
|
||||||
|
angle=0
|
||||||
|
saturation = 1.5
|
||||||
|
exposure = 1.5
|
||||||
|
hue=.1
|
||||||
|
|
||||||
|
learning_rate=0.001
|
||||||
|
burn_in=1000
|
||||||
|
max_batches = 500200
|
||||||
|
policy=steps
|
||||||
|
steps=400000,450000
|
||||||
|
scales=.1,.1
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=32
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
# Downsample
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=64
|
||||||
|
size=3
|
||||||
|
stride=2
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=32
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=64
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
# Downsample
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=3
|
||||||
|
stride=2
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=64
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=64
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
# Downsample
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=2
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
# Downsample
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=2
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
# Downsample
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=2
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=1024
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[shortcut]
|
||||||
|
from=-3
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
######################
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=1024
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=1024
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=512
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=1024
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=24
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
######### embedding ###########
|
||||||
|
[route]
|
||||||
|
layers = -3
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=512
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -3, -1
|
||||||
|
###############################
|
||||||
|
|
||||||
|
|
||||||
|
[yolo]
|
||||||
|
mask = 8,9,10,11
|
||||||
|
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
|
||||||
|
classes=1
|
||||||
|
num=12
|
||||||
|
jitter=.3
|
||||||
|
ignore_thresh = .7
|
||||||
|
truth_thresh = 1
|
||||||
|
random=1
|
||||||
|
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -7
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[upsample]
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -1, 61
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=512
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=512
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=256
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=512
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=24
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
######### embedding ###########
|
||||||
|
[route]
|
||||||
|
layers = -3
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=512
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -3, -1
|
||||||
|
###############################
|
||||||
|
|
||||||
|
[yolo]
|
||||||
|
mask = 4,5,6,7
|
||||||
|
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
|
||||||
|
classes=1
|
||||||
|
num=12
|
||||||
|
jitter=.3
|
||||||
|
ignore_thresh = .7
|
||||||
|
truth_thresh = 1
|
||||||
|
random=1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -7
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[upsample]
|
||||||
|
stride=2
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -1, 36
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=256
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=256
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
filters=128
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
batch_normalize=1
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=256
|
||||||
|
activation=leaky
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=1
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=24
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
######### embedding ###########
|
||||||
|
[route]
|
||||||
|
layers = -3
|
||||||
|
|
||||||
|
[convolutional]
|
||||||
|
size=3
|
||||||
|
stride=1
|
||||||
|
pad=1
|
||||||
|
filters=512
|
||||||
|
activation=linear
|
||||||
|
|
||||||
|
[route]
|
||||||
|
layers = -3, -1
|
||||||
|
###############################
|
||||||
|
|
||||||
|
[yolo]
|
||||||
|
mask = 0,1,2,3
|
||||||
|
anchors = 6,19, 9,27, 13,38, 18,54, 25,76, 36,107, 51,152, 71,215, 102,305, 143, 429, 203,508, 407,508
|
||||||
|
classes=1
|
||||||
|
num=12
|
||||||
|
jitter=.3
|
||||||
|
ignore_thresh = .7
|
||||||
|
truth_thresh = 1
|
||||||
|
random=1
|
23
data/mot16-01.test
Normal file
23
data/mot16-01.test
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000041.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000201.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000221.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000061.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000021.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000261.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000241.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000001.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000421.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000401.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000381.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000181.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000441.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000161.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000321.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000301.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000141.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000101.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000341.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000361.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000121.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000281.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-01/img1/000081.jpg
|
52
data/mot16-03.test
Normal file
52
data/mot16-03.test
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000391.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000811.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001471.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001021.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000061.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001261.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000021.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000871.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000631.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000241.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001411.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000781.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001201.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000001.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000451.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000661.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000421.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001441.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000211.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000991.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001051.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000181.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000601.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001381.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000011.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000841.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001231.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000031.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000271.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000931.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000301.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000751.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000481.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000511.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001291.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001141.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000091.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000361.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000121.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000571.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001321.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001111.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000151.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001081.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001351.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000901.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000331.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000721.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/001171.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000961.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000541.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-03/img1/000691.jpg
|
38
data/mot16-14.test
Normal file
38
data/mot16-14.test
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000201.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000041.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000581.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000221.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000061.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000021.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000241.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000261.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000001.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000661.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000421.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000401.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000381.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000641.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000601.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000181.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000441.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000461.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000621.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000161.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000321.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000301.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000481.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000141.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000681.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000101.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000341.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000361.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000121.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000741.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000501.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000281.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000521.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000721.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000561.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000541.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000081.jpg
|
||||||
|
/home/wangzd/datasets/MOT/MOT16/images/test/MOT16-14/img1/000701.jpg
|
10
models.py
10
models.py
|
@ -10,7 +10,7 @@ import time
|
||||||
import math
|
import math
|
||||||
|
|
||||||
batch_norm=SyncBN #nn.BatchNorm2d
|
batch_norm=SyncBN #nn.BatchNorm2d
|
||||||
|
#batch_norm=nn.BatchNorm2d
|
||||||
def create_modules(module_defs):
|
def create_modules(module_defs):
|
||||||
"""
|
"""
|
||||||
Constructs module list of layer blocks from module configuration in module_defs
|
Constructs module list of layer blocks from module configuration in module_defs
|
||||||
|
@ -34,7 +34,13 @@ def create_modules(module_defs):
|
||||||
padding=pad,
|
padding=pad,
|
||||||
bias=not bn))
|
bias=not bn))
|
||||||
if bn:
|
if bn:
|
||||||
modules.add_module('batch_norm_%d' % i, batch_norm(filters))
|
after_bn = batch_norm(filters)
|
||||||
|
modules.add_module('batch_norm_%d' % i, after_bn)
|
||||||
|
# BN is uniformly initialized by default in pytorch 1.0.1.
|
||||||
|
# In pytorch>1.2.0, BN weights are initialized with constant 1,
|
||||||
|
# but we find with the uniform initialization the model converges faster.
|
||||||
|
nn.init.uniform_(after_bn.weight)
|
||||||
|
nn.init.zeros_(after_bn.bias)
|
||||||
if module_def['activation'] == 'leaky':
|
if module_def['activation'] == 'leaky':
|
||||||
modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))
|
modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))
|
||||||
|
|
||||||
|
|
75
setup.py
Normal file
75
setup.py
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
###################################################################
|
||||||
|
# File Name: setup.py
|
||||||
|
# Author: Zhongdao Wang
|
||||||
|
# mail: wcd17@mails.tsinghua.edu.cn
|
||||||
|
# Created Time: Thu 19 Dec 2019 07:29:02 PM CST
|
||||||
|
###################################################################
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from setuptools import find_packages
|
||||||
|
from setuptools import setup
|
||||||
|
from torch.utils.cpp_extension import CUDA_HOME
|
||||||
|
from torch.utils.cpp_extension import CppExtension
|
||||||
|
from torch.utils.cpp_extension import CUDAExtension
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_extensions():
|
||||||
|
this_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
extensions_dir = os.path.join(this_dir, "utils", "nms")
|
||||||
|
|
||||||
|
main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
|
||||||
|
source_cpu = glob.glob(os.path.join(extensions_dir, "*.cpp"))
|
||||||
|
source_cuda = glob.glob(os.path.join(extensions_dir, "*.cu"))
|
||||||
|
|
||||||
|
sources = main_file
|
||||||
|
extension = CppExtension
|
||||||
|
|
||||||
|
extra_compile_args = {"cxx": []}
|
||||||
|
define_macros = []
|
||||||
|
|
||||||
|
#if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1":
|
||||||
|
if False:
|
||||||
|
extension = CUDAExtension
|
||||||
|
sources += source_cuda
|
||||||
|
define_macros += [("WITH_CUDA", None)]
|
||||||
|
extra_compile_args["nvcc"] = [
|
||||||
|
"-DCUDA_HAS_FP16=1",
|
||||||
|
"-D__CUDA_NO_HALF_OPERATORS__",
|
||||||
|
"-D__CUDA_NO_HALF_CONVERSIONS__",
|
||||||
|
"-D__CUDA_NO_HALF2_OPERATORS__",
|
||||||
|
]
|
||||||
|
|
||||||
|
sources = [os.path.join(extensions_dir, s) for s in sources]
|
||||||
|
|
||||||
|
include_dirs = [extensions_dir]
|
||||||
|
|
||||||
|
ext_modules = [
|
||||||
|
extension(
|
||||||
|
"nms",
|
||||||
|
sources,
|
||||||
|
include_dirs=include_dirs,
|
||||||
|
define_macros=define_macros,
|
||||||
|
extra_compile_args=extra_compile_args,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
return ext_modules
|
||||||
|
|
||||||
|
print(get_extensions())
|
||||||
|
setup(
|
||||||
|
name="nms",
|
||||||
|
version="0.1",
|
||||||
|
author="fmassa",
|
||||||
|
url="https://github.com/facebookresearch/maskrcnn-benchmark",
|
||||||
|
description="GPU supported NMS",
|
||||||
|
ext_modules=get_extensions(),
|
||||||
|
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
|
||||||
|
)
|
23
track.py
23
track.py
|
@ -147,13 +147,22 @@ if __name__ == '__main__':
|
||||||
print(opt, end='\n\n')
|
print(opt, end='\n\n')
|
||||||
|
|
||||||
if not opt.test_mot16:
|
if not opt.test_mot16:
|
||||||
seqs_str = '''KITTI-13
|
#seqs_str = '''KITTI-13
|
||||||
KITTI-17
|
# KITTI-17
|
||||||
ADL-Rundle-6
|
# ADL-Rundle-6
|
||||||
PETS09-S2L1
|
# PETS09-S2L1
|
||||||
TUD-Campus
|
# TUD-Campus
|
||||||
TUD-Stadtmitte'''
|
# TUD-Stadtmitte'''
|
||||||
data_root = '/home/wangzd/datasets/MOT/MOT15/train'
|
#data_root = '/home/wangzd/datasets/MOT/MOT15/train'
|
||||||
|
seqs_str = '''MOT17-02-SDP
|
||||||
|
MOT17-04-SDP
|
||||||
|
MOT17-05-SDP
|
||||||
|
MOT17-09-SDP
|
||||||
|
MOT17-10-SDP
|
||||||
|
MOT17-11-SDP
|
||||||
|
MOT17-13-SDP
|
||||||
|
'''
|
||||||
|
data_root = '/home/wangzd/datasets/MOT/MOT17/images/train'
|
||||||
else:
|
else:
|
||||||
seqs_str = '''MOT16-01
|
seqs_str = '''MOT16-01
|
||||||
MOT16-03
|
MOT16-03
|
||||||
|
|
|
@ -4,7 +4,7 @@ import scipy
|
||||||
from scipy.spatial.distance import cdist
|
from scipy.spatial.distance import cdist
|
||||||
from sklearn.utils import linear_assignment_
|
from sklearn.utils import linear_assignment_
|
||||||
|
|
||||||
from utils.cython_bbox import bbox_ious
|
from cython_bbox import bbox_overlaps as bbox_ious
|
||||||
from utils import kalman_filter
|
from utils import kalman_filter
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
5
train.py
5
train.py
|
@ -94,8 +94,6 @@ def train(
|
||||||
logger.info(('%8s%12s' + '%10s' * 6) % (
|
logger.info(('%8s%12s' + '%10s' * 6) % (
|
||||||
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
|
'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))
|
||||||
|
|
||||||
# Update scheduler (automatic)
|
|
||||||
scheduler.step()
|
|
||||||
|
|
||||||
|
|
||||||
# Freeze darknet53.conv.74 for first epoch
|
# Freeze darknet53.conv.74 for first epoch
|
||||||
|
@ -146,7 +144,6 @@ def train(
|
||||||
if i % opt.print_interval == 0:
|
if i % opt.print_interval == 0:
|
||||||
logger.info(s)
|
logger.info(s)
|
||||||
|
|
||||||
|
|
||||||
# Save latest checkpoint
|
# Save latest checkpoint
|
||||||
checkpoint = {'epoch': epoch,
|
checkpoint = {'epoch': epoch,
|
||||||
'model': model.module.state_dict(),
|
'model': model.module.state_dict(),
|
||||||
|
@ -161,6 +158,8 @@ def train(
|
||||||
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
|
test.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, img_size=img_size, print_interval=40, nID=dataset.nID)
|
||||||
|
|
||||||
|
|
||||||
|
# Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
|
||||||
|
scheduler.step()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -194,6 +194,7 @@ class LoadImagesAndLabels: # for training
|
||||||
if self.augment:
|
if self.augment:
|
||||||
img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
|
img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))
|
||||||
|
|
||||||
|
|
||||||
plotFlag = False
|
plotFlag = False
|
||||||
if plotFlag:
|
if plotFlag:
|
||||||
import matplotlib
|
import matplotlib
|
||||||
|
@ -392,12 +393,10 @@ class JointDataset(LoadImagesAndLabels): # for training
|
||||||
|
|
||||||
|
|
||||||
def __getitem__(self, files_index):
|
def __getitem__(self, files_index):
|
||||||
|
|
||||||
for i, c in enumerate(self.cds):
|
for i, c in enumerate(self.cds):
|
||||||
if files_index >= c:
|
if files_index >= c:
|
||||||
ds = list(self.label_files.keys())[i]
|
ds = list(self.label_files.keys())[i]
|
||||||
start_index = c
|
start_index = c
|
||||||
|
|
||||||
img_path = self.img_files[ds][files_index - start_index]
|
img_path = self.img_files[ds][files_index - start_index]
|
||||||
label_path = self.label_files[ds][files_index - start_index]
|
label_path = self.label_files[ds][files_index - start_index]
|
||||||
|
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
|
||||||
# from ._utils import _C
|
|
||||||
from utils import _C
|
|
||||||
|
|
||||||
nms = _C.nms
|
|
||||||
# nms.__doc__ = """
|
|
||||||
# This function performs Non-maximum suppresion"""
|
|
32
utils/nms/nms.h
Normal file
32
utils/nms/nms.h
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||||
|
#pragma once
|
||||||
|
#include <torch/extension.h>
|
||||||
|
at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores, const float threshold);
|
||||||
|
#ifdef WITH_CUDA
|
||||||
|
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
at::Tensor nms(const at::Tensor& dets,
|
||||||
|
const at::Tensor& scores,
|
||||||
|
const float threshold) {
|
||||||
|
|
||||||
|
if (dets.type().is_cuda()) {
|
||||||
|
#ifdef WITH_CUDA
|
||||||
|
// TODO raise error if not compiled with CUDA
|
||||||
|
if (dets.numel() == 0)
|
||||||
|
return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
|
||||||
|
auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
|
||||||
|
return nms_cuda(b, threshold);
|
||||||
|
#else
|
||||||
|
AT_ERROR("Not compiled with GPU support");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
at::Tensor result = nms_cpu(dets, scores, threshold);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
|
||||||
|
m.def("nms", &nms, "non-maximum suppression");
|
||||||
|
}
|
74
utils/nms/nms_cpu.cpp
Normal file
74
utils/nms/nms_cpu.cpp
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||||
|
#include "nms.h"
|
||||||
|
template <typename scalar_t>
|
||||||
|
at::Tensor nms_cpu_kernel(const at::Tensor& dets,
|
||||||
|
const at::Tensor& scores,
|
||||||
|
const float threshold) {
|
||||||
|
AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
|
||||||
|
AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
|
||||||
|
AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
|
||||||
|
|
||||||
|
if (dets.numel() == 0) {
|
||||||
|
return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
|
||||||
|
}
|
||||||
|
|
||||||
|
auto x1_t = dets.select(1, 0).contiguous();
|
||||||
|
auto y1_t = dets.select(1, 1).contiguous();
|
||||||
|
auto x2_t = dets.select(1, 2).contiguous();
|
||||||
|
auto y2_t = dets.select(1, 3).contiguous();
|
||||||
|
|
||||||
|
at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
|
||||||
|
|
||||||
|
auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
|
||||||
|
|
||||||
|
auto ndets = dets.size(0);
|
||||||
|
at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
|
||||||
|
|
||||||
|
auto suppressed = suppressed_t.data<uint8_t>();
|
||||||
|
auto order = order_t.data<int64_t>();
|
||||||
|
auto x1 = x1_t.data<scalar_t>();
|
||||||
|
auto y1 = y1_t.data<scalar_t>();
|
||||||
|
auto x2 = x2_t.data<scalar_t>();
|
||||||
|
auto y2 = y2_t.data<scalar_t>();
|
||||||
|
auto areas = areas_t.data<scalar_t>();
|
||||||
|
|
||||||
|
for (int64_t _i = 0; _i < ndets; _i++) {
|
||||||
|
auto i = order[_i];
|
||||||
|
if (suppressed[i] == 1)
|
||||||
|
continue;
|
||||||
|
auto ix1 = x1[i];
|
||||||
|
auto iy1 = y1[i];
|
||||||
|
auto ix2 = x2[i];
|
||||||
|
auto iy2 = y2[i];
|
||||||
|
auto iarea = areas[i];
|
||||||
|
|
||||||
|
for (int64_t _j = _i + 1; _j < ndets; _j++) {
|
||||||
|
auto j = order[_j];
|
||||||
|
if (suppressed[j] == 1)
|
||||||
|
continue;
|
||||||
|
auto xx1 = std::max(ix1, x1[j]);
|
||||||
|
auto yy1 = std::max(iy1, y1[j]);
|
||||||
|
auto xx2 = std::min(ix2, x2[j]);
|
||||||
|
auto yy2 = std::min(iy2, y2[j]);
|
||||||
|
|
||||||
|
auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
|
||||||
|
auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
|
||||||
|
auto inter = w * h;
|
||||||
|
auto ovr = inter / (iarea + areas[j] - inter);
|
||||||
|
if (ovr >= threshold)
|
||||||
|
suppressed[j] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return at::nonzero(suppressed_t == 0).squeeze(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPU non-maximum suppression: picks the nms_cpu_kernel instantiation that
// matches the floating-point type of `dets` and returns its result.
at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold) {
  at::Tensor kept;
  // The dispatch macro defines `scalar_t` inside the lambda for the runtime
  // type of `dets`.
  AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
    kept = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
  });
  return kept;
}
|
||||||
|
|
131
utils/nms/nms_kernel.cu
Normal file
131
utils/nms/nms_kernel.cu
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
#include <ATen/ATen.h>
|
||||||
|
#include <ATen/cuda/CUDAContext.h>
|
||||||
|
|
||||||
|
#include <THC/THC.h>
|
||||||
|
#include <THC/THCDeviceUtils.cuh>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
|
||||||
|
int const threadsPerBlock = sizeof(unsigned long long) * 8;
|
||||||
|
|
||||||
|
// Intersection-over-union of two boxes.
// Elements 0..3 of each pointer are read as x1, y1, x2, y2; extents are
// computed with the "+ 1" convention and the intersection is clamped at zero.
__device__ inline float devIoU(float const * const a, float const * const b) {
  const float inter_left = max(a[0], b[0]);
  const float inter_right = min(a[2], b[2]);
  const float inter_top = max(a[1], b[1]);
  const float inter_bottom = min(a[3], b[3]);

  const float inter_w = max(inter_right - inter_left + 1, 0.f);
  const float inter_h = max(inter_bottom - inter_top + 1, 0.f);
  const float inter_area = inter_w * inter_h;

  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);

  return inter_area / (area_a + area_b - inter_area);
}
|
||||||
|
|
||||||
|
// Builds the pairwise suppression bitmask used by nms_cuda().
//
// Boxes are laid out as 5 consecutive floats each in `dev_boxes`. The grid is
// two-dimensional: blockIdx.y selects a block of "row" boxes (one per thread)
// and blockIdx.x a block of "column" boxes. For its row box, each thread
// writes one 64-bit word to `dev_mask` in which bit i is set when the IoU with
// the i-th box of the column block is greater than `nms_overlap_thresh`.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_block = blockIdx.y;
  const int col_block = blockIdx.x;
  const int lane = threadIdx.x;

  // The last block along either axis may hold fewer than threadsPerBlock boxes.
  const int rows_here =
      min(n_boxes - row_block * threadsPerBlock, threadsPerBlock);
  const int cols_here =
      min(n_boxes - col_block * threadsPerBlock, threadsPerBlock);

  // Stage the column block's boxes in shared memory, one box per thread.
  __shared__ float tile[threadsPerBlock * 5];
  if (lane < cols_here) {
    const float *src = dev_boxes + (threadsPerBlock * col_block + lane) * 5;
    for (int k = 0; k < 5; ++k) {
      tile[lane * 5 + k] = src[k];
    }
  }
  __syncthreads();

  // No barrier follows, so threads without a row box can simply leave.
  if (lane >= rows_here) {
    return;
  }

  const int row_box = threadsPerBlock * row_block + lane;
  const float *row_ptr = dev_boxes + row_box * 5;

  // On the diagonal block only compare against boxes that come after this one,
  // so a box is never compared with itself or with an earlier box.
  const int first = (row_block == col_block) ? lane + 1 : 0;

  unsigned long long bits = 0;
  for (int c = first; c < cols_here; ++c) {
    if (devIoU(row_ptr, tile + c * 5) > nms_overlap_thresh) {
      bits |= 1ULL << c;
    }
  }

  const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
  dev_mask[row_box * col_blocks + col_block] = bits;
}
|
||||||
|
|
||||||
|
// GPU non-maximum suppression.
//
// boxes is a N x 5 tensor; column 4 is used as the score and the whole rows
// are handed to nms_kernel. Boxes are sorted by descending score, the kernel
// computes the pairwise overlap bitmask, and a greedy pass on the host selects
// the survivors. Returns the surviving indices into the original `boxes`,
// sorted ascending, on the same device as the sort order tensor.
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
  using scalar_t = float;
  AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");

  // Reorder the boxes from highest to lowest score.
  auto score_col = boxes.select(1, 4);
  auto order_t = std::get<1>(score_col.sort(0, /* descending=*/true));
  auto sorted_boxes = boxes.index_select(0, order_t);

  int boxes_num = boxes.size(0);
  const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);

  scalar_t* boxes_dev = sorted_boxes.data<scalar_t>();

  THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState

  // One 64-bit word per (box, column block) pair.
  const size_t mask_bytes = boxes_num * col_blocks * sizeof(unsigned long long);
  unsigned long long* mask_dev =
      (unsigned long long*) THCudaMalloc(state, mask_bytes);

  dim3 grid(col_blocks, col_blocks);
  dim3 block(threadsPerBlock);
  nms_kernel<<<grid, block>>>(boxes_num,
                              nms_overlap_thresh,
                              boxes_dev,
                              mask_dev);

  // Bring the bitmask back to the host for the sequential greedy pass.
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  THCudaCheck(cudaMemcpy(mask_host.data(),
                         mask_dev,
                         mask_bytes,
                         cudaMemcpyDeviceToHost));

  // removed[b] accumulates, per column block, the boxes already suppressed.
  std::vector<unsigned long long> removed(col_blocks, 0ULL);

  at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
  int64_t* keep_out = keep.data<int64_t>();

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    const int word = i / threadsPerBlock;
    const int bit = i % threadsPerBlock;

    if (removed[word] & (1ULL << bit)) {
      continue;  // already suppressed by a higher-scored box
    }

    keep_out[num_to_keep++] = i;
    // Everything this box overlaps too much becomes suppressed.
    const unsigned long long *row = mask_host.data() + i * col_blocks;
    for (int j = word; j < col_blocks; j++) {
      removed[j] |= row[j];
    }
  }

  THCudaFree(state, mask_dev);

  // TODO improve this part
  // `keep` holds positions in the sorted order: move them to the device of
  // order_t, map them back to original indices and sort those ascending.
  auto kept_positions =
      keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
          order_t.device(), keep.scalar_type());
  auto kept_original = order_t.index({kept_positions});
  return std::get<0>(kept_original.sort(0, false));
}
|
108
utils/utils.py
108
utils/utils.py
|
@ -9,11 +9,8 @@ import matplotlib.pyplot as plt
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from torchvision.ops import nms
|
||||||
import maskrcnn_benchmark.layers.nms as nms
|
#import maskrcnn_benchmark.layers.nms as nms
|
||||||
# Set printoptions
|
|
||||||
torch.set_printoptions(linewidth=1320, precision=5, profile='long')
|
|
||||||
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
|
|
||||||
|
|
||||||
def mkdir_if_missing(d):
|
def mkdir_if_missing(d):
|
||||||
if not osp.exists(d):
|
if not osp.exists(d):
|
||||||
|
@ -424,12 +421,17 @@ def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.05, method=1):
|
||||||
np.uint8(method))
|
np.uint8(method))
|
||||||
return keep
|
return keep
|
||||||
|
|
||||||
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
|
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method='standard'):
|
||||||
"""
|
"""
|
||||||
Removes detections with lower object confidence score than 'conf_thres'
|
Removes detections with lower object confidence score than 'conf_thres'
|
||||||
Non-Maximum Suppression to further filter detections.
|
Non-Maximum Suppression to further filter detections.
|
||||||
Returns detections with shape:
|
Returns detections with shape:
|
||||||
(x1, y1, x2, y2, object_conf, class_score, class_pred)
|
(x1, y1, x2, y2, object_conf, class_score, class_pred)
|
||||||
|
Args:
|
||||||
|
prediction,
|
||||||
|
conf_thres,
|
||||||
|
nms_thres,
|
||||||
|
method = 'standard', 'fast', 'soft_linear' or 'soft_gaussian'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
output = [None for _ in range(len(prediction))]
|
output = [None for _ in range(len(prediction))]
|
||||||
|
@ -453,11 +455,18 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
|
||||||
|
|
||||||
|
|
||||||
# Non-maximum suppression
|
# Non-maximum suppression
|
||||||
if method == -1:
|
if method == 'standard':
|
||||||
nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
|
nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
|
||||||
else:
|
elif method == 'soft_linear':
|
||||||
dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
|
dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
|
||||||
nms_indices = soft_nms(dets, Nt=nms_thres, method=method)
|
nms_indices = soft_nms(dets, Nt=nms_thres, method=0)
|
||||||
|
elif method == 'soft_gaussian':
|
||||||
|
dets = pred[:, :5].clone().contiguous().data.cpu().numpy()
|
||||||
|
nms_indices = soft_nms(dets, Nt=nms_thres, method=1)
|
||||||
|
elif method == 'fast':
|
||||||
|
nms_indices = fast_nms(pred[:, :4], pred[:, 4], iou_thres=nms_thres, conf_thres=conf_thres)
|
||||||
|
else:
|
||||||
|
raise ValueError('Invalid NMS type!')
|
||||||
det_max = pred[nms_indices]
|
det_max = pred[nms_indices]
|
||||||
|
|
||||||
if len(det_max) > 0:
|
if len(det_max) > 0:
|
||||||
|
@ -466,6 +475,87 @@ def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.4, method=-1):
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def fast_nms(boxes, scores, iou_thres:float=0.5, top_k:int=200, second_threshold:bool=False, conf_thres:float=0.5):
    """
    Vectorized, approximated, fast NMS, adopted from YOLACT:
    https://github.com/dbolya/yolact/blob/master/layers/functions/detection.py
    The original version is for multi-class NMS, here we simplify the code for single-class NMS

    Args:
        boxes: box tensor, indexed as ``boxes[idx, :]`` and passed to ``jaccard``
            (presumably one ``(x1, y1, x2, y2)`` row per box -- confirm against caller).
        scores: score tensor, sorted along dim 0 (presumably 1-D, one score per box).
        iou_thres: a box is dropped when its IoU with any higher-scored box exceeds this value.
        top_k: only the ``top_k`` highest-scored boxes are considered.
        second_threshold: when True, additionally drop boxes whose score is not above ``conf_thres``.
        conf_thres: score threshold used when ``second_threshold`` is True.
    Returns:
        Indices into the input ``boxes`` / ``scores`` of the kept boxes, in descending score order.
    """
    scores, idx = scores.sort(0, descending=True)

    idx = idx[:top_k].contiguous()
    scores = scores[:top_k]

    # Nothing to suppress. Returning here also avoids the max-reduction over an
    # empty IoU matrix below, which raises.
    if idx.numel() == 0:
        return idx

    boxes = boxes[idx, :]

    # Keep only the strict upper triangle: column j then holds the IoU of box j
    # with every higher-scored box, so the column-wise max is its worst overlap.
    iou = jaccard(boxes, boxes)
    iou.triu_(diagonal=1)
    iou_max, _ = iou.max(dim=0)

    keep = (iou_max <= iou_thres)

    if second_threshold:
        # This is a free function: use the conf_thres argument (the previous
        # reference to self.conf_thresh could not resolve here).
        keep &= (scores > conf_thres)

    return idx[keep]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@torch.jit.script
def intersect(box_a, box_b):
    """ Pairwise intersection area between two batched sets of boxes.

    Both inputs are viewed as [n,A,B,2] through unsqueeze + expand:
        box_a corners: [n,A,2] -> [n,A,1,2] -> [n,A,B,2]
        box_b corners: [n,B,2] -> [n,1,B,2] -> [n,A,B,2]
    Args:
      box_a: (tensor) bounding boxes, Shape: [n,A,4].
      box_b: (tensor) bounding boxes, Shape: [n,B,4].
    Return:
      (tensor) intersection area, Shape: [n,A,B].
    """
    batch = box_a.size(0)
    count_a = box_a.size(1)
    count_b = box_b.size(1)

    # Last two columns of each box, expanded to every (a, b) pair.
    high_a = box_a[:, :, 2:].unsqueeze(2).expand(batch, count_a, count_b, 2)
    high_b = box_b[:, :, 2:].unsqueeze(1).expand(batch, count_a, count_b, 2)
    # First two columns of each box, expanded the same way.
    low_a = box_a[:, :, :2].unsqueeze(2).expand(batch, count_a, count_b, 2)
    low_b = box_b[:, :, :2].unsqueeze(1).expand(batch, count_a, count_b, 2)

    overlap_high = torch.min(high_a, high_b)
    overlap_low = torch.max(low_a, low_b)

    # Negative extents mean no overlap along that axis; clamp them to zero.
    extent = torch.clamp((overlap_high - overlap_low), min=0)
    return extent[:, :, :, 0] * extent[:, :, :, 1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def jaccard(box_a, box_b, iscrowd:bool=False):
    """Pairwise jaccard overlap (intersection over union) of two sets of boxes.

    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    With iscrowd=True the intersection is divided by area(A) only; put the
    crowd in box_b.
    Args:
        box_a: (tensor) boxes, Shape: [num_a,4], or batched [n,num_a,4]
        box_b: (tensor) boxes, Shape: [num_b,4], or batched [n,num_b,4]
        iscrowd: (bool) normalise by area(A) instead of the union.
    Return:
        jaccard overlap: (tensor) Shape: [num_a, num_b], with a leading batch
        dimension when box_a was given batched.
    """
    # 2-D inputs get a temporary batch dimension that is removed on return.
    batched = box_a.dim() != 2
    if not batched:
        box_a = box_a[None, ...]
        box_b = box_b[None, ...]

    inter = intersect(box_a, box_b)

    width_a = box_a[:, :, 2] - box_a[:, :, 0]
    height_a = box_a[:, :, 3] - box_a[:, :, 1]
    area_a = (width_a * height_a).unsqueeze(2).expand_as(inter)  # [A,B]

    width_b = box_b[:, :, 2] - box_b[:, :, 0]
    height_b = box_b[:, :, 3] - box_b[:, :, 1]
    area_b = (width_b * height_b).unsqueeze(1).expand_as(inter)  # [A,B]

    if iscrowd:
        out = inter / area_a
    else:
        out = inter / (area_a + area_b - inter)

    if batched:
        return out
    return out.squeeze(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def return_torch_unique_index(u, uv):
|
def return_torch_unique_index(u, uv):
|
||||||
n = uv.shape[1] # number of columns
|
n = uv.shape[1] # number of columns
|
||||||
|
|
Loading…
Reference in a new issue