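"""Extract per-frame pedestrian crops from video sequences.

For every sequence found under input_root, this script runs the Darknet
detector on each frame and writes one cropped .jpg per detected pedestrian
under output_root, mirroring the input directory layout.
"""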

import argparse
import json
import os
import os.path as osp
import time
from pathlib import Path

import cv2
import torch
import torch.nn.functional as F
from scipy import interpolate
from sklearn import metrics
from torchvision.transforms import transforms as T

from models import *
from utils.utils import *
from utils.datasets import LoadImages, JointDataset, collate_fn


def extract_ped_per_frame(
        cfg,
        input_root,
        output_root,
        weights,
        batch_size=16,
        img_size=416,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
        nID=14455,
):
    mkdir_if_missing(output_root)

    # Initialize model
    model = Darknet(cfg, img_size, nID)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location='cpu')['model'], strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    model = torch.nn.DataParallel(model)
    model.cuda().eval()
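
    # Note: inference assumes a CUDA device is available; there is no CPU fallback.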

    # Each entry under input_root is a sequence directory whose frames live
    # in an MOT-style 'img1' subfolder
    vlist = os.listdir(input_root)
    vlist = [osp.join(input_root, v, 'img1') for v in vlist]

    for vpath in vlist:
        vroot = osp.join('/', *vpath.split('/')[:-1])
        out_vroot = vroot.replace(input_root, output_root)
        mkdir_if_missing(out_vroot)
        dataloader = LoadImages(vpath, img_size)
        for frame_id, (frame_path, frame, frame_ori) in enumerate(dataloader):
            frame_ground_id = frame_path.split('/')[-1].split('.')[0]
            if frame_id % print_interval == 0:
                print('Processing frame {} of video {}'.format(frame_id, frame_path))
            blob = torch.from_numpy(frame).cuda().unsqueeze(0)
            pred = model(blob)
            # Keep only predictions above the objectness confidence threshold
            pred = pred[pred[:, :, 4] > conf_thres]
            if len(pred) > 0:
                dets = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0].cpu()
                # Map boxes from network input size back to original frame
                # coordinates; assign back so the rounded coordinates are kept
                dets[:, :4] = scale_coords(img_size, dets[:, :4], frame_ori.shape).round()
                frame_dir = osp.join(out_vroot, frame_ground_id)
                mkdir_if_missing(frame_dir)
                dets = dets[:, :5]  # keep x1, y1, x2, y2, conf
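
                # Crop filenames follow '{ped_id:04d}_{x1}_{y1}_{x2}_{y2}_{conf:.2f}.jpg',
                # e.g. (hypothetical values) 0003_512_148_640_412_0.87.jpg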
                for ped_id, det in enumerate(dets):
                    box = det[:4].int()
                    conf = det[4]
                    # Crop the pedestrian from the original frame (rows y1:y2, cols x1:x2)
                    ped = frame_ori[box[1]:box[3], box[0]:box[2]]
                    ped_path = osp.join(frame_dir, ('{:04d}_' + '{:d}_' * 4 + '{:.2f}.jpg').format(ped_id, *box, conf))
                    cv2.imwrite(ped_path, ped)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='test.py')
    parser.add_argument('--batch-size', type=int, default=40, help='size of each image batch')
    parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
    parser.add_argument('--weights', type=str, default='weights/mot_64/latest.pt', help='path to weights file')
    parser.add_argument('--iou-thres', type=float, default=0.3, help='iou threshold required to qualify as detected')
    parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.3, help='iou threshold for non-maximum suppression')
    parser.add_argument('--img-size', type=int, default=(1088, 608), help='size of each image dimension')
    parser.add_argument('--print-interval', type=int, default=10, help='print progress every this many frames')
    parser.add_argument('--input-root', type=str, default='/home/wangzd/datasets/youtube/data/0002/frame', help='path to input frames')
    parser.add_argument('--output-root', type=str, default='/home/wangzd/datasets/youtube/data/0002/ped_per_frame', help='path to output frames')
    opt = parser.parse_args()
    print(opt, end='\n\n')
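
    # Example invocation (input/output paths are placeholders):
    #   python test.py --cfg cfg/yolov3.cfg --weights weights/mot_64/latest.pt \
    #       --input-root /path/to/frames --output-root /path/to/ped_per_frame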

    # Run detection without gradient tracking
    with torch.no_grad():
        extract_ped_per_frame(
            opt.cfg,
            opt.input_root,
            opt.output_root,
            opt.weights,
            opt.batch_size,
            opt.img_size,
            opt.iou_thres,
            opt.conf_thres,
            opt.nms_thres,
            opt.print_interval,
        )