from argparse import Namespace
from collections import defaultdict
import csv
from dataclasses import dataclass, field
import json
import logging
from math import nan
from multiprocessing import Event
import multiprocessing
from pathlib import Path
import pickle
import time
from typing import DefaultDict, Dict, Optional, List
import jsonlines
import numpy as np
import torch
import torchvision
import ultralytics
import zmq
import cv2

from torchvision.models.detection import (
    retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights,
    keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights,
    maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights,
    FasterRCNN_ResNet50_FPN_V2_Weights, fasterrcnn_resnet50_fpn_v2,
)
from deep_sort_realtime.deepsort_tracker import DeepSort
from torchvision.models import ResNet50_Weights
from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack

from ultralytics import YOLO
from ultralytics.engine.results import Results as YOLOResult

from trap import timer
from trap.frame_emitter import Camera, DataclassJSONEncoder, DetectionState, Frame, Detection, Track
from bytetracker import BYTETracker

from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother
import tsmoothie.smoother
from datetime import datetime, timedelta

# Detection = [int, int, int, int, float, int]
# Detections = [Detection]

# This is the dt that is also used by the scene.
# As this needs to be rather stable, try to adhere
# to it by waiting when we are faster. Value chosen based
# on a rough estimate of tracker duration.
TARGET_DT = .1

logger = logging.getLogger("trap.tracker")

DETECTOR_RETINANET = 'retinanet'
DETECTOR_MASKRCNN = 'maskrcnn'
DETECTOR_FASTERRCNN = 'fasterrcnn'
DETECTOR_YOLOv8 = 'ultralytics'

TRACKER_DEEPSORT = 'deepsort'
TRACKER_BYTETRACK = 'bytetrack'

DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8]
TRACKERS = [TRACKER_DEEPSORT, TRACKER_BYTETRACK]

TRACKER_CONFIDENCE_MINIMUM = .2
TRACKER_BYTETRACK_MINIMUM = .1 # bytetrack can track items with a lower threshold
NON_MAXIMUM_SUPRESSION = 1
RCNN_SCALE = .4 # seems to have no impact on detections in the corners

def _yolov8_track(frame: Frame, model: YOLO, **kwargs) -> List[Detection]:

    results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="custom_bytetrack.yaml", verbose=False, conf=0.00001, **kwargs))

    if results[0].boxes is None or results[0].boxes.id is None:
        # work around https://github.com/ultralytics/ultralytics/issues/5968
        return []

    boxes = results[0].boxes.xywh.cpu()
    track_ids = results[0].boxes.id.int().cpu().tolist()
    classes = results[0].boxes.cls.int().cpu().tolist()
    # xywh gives box centers; shift by half the width/height to get the left-top corner
    return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index, class_id) for bbox, track_id, class_id in zip(boxes, track_ids, classes)]
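
# Example (sketch): how _yolov8_track is driven from Tracker.track_frame()
# below. It assumes a `frame: Frame` coming in from the frame emitter and
# restricts detection to persons, cats and dogs (COCO classes 0, 15, 16):
#
#   model = YOLO('yolo11x.pt')
#   detections = _yolov8_track(frame, model, classes=[0, 15, 16], imgsz=[1152, 640])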

class Multifile():
    def __init__(self, srcs: List[Path]):
        self.srcs = srcs
        self.g = self.__iter__()
        self.current_file = None

    @property
    def name(self):
        return ", ".join([s.name for s in self.srcs])

    def __iter__(self):
        for path in self.srcs:
            self.current_file = path
            with path.open('r') as fp:
                for l in fp:
                    yield l

    def readline(self):
        return self.g.__next__()
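
# Example (sketch, hypothetical path): Multifile chains several raw .txt
# dumps into a single line stream, while `current_file` tracks which file
# the most recent line came from:
#
#   sources = Multifile(sorted(Path("OUT/training").glob("*.txt")))
#   for line in sources:
#       print(sources.current_file.name, line.rstrip())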

FIELDNAMES = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state', 'source']

class TrackFilter:
    # subclasses implement filter(track, camera) -> bool

    def apply(self, tracks: List[Track], camera: Camera):
        return [t for t in tracks if self.filter(t, camera)]

    def apply_to_dict(self, tracks: Dict[str, Track], camera: Camera):
        tracks = self.apply(tracks.values(), camera)
        return {t.track_id: t for t in tracks}

class FinalDisplacementFilter(TrackFilter):
    def __init__(self, min_displacement):
        self.min_displacement = min_displacement

    def filter(self, track: Track, camera: Camera):
        history = track.get_projected_history(H=None, camera=camera)
        displacement = np.linalg.norm(history[0]-history[-1])
        return displacement > self.min_displacement
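
# Example (sketch): drop tracks whose projected start and end points lie
# within 0.2 of each other, i.e. detections of static objects. This mirrors
# how the filter is used in Tracker.track() below:
#
#   displacement_filter = FinalDisplacementFilter(.2)
#   tracks = displacement_filter.apply(tracks, camera)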

class TrackReader:
    """Read tracks.pkl as produced by TrainingDataWriter/rewrite_raw_track_files,
    optionally filtering on the blacklist/whitelist review files."""
    def __init__(self, path: Path, fps: int, include_blacklisted = False, exclude_whitelisted = False):
        self.blacklist_file = path / "blacklist.jsonl"
        self.whitelist_file = path / "whitelist.jsonl" # for skipping
        self.tracks_file = path / "tracks.pkl"

        # with self.tracks_file.open('r') as fp:
        #     tracks_dict: dict = json.load(fp)

        with self.tracks_file.open('rb') as fp:
            tracks: dict = pickle.load(fp)

        if self.blacklist_file.exists():
            with jsonlines.open(self.blacklist_file, 'r') as reader:
                blacklist = [track_id for track_id in reader.iter(type=str)]
        else:
            blacklist = []

        if self.whitelist_file.exists():
            with jsonlines.open(self.whitelist_file, 'r') as reader:
                whitelist = [track_id for track_id in reader.iter(type=str)]
        else:
            whitelist = []

        self._tracks = { track_id: detection_values
            for track_id, detection_values in tracks.items()
            if (include_blacklisted or track_id not in blacklist) and
               (not exclude_whitelisted or track_id not in whitelist)
        }
        self.fps = fps

    def __len__(self):
        return len(self._tracks)

    def get(self, track_id):
        return self._tracks[track_id]
        # detection_values = self._tracks[track_id]
        # history = []
        # # for detection_values in
        # source = None
        # for detection_items in detection_values:
        #     d = dict(zip(FIELDNAMES, detection_items))
        #     history.append(Detection(
        #         d['track_id'],
        #         d['l'],
        #         d['t'],
        #         d['w'],
        #         d['h'],
        #         nan,
        #         d['state'],
        #         d['frame_id'],
        #         1,
        #     ))
        #     source = int(d['source'])

        # return Track(track_id, history, fps=self.fps, source=source)

    def __iter__(self):
        for track_id in self._tracks:
            yield self.get(track_id)

    def track_ids(self):
        return list(self._tracks.keys())

def read_tracks_json(path: Path, fps):
    """
    Reader for the tracks (tracks.pkl) produced by TrainingDataWriter
    """
    reader = TrackReader(path, fps)
    for t in reader:
        yield t
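
# Example (sketch, hypothetical path): iterate the recorded tracks, skipping
# anything that was blacklisted during review:
#
#   reader = TrackReader(Path("OUT/training"), fps=12)
#   for track in reader:
#       ...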

class TrainingDataWriter:
    def __init__(self, training_path: Optional[Path]):
        if training_path is None:
            self.path = None
            return

        if not isinstance(training_path, Path):
            raise ValueError("save-for-training should be a path")
        if not training_path.exists():
            logger.info(f"Making path for training data: {training_path}")
            training_path.mkdir(parents=True, exist_ok=False)
        else:
            logger.warning(f"Path for training-data exists: {training_path}. Continuing assuming that's ok.")

        # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py

        self.path = training_path

    def __enter__(self):
        if self.path:
            d = datetime.now().isoformat(timespec="minutes")
            self.training_fp = open(self.path / f'all-{d}.txt', 'w')
            logger.debug(f"Writing tracker data to {self.training_fp.name}")
            # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
            self.csv = csv.DictWriter(self.training_fp, fieldnames=FIELDNAMES, delimiter='\t', quoting=csv.QUOTE_NONE)
            self.count = 0
        return self

    def add(self, frame: Frame, tracks: List[Track]):
        if not self.path:
            # skip if disabled
            return

        self.csv.writerows([{
            'frame_id': round(frame.index * 10., 1), # not really time
            'track_id': t.track_id,
            'l': float(t.history[-1].l), # to float, so we're sure it's not a torch.tensor()
            't': float(t.history[-1].t),
            'w': float(t.history[-1].w),
            'h': float(t.history[-1].h),
            'x': t.get_projected_history(frame.H, frame.camera)[-1][0],
            'y': t.get_projected_history(frame.H, frame.camera)[-1][1],
            'state': t.history[-1].state.value
            # only keep _actual_ detections, no lost entries
        } for t in tracks
            # if t.history[-1].state != DetectionState.Lost
        ])
        self.count += len(tracks)

    def __exit__(self, exc_type, exc_value, exc_tb):
        # ... ignore exception (type, value, traceback)
        if not self.path:
            return

        self.training_fp.close()
        rewrite_raw_track_files(self.path)
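
# Example (sketch): the writer is a context manager; passing None disables it,
# so the tracking loop can use it unconditionally (see Tracker.track() below):
#
#   with TrainingDataWriter(config.save_for_training) as writer:
#       writer.add(frame, active_tracks.values())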


def rewrite_raw_track_files(path: Path):
    source_files = list(sorted(path.glob("*.txt"))) # we loop twice, so need a list instead of a generator
    total = 0
    sources = Multifile(source_files)
    for line in sources:
        if len(line) > 3: # make sure not to count empty lines
            total += 1

    destinations = {
        'train': int(total * .8),
        'val': int(total * .12),
        'test': int(total * .08),
    }

    logger.info(f"Splitting gathered data from {source_files}")
    # for source_file in source_files:

    tracks_file = path / 'tracks.json'
    tracks_pkl = path / 'tracks.pkl'
    tracks = defaultdict(lambda: Track())

    offset = 0
    max_track_id = 0
    prev_file = None

    # all-2024-11-12T13:30.txt
    file_date = None

    src_file_nr = 0

    for name, line_nrs in destinations.items():
        dir_path = path / name
        dir_path.mkdir(exist_ok=True)
        file = dir_path / 'tracked.txt'
        logger.debug(f"- Write {line_nrs} lines to {file}")
        with file.open('w') as target_fp:

            for i in range(line_nrs):
                line = sources.readline()
                current_file = sources.current_file
                if prev_file != current_file:
                    offset: int = max_track_id

                    logger.info(f'{name} - update offset {offset} ({sources.current_file})')
                    prev_file = current_file
                    src_file_nr += 1

                    try:
                        file_date = datetime.strptime(current_file.name, 'all-%Y-%m-%dT%H:%M.txt')
                    except ValueError as e:
                        logger.error(str(e))
                        file_date = None

                parts = line.split('\t')
                track_id = int(parts[1]) + offset

                if file_date:
                    frame_date = file_date + timedelta(seconds = int(float(parts[0]))//10)
                else:
                    frame_date = None

                if track_id > max_track_id:
                    max_track_id = track_id

                track_id = str(track_id)
                target_fp.write("\t".join(parts)) # NB: writes the original per-file track_id; the offset id is only used as key in tracks.json/tracks.pkl

                # NB: the trailing 'source' column may be empty in the raw files, so only parse the nine numeric fields
                parts = [float(p) for p in parts[:9]]
                # ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state', 'source']

                point = Detection(track_id, parts[2], parts[3], parts[4], parts[5], 1, DetectionState(int(parts[8])), int(parts[0]/10), 1)
                # history = [
                #     for d in parts]
                tracks[track_id].track_id = track_id
                tracks[track_id].source = src_file_nr
                tracks[track_id].history.append(point)
                # tracks[track_id].append([
                #     int(parts[0] / 10),
                #     track_id,
                # ] + parts[2:8] + [int(parts[8]), src_file_nr])

    with tracks_file.open('w') as fp:
        logger.info(f"Write {len(tracks)} tracks to {str(tracks_file)}")
        json.dump(tracks, fp, cls=DataclassJSONEncoder, indent=2)
    with tracks_pkl.open('wb') as fp:
        logger.info(f"Write {len(tracks)} tracks to {str(tracks_pkl)}")
        pickle.dump(dict(tracks), fp)
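
# Resulting layout (sketch) under the --save-for-training path after a run:
#
#   all-<timestamp>.txt   raw per-session dumps (input to the rewrite)
#   train/tracked.txt     80% of the lines
#   val/tracked.txt       12% of the lines
#   test/tracked.txt      8% of the lines
#   tracks.json           tracks keyed by (offset) track_id
#   tracks.pkl            same, as pickle, read by TrackReader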


class TrackerWrapper():
    def __init__(self, tracker):
        self.tracker = tracker

    def track_detections(self, detections, img: cv2.Mat, frame_idx: int):
        raise NotImplementedError("Not implemented")

    @classmethod
    def init_type(cls, tracker_type: str):
        if tracker_type == TRACKER_BYTETRACK:
            return ByteTrackWrapper(BYTETracker(track_thresh=TRACKER_BYTETRACK_MINIMUM, match_thresh=TRACKER_CONFIDENCE_MINIMUM, frame_rate=12)) # TODO)) Framerate from emitter
        else:
            return DeepSortWrapper(DeepSort(n_init=5, max_age=30, nms_max_overlap=NON_MAXIMUM_SUPRESSION,
                                            embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
                                            ))
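
# Example (sketch): construct a tracker from the CLI choice; see the
# DETECTORS/TRACKERS constants above for the accepted names:
#
#   mot_tracker = TrackerWrapper.init_type(TRACKER_BYTETRACK)
#   tracks = mot_tracker.track_detections(detections, img, frame.index)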

class DeepSortWrapper(TrackerWrapper):
    def track_detections(self, detections, img: cv2.Mat, frame_idx: int):
        detections = Tracker.detect_persons_deepsort_wrapper(detections)
        tracks: List[DeepsortTrack] = self.tracker.update_tracks(detections, frame=img)
        active_tracks = [t for t in tracks if t.is_confirmed()]
        return [Detection.from_deepsort(t, frame_idx) for t in active_tracks]
        # raw_detections, embeds=None, frame=None, today=None, others=None, instance_masks=None


class ByteTrackWrapper(TrackerWrapper):
    def __init__(self, tracker: BYTETracker):
        self.tracker = tracker

    def track_detections(self, detections: np.ndarray, img: cv2.Mat, frame_idx: int):
        # detections: rows of [x1, y1, x2, y2, score, label]
        if detections.shape[0] == 0:
            detections = np.ndarray((0,0)) # needs to be 2-D

        _ = self.tracker.update(detections)
        removed_tracks = self.tracker.removed_stracks
        active_tracks = [track for track in self.tracker.tracked_stracks if track.is_activated]
        # TODO)) why was this in here:
        # active_tracks = [track for track in active_tracks if track.start_frame < (self.tracker.frame_id - 5)]
        return [Detection.from_bytetrack(track, frame_idx) for track in active_tracks]


class Tracker:
    def __init__(self, config: Namespace):
        self.config = config

        # TODO: config device
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # TODO: support removal
        self.tracks: DefaultDict[str, Track] = defaultdict(lambda: Track())

        logger.debug(f"Load tracker: {self.config.detector}")

        conf = TRACKER_BYTETRACK_MINIMUM if self.config.tracker == TRACKER_BYTETRACK else TRACKER_CONFIDENCE_MINIMUM
        if self.config.detector == DETECTOR_RETINANET:
            # NB: despite the name, this branch currently loads a KeypointRCNN
            # weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT
            # self.model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.2)
            weights = KeypointRCNN_ResNet50_FPN_Weights.COCO_V1
            self.model = keypointrcnn_resnet50_fpn(weights=weights, box_score_thresh=conf)
            self.model.to(self.device)
            # Put the model in inference mode
            self.model.eval()
            # Get the transforms for the model's weights
            self.preprocess = weights.transforms().to(self.device)
            self.mot_tracker = TrackerWrapper.init_type(self.config.tracker)
        elif self.config.detector == DETECTOR_FASTERRCNN:
            # weights = RetinaNet_ResNet50_FPN_V2_Weights.DEFAULT
            # self.model = retinanet_resnet50_fpn_v2(weights=weights, score_thresh=0.2)
            weights = FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1
            self.model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=conf)
            self.model.to(self.device)
            # Put the model in inference mode
            self.model.eval()
            # Get the transforms for the model's weights
            self.preprocess = weights.transforms().to(self.device)
            self.mot_tracker = TrackerWrapper.init_type(self.config.tracker)
        elif self.config.detector == DETECTOR_MASKRCNN:
            weights = MaskRCNN_ResNet50_FPN_V2_Weights.COCO_V1
            self.model = maskrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=conf) # if we use ByteTrack we can work with low probability!
            self.model.to(self.device)
            # Put the model in inference mode
            self.model.eval()
            # Get the transforms for the model's weights
            self.preprocess = weights.transforms().to(self.device)
            # self.mot_tracker = DeepSort(n_init=5, max_iou_distance=1, max_cosine_distance=0.2, max_age=15, nms_max_overlap=0.9,
            self.mot_tracker = TrackerWrapper.init_type(self.config.tracker)
        elif self.config.detector == DETECTOR_YOLOv8:
            # self.model = YOLO('EXPERIMENTS/yolov8x.pt')
            self.model = YOLO('yolo11x.pt')
        else:
            raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")

        # homography = list(source.glob('*img2world.txt'))[0]
        # self.H = self.config.H

        if self.config.smooth_tracks:
            logger.info("Smoother enabled")
            fps = 12 # TODO)) make configurable, or get from cam
            self.smoother = Smoother(window_len=fps*5, convolution=False)
        else:
            logger.info("Smoother disabled (enable with --smooth-tracks)")

        logger.debug("Set up tracker")

    def track_frame(self, frame: Frame):
        if self.config.detector == DETECTOR_YOLOv8:
            detections: List[Detection] = _yolov8_track(frame, self.model, classes=[0, 15, 16], imgsz=[1152, 640])
        else:
            detections: List[Detection] = self._resnet_track(frame, scale=RCNN_SCALE)

        for detection in detections:
            track = self.tracks[detection.track_id]
            track.track_id = detection.track_id # for new tracks
            track.fps = frame.camera.fps
            # track.fps = self.config.camera.fps # for new tracks

            track.history.append(detection) # add to history

        return detections

    def track(self, is_running: Event, timer_counter: int = 0):
        """
        Live tracking of frames coming in over zmq
        """

        self.is_running = is_running

        context = zmq.Context()
        self.frame_sock = context.socket(zmq.SUB)
        self.frame_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
        self.frame_sock.setsockopt(zmq.SUBSCRIBE, b'')
        self.frame_sock.connect(self.config.zmq_frame_addr)

        self.trajectory_socket = context.socket(zmq.PUB)
        self.trajectory_socket.setsockopt(zmq.CONFLATE, 1) # only keep latest frame
        self.trajectory_socket.bind(self.config.zmq_trajectory_addr)

        prev_run_time = 0

        # training_fp = None
        # training_csv = None
        # training_frames = 0

        # if self.config.save_for_training is not None:
        #     if not isinstance(self.config.save_for_training, Path):
        #         raise ValueError("save-for-training should be a path")
        #     if not self.config.save_for_training.exists():
        #         logger.info(f"Making path for training data: {self.config.save_for_training}")
        #         self.config.save_for_training.mkdir(parents=True, exist_ok=False)
        #     else:
        #         logger.warning(f"Path for training-data exists: {self.config.save_for_training}. Continuing assuming that's ok.")
        #     training_fp = open(self.config.save_for_training / 'all.txt', 'w')
        #     # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
        #     training_csv = csv.DictWriter(training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)

        prev_frame_i = -1

        with TrainingDataWriter(self.config.save_for_training) as writer:
            end_time = None
            tracker_dt = None
            w_time = None
            displacement_filter = FinalDisplacementFilter(.2)
            while self.is_running.is_set():

                with timer_counter.get_lock():
                    timer_counter.value += 1
                # this waiting for target_dt causes frame loss. E.g. with target_dt at .1, it
                # skips exactly 1 frame on a 10 fps video (which it obviously should not do)
                # so for now, timing should move to the emitter
                # this_run_time = time.time()
                # # logger.debug(f'test {prev_run_time - this_run_time}')
                # time.sleep(max(0, prev_run_time - this_run_time + TARGET_DT))
                # prev_run_time = time.time()

                poll_time = time.time()
                zmq_ev = self.frame_sock.poll(timeout=2000)
                if not zmq_ev:
                    logger.warning('skip poll after 2000ms')
                    # when there's no data after timeout, loop so that is_running is checked
                    continue

                start_time = time.time()
                frame: Frame = self.frame_sock.recv_pyobj() # frame delivery in current setup: 0.012-0.03s

                if frame.index > (prev_frame_i+1):
                    logger.warning(f"Dropped {frame.index - prev_frame_i - 1} frames ({frame.index=}, {prev_frame_i=}) -- poll time {start_time-poll_time:.5f}")
                    if tracker_dt:
                        logger.warning(f"last loop took {tracker_dt} (finished {start_time - end_time:0.5f} ago, writing took {w_time-end_time} and finished {start_time - w_time} ago).. {writer.path}")

                prev_frame_i = frame.index
                # load homography into frame (TODO: should this be done in emitter?)
                if frame.H is None:
                    raise RuntimeError('Tracker no longer configures H')
                    # fallback: load configured H
                    # frame.H = self.H

                # logger.info(f"Frame delivery delay = {time.time()-frame.time}s")

                detections: List[Detection] = self.track_frame(frame)

                # Store detections into tracklets
                projected_coordinates = []
                # now in track_frame()
                # for detection in detections:
                #     track = self.tracks[detection.track_id]
                #     track.track_id = detection.track_id # for new tracks

                #     track.history.append(detection) # add to history
                #     projected_coordinates.append(track.get_projected_history(self.H)) # then get full history

                # TODO: handle occlusions and disappearance
                # if len(track.history) > 30: # retain 90 tracks for 90 frames
                #     track.history.pop(0)

                # trajectories = {}
                # for detection in detections:
                #     tid = str(detection.track_id)
                #     track = self.tracks[detection.track_id]
                #     coords = track.get_projected_history(self.H) # get full history
                #     trajectories[tid] = {
                #         "id": tid,
                #         "det_conf": detection.conf,
                #         "bbox": detection.to_ltwh(),
                #         "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubly nested, fine for test
                #     }
                active_track_ids = [d.track_id for d in detections]
                active_tracks = {t.track_id: t.get_with_interpolated_history() for t in self.tracks.values() if t.track_id in active_track_ids}
                active_tracks = displacement_filter.apply_to_dict(active_tracks, frame.camera) # a filter to drop tracks of static objects

                removable_tracks = []
                for track_id, track in self.tracks.items():
                    if not len(track.history):
                        continue
                    detection: Detection = track.history[-1]
                    if detection.frame_nr < (frame.index - frame.camera.fps * 5):
                        removable_tracks.append(track_id)
                for track_id in removable_tracks:
                    del self.tracks[track_id]

                # active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids}
                # logger.info(f"{trajectories}")
                frame.tracks = active_tracks

                # if self.config.bypass_prediction:
                #     self.trajectory_socket.send_string(json.dumps(trajectories))
                # else:
                #     self.trajectory_socket.send(pickle.dumps(frame))
                if self.config.smooth_tracks:
                    frame = self.smoother.smooth_frame_tracks(frame)

                # print(f"send to {self.trajectory_socket}, {self.config.zmq_trajectory_addr}")
                self.trajectory_socket.send_pyobj(frame.without_img()) # ditch image for faster passthrough

                end_time = time.time()
                tracker_dt = end_time - start_time

                # having {end_time-frame.time} creates incidental delay... don't know why, maybe because of send(). So add n/a for now
                # or is it {len(active_tracks)} or {tracker_dt}
                # logger.debug(f"Trajectories: n/a. Current frame delay = n/a s (trajectories:s)")

                # self.trajectory_socket.send_string(json.dumps(trajectories))
                # provide a {ID: {id: ID, history: [[x,y],[x,y],...]}}
                # TODO: provide a track object that actually keeps history (unlike tracker)

                # TODO: calculate fps (also for other loops to see asynchronicity)
                # fpsfilter=fpsfilter*.9+(1/dt)*.1 # trust value in order to stabilize fps display

                writer.add(frame, active_tracks.values())

                w_time = time.time()

        logger.info('Stopping')

    def _resnet_track(self, frame: Frame, scale: float = 1) -> List[Detection]:
        img = frame.img
        if scale != 1:
            dsize = (int(img.shape[1] * scale), int(img.shape[0] * scale))
            img = cv2.resize(img, dsize)
        detections = self._resnet_detect_persons(img)
        tracks: List[Detection] = self.mot_tracker.track_detections(detections, img, frame.index)
        # active_tracks = [t for t in tracks if t.is_confirmed()]
        return [d.get_scaled(1/scale) for d in tracks]

    def _resnet_detect_persons(self, frame) -> List[Detection]:
        t = torch.from_numpy(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        # change axes of the loaded image to be compatible with torchvision.io.read_image (which has C,W,H format instead of W,H,C)
        t = t.permute(2, 0, 1)

        batch = self.preprocess(t)[None, :].to(self.device)
        # no_grad can be used on inference, should be slightly faster
        with torch.no_grad():
            predictions = self.model(batch)
        prediction = predictions[0] # we feed only one frame at once

        # TODO: check if we need e.g. cyclist
        mask = prediction['labels'] == 1 # if we want more than one label: np.isin(prediction['labels'], [1,86])

        scores = prediction['scores'][mask]
        # print(scores, prediction['labels'])
        labels = prediction['labels'][mask]
        boxes = prediction['boxes'][mask]
        # print(prediction['scores'])

        if NON_MAXIMUM_SUPRESSION < 1:
            nms_mask = torch.zeros(scores.shape[0]).bool()
            nms_keep_ids = torchvision.ops.nms(boxes, scores, NON_MAXIMUM_SUPRESSION)
            nms_mask[nms_keep_ids] = True
            print(scores.shape[0], nms_keep_ids, nms_mask)
            scores = scores[nms_mask]
            labels = labels[nms_mask]
            boxes = boxes[nms_mask]

        # TODO: introduce confidence and NMS suppression: https://github.com/cfotache/pytorch_objectdetecttrack/blob/master/PyTorch_Object_Tracking.ipynb
        # (which I _think_ we better do after filtering)
        # alternatively look at Soft-NMS https://towardsdatascience.com/non-maximum-suppression-nms-93ce178e177c

        # dets - a numpy array of detections in the format [[x1,y1,x2,y2,score,label],[x1,y1,x2,y2,score,label],...]
        detections = np.array([np.append(bbox, [score, label]) for bbox, score, label in zip(boxes.cpu(), scores.cpu(), labels.cpu())])

        return detections

    @classmethod
    def detect_persons_deepsort_wrapper(cls, detections):
        """make detect_persons() compatible with
        deep_sort_realtime tracker by going from ltrb to ltwh and
        different nesting
        """
        return [([d[0], d[1], d[2]-d[0], d[3]-d[1]], d[4], d[5]) for d in detections]
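
# Worked example (sketch) of the ltrb -> ltwh conversion above: a detection
# [100, 50, 160, 230, 0.9, 1] (x1, y1, x2, y2, score, label) becomes
# ([100, 50, 60, 180], 0.9, 1), i.e. width = x2-x1 and height = y2-y1,
# nested as ([l, t, w, h], confidence, class) for deep_sort_realtime.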


def run_tracker(config: Namespace, is_running: Event, timer_counter):
    router = Tracker(config)
    router.track(is_running, timer_counter)

def run():
    import argparse
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--zmq-frame-addr',
                           help='Manually specify communication addr for the frame messages',
                           type=str,
                           default="ipc:///tmp/feeds_frame")
    argparser.add_argument('--zmq-trajectory-addr',
                           help='Manually specify communication addr for the trajectory messages',
                           type=str,
                           default="ipc:///tmp/feeds_traj")

    argparser.add_argument("--save-for-training",
                           help="Specify the path in which to save the training data",
                           type=Path,
                           default=None)
    argparser.add_argument("--detector",
                           help="Specify the detector to use",
                           type=str,
                           default=DETECTOR_YOLOv8,
                           choices=DETECTORS)
    argparser.add_argument("--tracker",
                           help="Specify the tracker to use",
                           type=str,
                           default=TRACKER_BYTETRACK,
                           choices=TRACKERS)
    argparser.add_argument("--smooth-tracks",
                           help="Smooth the tracker tracks before sending them to the predictor",
                           action='store_true')
    config = argparser.parse_args()
    is_running = multiprocessing.Event()
    is_running.set()
    timer_counter = timer.Timer('frame_emitter')

    router = Tracker(config)
    router.track(is_running, timer_counter.iterations)
    is_running.clear()


class Smoother:

    def __init__(self, window_len=6, convolution=False):
        # for some reason this smoother messes up the predictions. Probably skews the points too much??
        if convolution:
            self.smoother = ConvolutionSmoother(window_len=window_len, window_type='hanning', copy=None)
        else:
            # "Unlike Kalman filtering, which focuses on predicting and updating the current state using historical measurements, Kalman smoothing enhances the accuracy of past state values"
            # see https://medium.com/@shahalkp1/kalman-smoothing-using-tsmoothie-0175260464e5
            self.smoother = KalmanSmoother(component='level_trend', component_noise={'level': 0.02, 'season': .01, 'trend': 0.02}, n_seasons=2, copy=None)

    def smooth(self, points: List[float]):
        self.smoother.smooth(points)
        return self.smoother.smooth_data[0]

    def smooth_track(self, track: Track) -> Track:
        ls = [d.l for d in track.history]
        ts = [d.t for d in track.history]
        ws = [d.w for d in track.history]
        hs = [d.h for d in track.history]
        self.smoother.smooth(ls)
        ls = self.smoother.smooth_data[0]
        self.smoother.smooth(ts)
        ts = self.smoother.smooth_data[0]
        self.smoother.smooth(ws)
        ws = self.smoother.smooth_data[0]
        self.smoother.smooth(hs)
        hs = self.smoother.smooth_data[0]
        new_history = [Detection(d.track_id, l, t, w, h, d.conf, d.state, d.frame_nr, d.det_class) for l, t, w, h, d in zip(ls, ts, ws, hs, track.history)]
        return Track(track.track_id, new_history, track.predictor_history, track.predictions, track.fps)

    def smooth_frame_tracks(self, frame: Frame) -> Frame:
        new_tracks = []
        for track in frame.tracks.values():
            new_track = self.smooth_track(track)
            new_tracks.append(new_track)
        frame.tracks = {t.track_id: t for t in new_tracks}
        return frame

    def smooth_frame_predictions(self, frame: Frame) -> Frame:

        for track in frame.tracks.values():
            new_predictions = []
            if not track.predictions:
                continue

            for prediction in track.predictions:
                xs = [d[0] for d in prediction]
                ys = [d[1] for d in prediction]

                self.smoother.smooth(xs)
                xs = self.smoother.smooth_data[0]
                self.smoother.smooth(ys)
                ys = self.smoother.smooth_data[0]

                smooth_prediction = [[x, y] for x, y in zip(xs, ys)]

                new_predictions.append(smooth_prediction)
            track.predictions = new_predictions

        return frame
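
# Example (sketch): smoothing a frame's tracks before publishing, as done in
# Tracker.track() when --smooth-tracks is set (window of 5 seconds at 12 fps):
#
#   smoother = Smoother(window_len=12 * 5, convolution=False)
#   frame = smoother.smooth_frame_tracks(frame)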