tracker tool for fast tracking data n camera undistort

This commit is contained in:
Ruben van de Ven 2024-11-06 16:22:03 +01:00
parent a0c63c4929
commit 9284ce8849
6 changed files with 272 additions and 148 deletions

View file

@ -7,6 +7,7 @@ readme = "README.md"
[tool.poetry.scripts]
trapserv = "trap.plumber:start"
tracker = "trap.tools:tracker_preprocess"
[tool.poetry.dependencies]

View file

@ -38,7 +38,7 @@ class AnimationRenderer:
self.prediction_sock = context.socket(zmq.SUB)
self.prediction_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
self.prediction_sock.setsockopt(zmq.SUBSCRIBE, b'')
self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
self.prediction_sock.connect(config.zmq_prediction_addr)
self.tracker_sock = context.socket(zmq.SUB)
self.tracker_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
@ -73,7 +73,7 @@ class AnimationRenderer:
# , fullscreen=self.config.render_window
display = pyglet.canvas.get_display()
screen = display.get_screens()[1]
screen = display.get_screens()[0]
# self.window = pyglet.window.Window(width=self.frame_size[0], height=self.frame_size[1], config=config, fullscreen=False, screen=screens[1])
self.window = pyglet.window.Window(width=screen.width, height=screen.height, config=config, fullscreen=True, screen=screen)
@ -108,8 +108,10 @@ class AnimationRenderer:
self.batch_anim = pyglet.graphics.Batch()
self.debug_lines = [
pyglet.shapes.Line(1380, self.config.camera.h, 1380, 690, 2, (255,255,255,255), batch=self.batch_overlay),
pyglet.shapes.Line(0, 660, 1380, 675, 2, (255,255,255,255), batch=self.batch_overlay),
pyglet.shapes.Line(1380, self.config.camera.h, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
pyglet.shapes.Line(0, 660, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
pyglet.shapes.Line(1140, 760, 1140, 675, 2, (255,255,255,255), batch=self.batch_overlay),
pyglet.shapes.Line(0, 750, 1380, 760, 2, (255,255,255,255), batch=self.batch_overlay),
]

View file

@ -120,6 +120,7 @@ class Frame:
time: float= field(default_factory=lambda: time.time())
tracks: Optional[dict[str, Track]] = None
H: Optional[np.array] = None
camera: Optional[Camera] = None
def aslist(self) -> [dict]:
return { t.track_id:
@ -134,6 +135,13 @@ class Frame:
} for t in self.tracks.values()
}
def video_src_from_config(config):
if config.video_loop:
video_srcs: Iterable[Path] = cycle(config.video_src)
else:
video_srcs: Iterable[Path] = config.video_src
return video_srcs
class FrameEmitter:
'''
Emit frame in a separate threat so they can be throttled,
@ -151,10 +159,7 @@ class FrameEmitter:
logger.info(f"Connection socket {config.zmq_frame_addr}")
if self.config.video_loop:
self.video_srcs: Iterable[Path] = cycle(self.config.video_src)
else:
self.video_srcs: [Path] = self.config.video_src
self.video_srcs: video_src_from_config(self.config)
def emit_video(self):
@ -212,7 +217,7 @@ class FrameEmitter:
# hack to mask out area
cv2.rectangle(img, (0,0), (800,200), (0,0,0), -1)
frame = Frame(index=i, img=img, H=video_H)
frame = Frame(index=i, img=img, H=self.config.H, camera=self.config.camera)
# TODO: this is very dirty, need to find another way.
# perhaps multiprocessing Array?
self.frame_sock.send(pickle.dumps(frame))

View file

@ -253,7 +253,8 @@ class PreviewRenderer:
self.prediction_sock = context.socket(zmq.SUB)
self.prediction_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
self.prediction_sock.setsockopt(zmq.SUBSCRIBE, b'')
self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
# self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
self.prediction_sock.connect(config.zmq_prediction_addr)
self.tracker_sock = context.socket(zmq.SUB)
self.tracker_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!

72
trap/tools.py Normal file
View file

@ -0,0 +1,72 @@
from trap.config import parser
from trap.frame_emitter import video_src_from_config, Frame
from trap.tracker import DETECTOR_YOLOv8, _yolov8_track, Track, TrainingDataWriter
from collections import defaultdict
import logging
import cv2
from typing import List, Iterable
from ultralytics import YOLO
from ultralytics.engine.results import Results as YOLOResult
import tqdm
config = parser.parse_args()
logger = logging.getLogger('tools')
def tracker_preprocess():
video_srcs = video_src_from_config(config)
if not hasattr(config, "H"):
print("Set homography file with --homography param")
return
if config.detector != DETECTOR_YOLOv8:
print("Only YOLO for now...")
return
model = YOLO('EXPERIMENTS/yolov8x.pt')
with TrainingDataWriter(config.save_for_training) as writer:
for video_nr, video_path in enumerate(video_srcs):
logger.info(f"Play from '{str(video_path)}'")
video = cv2.VideoCapture(str(video_path))
fps = video.get(cv2.CAP_PROP_FPS)
frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
i = 0
if config.video_offset:
logger.info(f"Start at frame {config.video_offset}")
video.set(cv2.CAP_PROP_POS_FRAMES, config.video_offset)
i = config.video_offset
bar = tqdm.tqdm()
tracks = defaultdict(lambda: Track())
while True:
bar.update()
ret, img = video.read()
i+=1
# seek to 0 if video has finished. Infinite loop
if not ret:
# now loading multiple files
break
frame = Frame(index=bar.n, img=img, H=config.H, camera=config.camera)
detections = _yolov8_track(frame, model, classes=[0])
# detections = _yolov8_track(frame, model, imgsz=1440, classes=[0])
bar.set_description(f"[{video_nr}/{len(video_srcs)}] [{i}/{frame_count}] {str(video_path)} -- Detections {len(detections)}: {[d.conf for d in detections]}")
for detection in detections:
track = tracks[detection.track_id]
track.track_id = detection.track_id # for new tracks
track.history.append(detection) # add to history
active_track_ids = [d.track_id for d in detections]
active_tracks = {t.track_id: t for t in tracks.values() if t.track_id in active_track_ids}
writer.add(frame, active_tracks.values())
logger.info("Done!")

View file

@ -8,7 +8,7 @@ from multiprocessing import Event
from pathlib import Path
import pickle
import time
from typing import Optional
from typing import Optional, List
import numpy as np
import torch
import zmq
@ -47,8 +47,89 @@ DETECTOR_YOLOv8 = 'ultralytics'
DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8]
def _yolov8_track(frame: Frame, model: YOLO, **kwargs) -> List[Detection]:
results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False, **kwargs))
if results[0].boxes is None or results[0].boxes.id is None:
# work around https://github.com/ultralytics/ultralytics/issues/5968
return []
return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]
class TrainingDataWriter:
def __init__(self, training_path = Optional[Path]):
if training_path is None:
self.path = None
return
if not isinstance(training_path, Path):
raise ValueError("save-for-training should be a path")
if not training_path.exists():
logger.info(f"Making path for training data: {training_path}")
training_path.mkdir(parents=True, exist_ok=False)
else:
logger.warning(f"Path for training-data exists: {training_path}. Continuing assuming that's ok.")
# following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
self.path = training_path
def __enter__(self):
if self.path:
self.training_fp = open(self.path / 'all.txt', 'w')
# following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
self.csv = csv.DictWriter(self.training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
self.count = 0
return self
def add(self, frame: Frame, tracks: List[Track]):
if not self.path:
# skip if disabled
return
self.csv.writerows([{
'frame_id': round(frame.index * 10., 1), # not really time
'track_id': t.track_id,
'l': float(t.history[-1].l), # to float, so we're sure it's not a torch.tensor()
't': float(t.history[-1].t),
'w': float(t.history[-1].w),
'h': float(t.history[-1].h),
'x': t.get_projected_history(frame.H, frame.camera)[-1][0],
'y': t.get_projected_history(frame.H, frame.camera)[-1][1],
'state': t.history[-1].state.value
# only keep _actual_detections, no lost entries
} for t in tracks
# if t.history[-1].state != DetectionState.Lost
])
self.count += len(tracks)
def __exit__(self, exc_type, exc_value, exc_tb):
# ... ignore exception (type, value, traceback)
if not self.path:
return
self.training_fp.close()
lines = {
'train': int(self.count * .8),
'val': int(self.count * .12),
'test': int(self.count * .08),
}
logger.info(f"Splitting gathered data from {self.training_fp.name}")
with open(self.training_fp.name, 'r') as source_fp:
for name, line_nrs in lines.items():
dir_path = self.path / name
dir_path.mkdir(exist_ok=True)
file = dir_path / 'tracked.txt'
logger.debug(f"- Write {line_nrs} lines to {file}")
with file.open('w') as target_fp:
for i in range(line_nrs):
target_fp.write(source_fp.readline())
class Tracker:
def __init__(self, config: Namespace, is_running: Event):
self.config = config
@ -98,7 +179,7 @@ class Tracker:
# embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
)
elif self.config.detector == DETECTOR_YOLOv8:
self.model = YOLO('EXPERIMENTS/yolov8x.pt')
self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0)
else:
raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")
@ -120,156 +201,118 @@ class Tracker:
def track(self):
prev_run_time = 0
training_fp = None
training_csv = None
training_frames = 0
# training_fp = None
# training_csv = None
# training_frames = 0
if self.config.save_for_training is not None:
if not isinstance(self.config.save_for_training, Path):
raise ValueError("save-for-training should be a path")
if not self.config.save_for_training.exists():
logger.info(f"Making path for training data: {self.config.save_for_training}")
self.config.save_for_training.mkdir(parents=True, exist_ok=False)
else:
logger.warning(f"Path for training-data exists: {self.config.save_for_training}. Continuing assuming that's ok.")
training_fp = open(self.config.save_for_training / 'all.txt', 'w')
# following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
training_csv = csv.DictWriter(training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
# if self.config.save_for_training is not None:
# if not isinstance(self.config.save_for_training, Path):
# raise ValueError("save-for-training should be a path")
# if not self.config.save_for_training.exists():
# logger.info(f"Making path for training data: {self.config.save_for_training}")
# self.config.save_for_training.mkdir(parents=True, exist_ok=False)
# else:
# logger.warning(f"Path for training-data exists: {self.config.save_for_training}. Continuing assuming that's ok.")
# training_fp = open(self.config.save_for_training / 'all.txt', 'w')
# # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
# training_csv = csv.DictWriter(training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
prev_frame_i = -1
while self.is_running.is_set():
# this waiting for target_dt causes frame loss. E.g. with target_dt at .1, it
# skips exactly 1 frame on a 10 fps video (which, it obviously should not do)
# so for now, timing should move to emitter
# this_run_time = time.time()
# # logger.debug(f'test {prev_run_time - this_run_time}')
# time.sleep(max(0, prev_run_time - this_run_time + TARGET_DT))
# prev_run_time = time.time()
with TrainingDataWriter(self.config.save_for_training) as writer:
while self.is_running.is_set():
# this waiting for target_dt causes frame loss. E.g. with target_dt at .1, it
# skips exactly 1 frame on a 10 fps video (which, it obviously should not do)
# so for now, timing should move to emitter
# this_run_time = time.time()
# # logger.debug(f'test {prev_run_time - this_run_time}')
# time.sleep(max(0, prev_run_time - this_run_time + TARGET_DT))
# prev_run_time = time.time()
zmq_ev = self.frame_sock.poll(timeout=2000)
if not zmq_ev:
logger.warn('skip poll after 2000ms')
# when there's no data after timeout, loop so that is_running is checked
continue
zmq_ev = self.frame_sock.poll(timeout=2000)
if not zmq_ev:
logger.warn('skip poll after 2000ms')
# when there's no data after timeout, loop so that is_running is checked
continue
start_time = time.time()
frame: Frame = self.frame_sock.recv_pyobj() # frame delivery in current setup: 0.012-0.03s
start_time = time.time()
frame: Frame = self.frame_sock.recv_pyobj() # frame delivery in current setup: 0.012-0.03s
if frame.index > (prev_frame_i+1):
logger.warn(f"Dropped {frame.index - prev_frame_i - 1} frames ({frame.index=}, {prev_frame_i=})")
if frame.index > (prev_frame_i+1):
logger.warn(f"Dropped {frame.index - prev_frame_i - 1} frames ({frame.index=}, {prev_frame_i=})")
prev_frame_i = frame.index
# load homography into frame (TODO: should this be done in emitter?)
if frame.H is None:
# logger.warning('Falling back to default H')
# fallback: load configured H
frame.H = self.H
# logger.info(f"Frame delivery delay = {time.time()-frame.time}s")
if self.config.detector == DETECTOR_YOLOv8:
detections: [Detection] = self._yolov8_track(frame)
else :
detections: [Detection] = self._resnet_track(frame.img, scale = 1)
# Store detections into tracklets
projected_coordinates = []
for detection in detections:
track = self.tracks[detection.track_id]
track.track_id = detection.track_id # for new tracks
prev_frame_i = frame.index
# load homography into frame (TODO: should this be done in emitter?)
if frame.H is None:
# logger.warning('Falling back to default H')
# fallback: load configured H
frame.H = self.H
track.history.append(detection) # add to history
# projected_coordinates.append(track.get_projected_history(self.H)) # then get full history
# logger.info(f"Frame delivery delay = {time.time()-frame.time}s")
if self.config.detector == DETECTOR_YOLOv8:
detections: [Detection] = _yolov8_track(frame, self.model)
else :
detections: [Detection] = self._resnet_track(frame.img, scale = 1)
# TODO: hadle occlusions, and dissappearance
# if len(track.history) > 30: # retain 90 tracks for 90 frames
# track.history.pop(0)
# trajectories = {}
# for detection in detections:
# tid = str(detection.track_id)
# track = self.tracks[detection.track_id]
# coords = track.get_projected_history(self.H) # get full history
# trajectories[tid] = {
# "id": tid,
# "det_conf": detection.conf,
# "bbox": detection.to_ltwh(),
# "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubles nested, fine for test
# }
active_track_ids = [d.track_id for d in detections]
active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids}
# logger.info(f"{trajectories}")
frame.tracks = active_tracks
# if self.config.bypass_prediction:
# self.trajectory_socket.send_string(json.dumps(trajectories))
# else:
# self.trajectory_socket.send(pickle.dumps(frame))
if self.config.smooth_tracks:
frame = self.smoother.smooth_frame_tracks(frame)
self.trajectory_socket.send_pyobj(frame)
current_time = time.time()
logger.debug(f"Trajectories: {len(active_tracks)}. Current frame delay = {current_time-frame.time}s (trajectories: {current_time - start_time}s)")
# self.trajectory_socket.send_string(json.dumps(trajectories))
# provide a {ID: {id: ID, history: [[x,y],[x,y],...]}}
# TODO: provide a track object that actually keeps history (unlike tracker)
#TODO calculate fps (also for other loops to see asynchonity)
# fpsfilter=fpsfilter*.9+(1/dt)*.1 #trust value in order to stabilize fps display
if training_csv:
training_csv.writerows([{
'frame_id': round(frame.index * 10., 1), # not really time
'track_id': t.track_id,
'l': t.history[-1].l,
't': t.history[-1].t,
'w': t.history[-1].w,
'h': t.history[-1].h,
'x': t.get_projected_history(frame.H)[-1][0],
'y': t.get_projected_history(frame.H)[-1][1],
'state': t.history[-1].state.value
# only keep _actual_detections, no lost entries
} for t in active_tracks.values()
# if t.history[-1].state != DetectionState.Lost
])
training_frames += len(active_tracks)
# print(time.time() - start_time)
if training_fp:
training_fp.close()
lines = {
'train': int(training_frames * .8),
'val': int(training_frames * .12),
'test': int(training_frames * .08),
}
logger.info(f"Splitting gathered data from {training_fp.name}")
with open(training_fp.name, 'r') as source_fp:
for name, line_nrs in lines.items():
dir_path = self.config.save_for_training / name
dir_path.mkdir(exist_ok=True)
file = dir_path / 'tracked.txt'
logger.debug(f"- Write {line_nrs} lines to {file}")
with file.open('w') as target_fp:
for i in range(line_nrs):
target_fp.write(source_fp.readline())
# Store detections into tracklets
projected_coordinates = []
for detection in detections:
track = self.tracks[detection.track_id]
track.track_id = detection.track_id # for new tracks
track.history.append(detection) # add to history
# projected_coordinates.append(track.get_projected_history(self.H)) # then get full history
# TODO: hadle occlusions, and dissappearance
# if len(track.history) > 30: # retain 90 tracks for 90 frames
# track.history.pop(0)
# trajectories = {}
# for detection in detections:
# tid = str(detection.track_id)
# track = self.tracks[detection.track_id]
# coords = track.get_projected_history(self.H) # get full history
# trajectories[tid] = {
# "id": tid,
# "det_conf": detection.conf,
# "bbox": detection.to_ltwh(),
# "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubles nested, fine for test
# }
active_track_ids = [d.track_id for d in detections]
active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids}
# logger.info(f"{trajectories}")
frame.tracks = active_tracks
# if self.config.bypass_prediction:
# self.trajectory_socket.send_string(json.dumps(trajectories))
# else:
# self.trajectory_socket.send(pickle.dumps(frame))
if self.config.smooth_tracks:
frame = self.smoother.smooth_frame_tracks(frame)
self.trajectory_socket.send_pyobj(frame)
current_time = time.time()
logger.debug(f"Trajectories: {len(active_tracks)}. Current frame delay = {current_time-frame.time}s (trajectories: {current_time - start_time}s)")
# self.trajectory_socket.send_string(json.dumps(trajectories))
# provide a {ID: {id: ID, history: [[x,y],[x,y],...]}}
# TODO: provide a track object that actually keeps history (unlike tracker)
#TODO calculate fps (also for other loops to see asynchonity)
# fpsfilter=fpsfilter*.9+(1/dt)*.1 #trust value in order to stabilize fps display
writer.add(frame, active_tracks.values())
logger.info('Stopping')
def _yolov8_track(self, frame: Frame,) -> [Detection]:
results: [YOLOResult] = self.model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False)
if results[0].boxes is None or results[0].boxes.id is None:
# work around https://github.com/ultralytics/ultralytics/issues/5968
return []
return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]
def _resnet_track(self, img, scale: float = 1) -> [Detection]:
if scale != 1:
dsize = (int(img.shape[1] * scale), int(img.shape[0] * scale))