diff --git a/pyproject.toml b/pyproject.toml
index 9dfdfc0..b82fdd9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,6 +7,7 @@ readme = "README.md"
 
 [tool.poetry.scripts]
 trapserv = "trap.plumber:start"
+tracker = "trap.tools:tracker_preprocess"
 
 [tool.poetry.dependencies]
diff --git a/trap/animation_renderer.py b/trap/animation_renderer.py
index ac24b28..07f1af5 100644
--- a/trap/animation_renderer.py
+++ b/trap/animation_renderer.py
@@ -38,7 +38,7 @@ class AnimationRenderer:
         self.prediction_sock = context.socket(zmq.SUB)
         self.prediction_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
         self.prediction_sock.setsockopt(zmq.SUBSCRIBE, b'')
-        self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
+        self.prediction_sock.connect(config.zmq_prediction_addr)
 
         self.tracker_sock = context.socket(zmq.SUB)
         self.tracker_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
@@ -73,7 +73,7 @@ class AnimationRenderer:
 
         # , fullscreen=self.config.render_window
         display = pyglet.canvas.get_display()
-        screen = display.get_screens()[1]
+        screen = display.get_screens()[0]
         # self.window = pyglet.window.Window(width=self.frame_size[0], height=self.frame_size[1], config=config, fullscreen=False, screen=screens[1])
         self.window = pyglet.window.Window(width=screen.width, height=screen.height, config=config, fullscreen=True, screen=screen)
 
@@ -108,8 +108,10 @@ class AnimationRenderer:
         self.batch_anim = pyglet.graphics.Batch()
 
         self.debug_lines = [
-            pyglet.shapes.Line(1380, self.config.camera.h, 1380, 690, 2, (255,255,255,255), batch=self.batch_overlay),
-            pyglet.shapes.Line(0, 660, 1380, 675, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(1380, self.config.camera.h, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(0, 660, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(1140, 760, 1140, 675, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(0, 750, 1380, 760, 2, (255,255,255,255), batch=self.batch_overlay),
         ]
 
 
diff --git a/trap/frame_emitter.py b/trap/frame_emitter.py
index 06adf99..6d35d32 100644
--- a/trap/frame_emitter.py
+++ b/trap/frame_emitter.py
@@ -120,6 +120,7 @@ class Frame:
     time: float= field(default_factory=lambda: time.time())
     tracks: Optional[dict[str, Track]] = None
     H: Optional[np.array] = None
+    camera: Optional[Camera] = None
 
     def aslist(self) -> [dict]:
         return { t.track_id:
@@ -134,6 +135,13 @@
             } for t in self.tracks.values()
         }
 
+def video_src_from_config(config):
+    if config.video_loop:
+        video_srcs: Iterable[Path] = cycle(config.video_src)
+    else:
+        video_srcs: Iterable[Path] = config.video_src
+    return video_srcs
+
 class FrameEmitter:
     '''
     Emit frame in a separate threat so they can be throttled,
@@ -151,10 +159,7 @@ class FrameEmitter:
 
         logger.info(f"Connection socket {config.zmq_frame_addr}")
 
-        if self.config.video_loop:
-            self.video_srcs: Iterable[Path] = cycle(self.config.video_src)
-        else:
-            self.video_srcs: [Path] = self.config.video_src
+        self.video_srcs = video_src_from_config(self.config)
 
 
     def emit_video(self):
@@ -212,7 +217,7 @@ class FrameEmitter:
 
             # hack to mask out area
             cv2.rectangle(img, (0,0), (800,200), (0,0,0), -1)
 
-            frame = Frame(index=i, img=img, H=video_H)
+            frame = Frame(index=i, img=img, H=self.config.H, camera=self.config.camera) # TODO: this is very dirty, need to find another way.
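+            # the Camera object now travels with every Frame, so downstream consumers (e.g. TrainingDataWriter.add) can project detections via frame.camera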
             # perhaps multiprocessing Array?
             self.frame_sock.send(pickle.dumps(frame))
diff --git a/trap/preview_renderer.py b/trap/preview_renderer.py
index 07d6fff..2d3eb12 100644
--- a/trap/preview_renderer.py
+++ b/trap/preview_renderer.py
@@ -253,7 +253,8 @@ class PreviewRenderer:
         self.prediction_sock = context.socket(zmq.SUB)
         self.prediction_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
         self.prediction_sock.setsockopt(zmq.SUBSCRIBE, b'')
-        self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
+        # self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr)
+        self.prediction_sock.connect(config.zmq_prediction_addr)
 
         self.tracker_sock = context.socket(zmq.SUB)
         self.tracker_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!!
diff --git a/trap/tools.py b/trap/tools.py
new file mode 100644
index 0000000..f09244f
--- /dev/null
+++ b/trap/tools.py
@@ -0,0 +1,72 @@
+from trap.config import parser
+from trap.frame_emitter import video_src_from_config, Frame
+from trap.tracker import DETECTOR_YOLOv8, _yolov8_track, Track, TrainingDataWriter
+from collections import defaultdict
+
+import logging
+import cv2
+from typing import List, Iterable
+
+from ultralytics import YOLO
+from ultralytics.engine.results import Results as YOLOResult
+import tqdm
+
+config = parser.parse_args()
+
+logger = logging.getLogger('tools')
+
+def tracker_preprocess():
+    video_srcs = video_src_from_config(config)
+    if not hasattr(config, "H"):
+        print("Set homography file with --homography param")
+        return
+
+    if config.detector != DETECTOR_YOLOv8:
+        print("Only YOLO for now...")
+        return
+
+    model = YOLO('EXPERIMENTS/yolov8x.pt')
+
+    with TrainingDataWriter(config.save_for_training) as writer:
+        for video_nr, video_path in enumerate(video_srcs):
+            logger.info(f"Play from '{str(video_path)}'")
+            video = cv2.VideoCapture(str(video_path))
+            fps = video.get(cv2.CAP_PROP_FPS)
+            frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
+            i = 0
+            if config.video_offset:
+                logger.info(f"Start at frame {config.video_offset}")
+                video.set(cv2.CAP_PROP_POS_FRAMES, config.video_offset)
+                i = config.video_offset
+
+            bar = tqdm.tqdm()
+            tracks = defaultdict(lambda: Track())
+
+            while True:
+                bar.update()
+                ret, img = video.read()
+                i+=1
+
+                # seek to 0 if video has finished. Infinite loop
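+                # (a failed read means the current file is done; break so the outer loop continues with the next source)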
+                if not ret:
+                    # now loading multiple files
+                    break
+
+                frame = Frame(index=bar.n, img=img, H=config.H, camera=config.camera)
+
+                detections = _yolov8_track(frame, model, classes=[0])
+                # detections = _yolov8_track(frame, model, imgsz=1440, classes=[0])
+
+                bar.set_description(f"[{video_nr}/{len(video_srcs)}] [{i}/{frame_count}] {str(video_path)} -- Detections {len(detections)}: {[d.conf for d in detections]}")
+
+                for detection in detections:
+                    track = tracks[detection.track_id]
+                    track.track_id = detection.track_id # for new tracks
+                    track.history.append(detection) # add to history
+
+                active_track_ids = [d.track_id for d in detections]
+                active_tracks = {t.track_id: t for t in tracks.values() if t.track_id in active_track_ids}
+
+                writer.add(frame, active_tracks.values())
+
+    logger.info("Done!")
\ No newline at end of file
diff --git a/trap/tracker.py b/trap/tracker.py
index ed2abff..6e15471 100644
--- a/trap/tracker.py
+++ b/trap/tracker.py
@@ -8,7 +8,7 @@ from multiprocessing import Event
 from pathlib import Path
 import pickle
 import time
-from typing import Optional
+from typing import Optional, List
 import numpy as np
 import torch
 import zmq
@@ -47,8 +47,89 @@ DETECTOR_YOLOv8 = 'ultralytics'
 
 DETECTORS = [DETECTOR_RETINANET, DETECTOR_MASKRCNN, DETECTOR_FASTERRCNN, DETECTOR_YOLOv8]
 
+def _yolov8_track(frame: Frame, model: YOLO, **kwargs) -> List[Detection]:
+
+    results: List[YOLOResult] = list(model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False, **kwargs))
+    if results[0].boxes is None or results[0].boxes.id is None:
+        # work around https://github.com/ultralytics/ultralytics/issues/5968
+        return []
+
+    return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]
+class TrainingDataWriter:
+    def __init__(self, training_path: Optional[Path] = None):
+        if training_path is None:
+            self.path = None
+            return
+
+        if not isinstance(training_path, Path):
+            raise ValueError("save-for-training should be a path")
+        if not training_path.exists():
+            logger.info(f"Making path for training data: {training_path}")
+            training_path.mkdir(parents=True, exist_ok=False)
+        else:
+            logger.warning(f"Path for training-data exists: {training_path}. Continuing assuming that's ok.")
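+            # note: __enter__ opens <training_path>/all.txt in 'w' mode, so an existing all.txt will be overwritten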
+
+        # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
+
+        self.path = training_path
+
+    def __enter__(self):
+        if self.path:
+            self.training_fp = open(self.path / 'all.txt', 'w')
+            # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
+            self.csv = csv.DictWriter(self.training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
+            self.count = 0
+        return self
+
+    def add(self, frame: Frame, tracks: List[Track]):
+        if not self.path:
+            # skip if disabled
+            return
+
+        self.csv.writerows([{
+            'frame_id': round(frame.index * 10., 1), # not really time
+            'track_id': t.track_id,
+            'l': float(t.history[-1].l), # to float, so we're sure it's not a torch.tensor()
+            't': float(t.history[-1].t),
+            'w': float(t.history[-1].w),
+            'h': float(t.history[-1].h),
+            'x': t.get_projected_history(frame.H, frame.camera)[-1][0],
+            'y': t.get_projected_history(frame.H, frame.camera)[-1][1],
+            'state': t.history[-1].state.value
+            # only keep _actual_detections, no lost entries
+        } for t in tracks
+            # if t.history[-1].state != DetectionState.Lost
+        ])
+        self.count += len(tracks)
+
+    def __exit__(self, exc_type, exc_value, exc_tb):
+        # ... ignore exception (type, value, traceback)
+        if not self.path:
+            return
+
+        self.training_fp.close()
+        lines = {
+            'train': int(self.count * .8),
+            'val': int(self.count * .12),
+            'test': int(self.count * .08),
+        }
+        logger.info(f"Splitting gathered data from {self.training_fp.name}")
+        with open(self.training_fp.name, 'r') as source_fp:
+            for name, line_nrs in lines.items():
+                dir_path = self.path / name
+                dir_path.mkdir(exist_ok=True)
+                file = dir_path / 'tracked.txt'
+                logger.debug(f"- Write {line_nrs} lines to {file}")
+                with file.open('w') as target_fp:
+                    for i in range(line_nrs):
+                        target_fp.write(source_fp.readline())
+
+
+
+
 class Tracker:
     def __init__(self, config: Namespace, is_running: Event):
         self.config = config
@@ -98,7 +179,7 @@ class Tracker:
                 # embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
                 )
         elif self.config.detector == DETECTOR_YOLOv8:
-            self.model = YOLO('EXPERIMENTS/yolov8x.pt')
+            self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0)
         else:
             raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")
 
@@ -120,156 +201,118 @@ class Tracker:
 
     def track(self):
         prev_run_time = 0
 
-        training_fp = None
-        training_csv = None
-        training_frames = 0
+        # training_fp = None
+        # training_csv = None
+        # training_frames = 0
 
-        if self.config.save_for_training is not None:
-            if not isinstance(self.config.save_for_training, Path):
-                raise ValueError("save-for-training should be a path")
-            if not self.config.save_for_training.exists():
-                logger.info(f"Making path for training data: {self.config.save_for_training}")
-                self.config.save_for_training.mkdir(parents=True, exist_ok=False)
-            else:
-                logger.warning(f"Path for training-data exists: {self.config.save_for_training}. Continuing assuming that's ok.")
-            training_fp = open(self.config.save_for_training / 'all.txt', 'w')
-            # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
-            training_csv = csv.DictWriter(training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
+        # if self.config.save_for_training is not None:
+        #     if not isinstance(self.config.save_for_training, Path):
+        #         raise ValueError("save-for-training should be a path")
+        #     if not self.config.save_for_training.exists():
+        #         logger.info(f"Making path for training data: {self.config.save_for_training}")
+        #         self.config.save_for_training.mkdir(parents=True, exist_ok=False)
+        #     else:
+        #         logger.warning(f"Path for training-data exists: {self.config.save_for_training}. Continuing assuming that's ok.")
+        #     training_fp = open(self.config.save_for_training / 'all.txt', 'w')
+        #     # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
+        #     training_csv = csv.DictWriter(training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
 
         prev_frame_i = -1
 
-        while self.is_running.is_set():
-            # this waiting for target_dt causes frame loss. E.g. with target_dt at .1, it
-            # skips exactly 1 frame on a 10 fps video (which, it obviously should not do)
-            # so for now, timing should move to emitter
-            # this_run_time = time.time()
-            # # logger.debug(f'test {prev_run_time - this_run_time}')
-            # time.sleep(max(0, prev_run_time - this_run_time + TARGET_DT))
-            # prev_run_time = time.time()
+        with TrainingDataWriter(self.config.save_for_training) as writer:
+            while self.is_running.is_set():
+                # this waiting for target_dt causes frame loss. E.g. with target_dt at .1, it
+                # skips exactly 1 frame on a 10 fps video (which, it obviously should not do)
+                # so for now, timing should move to emitter
+                # this_run_time = time.time()
+                # # logger.debug(f'test {prev_run_time - this_run_time}')
+                # time.sleep(max(0, prev_run_time - this_run_time + TARGET_DT))
+                # prev_run_time = time.time()
 
-            zmq_ev = self.frame_sock.poll(timeout=2000)
-            if not zmq_ev:
-                logger.warn('skip poll after 2000ms')
-                # when there's no data after timeout, loop so that is_running is checked
-                continue
+                zmq_ev = self.frame_sock.poll(timeout=2000)
+                if not zmq_ev:
+                    logger.warn('skip poll after 2000ms')
+                    # when there's no data after timeout, loop so that is_running is checked
+                    continue
 
-            start_time = time.time()
-            frame: Frame = self.frame_sock.recv_pyobj() # frame delivery in current setup: 0.012-0.03s
+                start_time = time.time()
+                frame: Frame = self.frame_sock.recv_pyobj() # frame delivery in current setup: 0.012-0.03s
 
-            if frame.index > (prev_frame_i+1):
-                logger.warn(f"Dropped {frame.index - prev_frame_i - 1} frames ({frame.index=}, {prev_frame_i=})")
-
-            prev_frame_i = frame.index
-            # load homography into frame (TODO: should this be done in emitter?)
-            if frame.H is None:
-                # logger.warning('Falling back to default H')
-                # fallback: load configured H
-                frame.H = self.H
-
-            # logger.info(f"Frame delivery delay = {time.time()-frame.time}s")
-
-
-            if self.config.detector == DETECTOR_YOLOv8:
-                detections: [Detection] = self._yolov8_track(frame)
-            else :
-                detections: [Detection] = self._resnet_track(frame.img, scale = 1)
-
-            # Store detections into tracklets
-            projected_coordinates = []
-            for detection in detections:
-                track = self.tracks[detection.track_id]
-                track.track_id = detection.track_id # for new tracks
+                if frame.index > (prev_frame_i+1):
+                    logger.warn(f"Dropped {frame.index - prev_frame_i - 1} frames ({frame.index=}, {prev_frame_i=})")
 
+                prev_frame_i = frame.index
+                # load homography into frame (TODO: should this be done in emitter?)
+                if frame.H is None:
+                    # logger.warning('Falling back to default H')
+                    # fallback: load configured H
+                    frame.H = self.H
 
-                track.history.append(detection) # add to history
-                # projected_coordinates.append(track.get_projected_history(self.H)) # then get full history
+                # logger.info(f"Frame delivery delay = {time.time()-frame.time}s")
+
+
+                if self.config.detector == DETECTOR_YOLOv8:
+                    detections: [Detection] = _yolov8_track(frame, self.model)
+                else :
+                    detections: [Detection] = self._resnet_track(frame.img, scale = 1)
 
-            # TODO: hadle occlusions, and dissappearance
-            # if len(track.history) > 30: # retain 90 tracks for 90 frames
-            #     track.history.pop(0)
-
-
-            # trajectories = {}
-            # for detection in detections:
-            #     tid = str(detection.track_id)
-            #     track = self.tracks[detection.track_id]
-            #     coords = track.get_projected_history(self.H) # get full history
-            #     trajectories[tid] = {
-            #         "id": tid,
-            #         "det_conf": detection.conf,
-            #         "bbox": detection.to_ltwh(),
-            #         "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubles nested, fine for test
-            #     }
-            active_track_ids = [d.track_id for d in detections]
-            active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids}
-            # logger.info(f"{trajectories}")
-            frame.tracks = active_tracks
-
-            # if self.config.bypass_prediction:
-            #     self.trajectory_socket.send_string(json.dumps(trajectories))
-            # else:
-            #     self.trajectory_socket.send(pickle.dumps(frame))
-            if self.config.smooth_tracks:
-                frame = self.smoother.smooth_frame_tracks(frame)
-
-            self.trajectory_socket.send_pyobj(frame)
-
-            current_time = time.time()
-            logger.debug(f"Trajectories: {len(active_tracks)}. Current frame delay = {current_time-frame.time}s (trajectories: {current_time - start_time}s)")
-
-            # self.trajectory_socket.send_string(json.dumps(trajectories))
-            # provide a {ID: {id: ID, history: [[x,y],[x,y],...]}}
-            # TODO: provide a track object that actually keeps history (unlike tracker)
-
-            #TODO calculate fps (also for other loops to see asynchonity)
-            # fpsfilter=fpsfilter*.9+(1/dt)*.1 #trust value in order to stabilize fps display
-            if training_csv:
-                training_csv.writerows([{
-                    'frame_id': round(frame.index * 10., 1), # not really time
-                    'track_id': t.track_id,
-                    'l': t.history[-1].l,
-                    't': t.history[-1].t,
-                    'w': t.history[-1].w,
-                    'h': t.history[-1].h,
-                    'x': t.get_projected_history(frame.H)[-1][0],
-                    'y': t.get_projected_history(frame.H)[-1][1],
-                    'state': t.history[-1].state.value
-                    # only keep _actual_detections, no lost entries
-                } for t in active_tracks.values()
-                    # if t.history[-1].state != DetectionState.Lost
-                ])
-                training_frames += len(active_tracks)
-            # print(time.time() - start_time)
-
-
-        if training_fp:
-            training_fp.close()
-            lines = {
-                'train': int(training_frames * .8),
-                'val': int(training_frames * .12),
-                'test': int(training_frames * .08),
-            }
-            logger.info(f"Splitting gathered data from {training_fp.name}")
-            with open(training_fp.name, 'r') as source_fp:
-                for name, line_nrs in lines.items():
-                    dir_path = self.config.save_for_training / name
-                    dir_path.mkdir(exist_ok=True)
-                    file = dir_path / 'tracked.txt'
-                    logger.debug(f"- Write {line_nrs} lines to {file}")
-                    with file.open('w') as target_fp:
-                        for i in range(line_nrs):
-                            target_fp.write(source_fp.readline())
+                # Store detections into tracklets
+                projected_coordinates = []
+                for detection in detections:
+                    track = self.tracks[detection.track_id]
+                    track.track_id = detection.track_id # for new tracks
+
+                    track.history.append(detection) # add to history
+                    # projected_coordinates.append(track.get_projected_history(self.H)) # then get full history
+
+                # TODO: handle occlusions, and disappearance
+                # if len(track.history) > 30: # retain 90 tracks for 90 frames
+                #     track.history.pop(0)
+
+
+                # trajectories = {}
+                # for detection in detections:
+                #     tid = str(detection.track_id)
+                #     track = self.tracks[detection.track_id]
+                #     coords = track.get_projected_history(self.H) # get full history
+                #     trajectories[tid] = {
+                #         "id": tid,
+                #         "det_conf": detection.conf,
+                #         "bbox": detection.to_ltwh(),
+                #         "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubly nested, fine for test
+                #     }
+                active_track_ids = [d.track_id for d in detections]
+                active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids}
+                # logger.info(f"{trajectories}")
+                frame.tracks = active_tracks
+
+                # if self.config.bypass_prediction:
+                #     self.trajectory_socket.send_string(json.dumps(trajectories))
+                # else:
+                #     self.trajectory_socket.send(pickle.dumps(frame))
+                if self.config.smooth_tracks:
+                    frame = self.smoother.smooth_frame_tracks(frame)
+
+                self.trajectory_socket.send_pyobj(frame)
+
+                current_time = time.time()
+                logger.debug(f"Trajectories: {len(active_tracks)}. Current frame delay = {current_time-frame.time}s (trajectories: {current_time - start_time}s)")
+
+                # self.trajectory_socket.send_string(json.dumps(trajectories))
+                # provide a {ID: {id: ID, history: [[x,y],[x,y],...]}}
+                # TODO: provide a track object that actually keeps history (unlike tracker)
+
+                #TODO calculate fps (also for other loops to see asynchronicity)
+                # fpsfilter=fpsfilter*.9+(1/dt)*.1 #trust value in order to stabilize fps display
+                writer.add(frame, active_tracks.values())
+
+
+        logger.info('Stopping')
 
-    def _yolov8_track(self, frame: Frame,) -> [Detection]:
-        results: [YOLOResult] = self.model.track(frame.img, persist=True, tracker="bytetrack.yaml", verbose=False)
-        if results[0].boxes is None or results[0].boxes.id is None:
-            # work around https://github.com/ultralytics/ultralytics/issues/5968
-            return []
-        return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]
-    
+
     def _resnet_track(self, img, scale: float = 1) -> [Detection]:
         if scale != 1:
             dsize = (int(img.shape[1] * scale), int(img.shape[0] * scale))