From 0f9661177101edc087e2ffb49f2fcdabaed466fb Mon Sep 17 00:00:00 2001 From: Ruben van de Ven Date: Tue, 3 Dec 2024 15:21:52 +0100 Subject: [PATCH] Tweak training process --- trap/config.py | 7 + trap/cv_renderer.py | 509 ++++++++++++++++++++++++++++++++++++++ trap/frame_emitter.py | 33 ++- trap/plumber.py | 11 +- trap/prediction_server.py | 106 +++++--- trap/preview_renderer.py | 67 ++++- trap/process_data.py | 330 +++++++++++------------- trap/tools.py | 137 +++++++--- trap/tracker.py | 100 +++++--- 9 files changed, 1000 insertions(+), 300 deletions(-) create mode 100644 trap/cv_renderer.py diff --git a/trap/config.py b/trap/config.py index 37809c1..4fa179b 100644 --- a/trap/config.py +++ b/trap/config.py @@ -90,6 +90,13 @@ class CameraAction(argparse.Action): setattr(namespace, 'camera', camera) + +inference_parser.add_argument("--step-size", + # TODO)) Make dataset/model metadata + help="sample step size (should be the same as for data processing and augmentation)", + type=int, + default=1, + ) inference_parser.add_argument("--model_dir", help="directory with the model to use for inference", type=str, # TODO: make into Path diff --git a/trap/cv_renderer.py b/trap/cv_renderer.py new file mode 100644 index 0000000..8df9912 --- /dev/null +++ b/trap/cv_renderer.py @@ -0,0 +1,509 @@ +# used for "Forward Referencing of type annotations" +from __future__ import annotations + +import time +import ffmpeg +from argparse import Namespace +import datetime +import logging +from multiprocessing import Event +from multiprocessing.synchronize import Event as BaseEvent +import cv2 +import numpy as np +import json +import pyglet +import pyglet.event +import zmq +import tempfile +from pathlib import Path +import shutil +import math +from typing import Iterable, Optional + + +from pyglet import shapes +from PIL import Image + +from trap.frame_emitter import DetectionState, Frame, Track, Camera +from trap.preview_renderer import FrameWriter +from trap.tools import draw_track, draw_track_predictions, draw_track_projected, draw_trackjectron_history, to_point + + + +logger = logging.getLogger("trap.simple_renderer") + +class CvRenderer: + def __init__(self, config: Namespace, is_running: BaseEvent): + self.config = config + self.is_running = is_running + + context = zmq.Context() + self.prediction_sock = context.socket(zmq.SUB) + self.prediction_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!! + self.prediction_sock.setsockopt(zmq.SUBSCRIBE, b'') + # self.prediction_sock.connect(config.zmq_prediction_addr if not self.config.bypass_prediction else config.zmq_trajectory_addr) + self.prediction_sock.connect(config.zmq_prediction_addr) + + self.tracker_sock = context.socket(zmq.SUB) + self.tracker_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!! + self.tracker_sock.setsockopt(zmq.SUBSCRIBE, b'') + self.tracker_sock.connect(config.zmq_trajectory_addr) + + self.frame_sock = context.socket(zmq.SUB) + self.frame_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. NB. make sure this comes BEFORE connect, otherwise it's ignored!! 
+ self.frame_sock.setsockopt(zmq.SUBSCRIBE, b'') + self.frame_sock.connect(config.zmq_frame_addr) + + + self.H = self.config.H + + + self.inv_H = np.linalg.pinv(self.H) + + # TODO: get FPS from frame_emitter + # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720)) + self.fps = 60 + self.frame_size = (self.config.camera.w,self.config.camera.h) + self.hide_stats = False + self.out_writer = self.start_writer() if self.config.render_file else None + self.streaming_process = self.start_streaming() if self.config.render_url else None + + self.first_time: float|None = None + self.frame: Frame|None= None + self.tracker_frame: Frame|None = None + self.prediction_frame: Frame|None = None + + + # self.init_shapes() + + # self.init_labels() + + + def init_shapes(self): + ''' + Due to error when running headless, we need to configure options before extending the shapes class + ''' + class GradientLine(shapes.Line): + def __init__(self, x, y, x2, y2, width=1, color1=[255,255,255], color2=[255,255,255], batch=None, group=None): + # print('colors!', colors) + # assert len(colors) == 6 + + r, g, b, *a = color1 + self._rgba1 = (r, g, b, a[0] if a else 255) + r, g, b, *a = color2 + self._rgba2 = (r, g, b, a[0] if a else 255) + + # print('rgba', self._rgba) + + super().__init__(x, y, x2, y2, width, color1, batch=None, group=None) + # 10.2f}s" + self.labels['frame_latency'].text = f"{self.frame.time - time.time():.2f}s" + + if self.tracker_frame: + self.labels['tracker_idx'].text = f"{self.tracker_frame.index - self.frame.index}" + self.labels['tracker_time'].text = f"{self.tracker_frame.time - time.time():.3f}s" + self.labels['track_len'].text = f"{len(self.tracker_frame.tracks)} tracks" + + if self.prediction_frame: + self.labels['pred_idx'].text = f"{self.prediction_frame.index - self.frame.index}" + self.labels['pred_time'].text = f"{self.prediction_frame.time - time.time():.3f}s" + # self.labels['track_len'].text = f"{len(self.prediction_frame.tracks)} tracks" + + + # cv2.putText(img, f"{frame.index:06d}", (20,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + # cv2.putText(img, f"{frame.time - first_time:.3f}s", (120,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + + # if prediction_frame: + # # render Δt and Δ frames + # cv2.putText(img, f"{prediction_frame.index - frame.index}", (90,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + # cv2.putText(img, f"{prediction_frame.time - time.time():.2f}s", (200,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + # cv2.putText(img, f"{len(prediction_frame.tracks)} tracks", (500,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + # cv2.putText(img, f"h: {np.average([len(t.history or []) for t in prediction_frame.tracks.values()]):.2f}", (580,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + # cv2.putText(img, f"ph: {np.average([len(t.predictor_history or []) for t in prediction_frame.tracks.values()]):.2f}", (660,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + # cv2.putText(img, f"p: {np.average([len(t.predictions or []) for t in prediction_frame.tracks.values()]):.2f}", (740,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + + # options = [] + # for option in ['prediction_horizon','num_samples','full_dist','gmm_mode','z_mode', 'model_dir']: + # options.append(f"{option}: {config.__dict__[option]}") + + + # cv2.putText(img, options.pop(-1), (20,img.shape[0]-30), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + # cv2.putText(img, " | ".join(options), (20,img.shape[0]-10), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + + + + def check_frames(self, dt): + new_tracks = 
False + try: + self.frame: Frame = self.frame_sock.recv_pyobj(zmq.NOBLOCK) + if not self.first_time: + self.first_time = self.frame.time + img = cv2.GaussianBlur(self.frame.img, (15, 15), 0) + img = cv2.flip(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), 0) + img = pyglet.image.ImageData(self.frame_size[0], self.frame_size[1], 'RGB', img.tobytes()) + # don't draw in batch, so that it is the background + self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg) + self.video_sprite.opacity = 100 + except zmq.ZMQError as e: + # idx = frame.index if frame else "NONE" + # logger.debug(f"reuse video frame {idx}") + pass + try: + self.prediction_frame: Frame = self.prediction_sock.recv_pyobj(zmq.NOBLOCK) + new_tracks = True + except zmq.ZMQError as e: + pass + try: + self.tracker_frame: Frame = self.tracker_sock.recv_pyobj(zmq.NOBLOCK) + new_tracks = True + except zmq.ZMQError as e: + pass + + + + def on_key_press(self, symbol, modifiers): + print('A key was pressed, use f to hide') + if symbol == ord('f'): + self.window.set_fullscreen(not self.window.fullscreen) + if symbol == ord('h'): + self.hide_stats = not self.hide_stats + + def check_running(self, dt): + if not self.is_running.is_set(): + self.window.close() + self.event_loop.exit() + + def on_close(self): + self.is_running.clear() + + def on_refresh(self, dt: float): + # update shapes + # self.bg = + for track_id, track in self.drawn_tracks.items(): + track.update_drawn_positions(dt) + + + self.refresh_labels(dt) + + # self.shape1 = shapes.Circle(700, 150, 100, color=(50, 0, 30), batch=self.batch_anim) + # self.shape3 = shapes.Circle(800, 150, 100, color=(100, 225, 30), batch=self.batch_anim) + pass + + def on_draw(self): + self.window.clear() + + self.batch_bg.draw() + + for track in self.drawn_tracks.values(): + for shape in track.shapes: + shape.draw() # for some reason the batches don't work + for track in self.drawn_tracks.values(): + for shapes in track.pred_shapes: + for shape in shapes: + shape.draw() + # self.batch_anim.draw() + self.batch_overlay.draw() + + + # pyglet.graphics.draw(3, pyglet.gl.GL_LINE, ("v2i", (100,200, 600,800)), ('c3B', (255,255,255, 255,255,255))) + + + + if not self.hide_stats: + self.fps_display.draw() + + # if streaming, capture buffer and send + try: + if self.streaming_process or self.out_writer: + buf = pyglet.image.get_buffer_manager().get_color_buffer() + img_data = buf.get_image_data() + data = img_data.get_data() # alternative: .get_data("RGBA", image_data.pitch) + img = np.asanyarray(data).reshape((img_data.height, img_data.width, 4)) + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB) + img = np.flip(img, 0) + # img = cv2.flip(img, cv2.0) + + # cv2.imshow('frame', img) + # cv2.waitKey(1) + if self.streaming_process: + self.streaming_process.stdin.write(img.tobytes()) + if self.out_writer: + self.out_writer.write(img) + except Exception as e: + logger.exception(e) + + + def start_writer(self): + if not self.config.output_dir.exists(): + raise FileNotFoundError("Path does not exist") + + date_str = datetime.datetime.now().isoformat(timespec="minutes") + filename = self.config.output_dir / f"render_predictions-{date_str}-{self.config.detector}.mp4" + logger.info(f"Write to {filename}") + + return FrameWriter(str(filename), self.fps, self.frame_size) + + fourcc = cv2.VideoWriter_fourcc(*'vp09') + + return cv2.VideoWriter(str(filename), fourcc, self.fps, self.frame_size) + + def start_streaming(self): + return ( + ffmpeg + .input('pipe:', format='rawvideo',codec="rawvideo", pix_fmt='bgr24', 
s='{}x{}'.format(*self.frame_size)) + .output( + self.config.render_url, + #codec = "copy", # use same codecs of the original video + codec='libx264', + listen=1, # enables HTTP server + pix_fmt="yuv420p", + preset="ultrafast", + tune="zerolatency", + # g=f"{self.fps*2}", + g=f"{60*2}", + analyzeduration="2000000", + probesize="1000000", + f='mpegts' + ) + .overwrite_output() + .run_async(pipe_stdin=True) + ) + # return process + + + + + def run(self): + frame = None + prediction_frame = None + tracker_frame = None + + i=0 + first_time = None + + while self.is_running.is_set(): + i+=1 + + + # zmq_ev = self.frame_sock.poll(timeout=2000) + # if not zmq_ev: + # # when no data comes in, loop so that is_running is checked + # continue + + try: + frame: Frame = self.frame_sock.recv_pyobj(zmq.NOBLOCK) + except zmq.ZMQError as e: + # idx = frame.index if frame else "NONE" + # logger.debug(f"reuse video frame {idx}") + pass + # else: + # logger.debug(f'new video frame {frame.index}') + + + if frame is None: + # might need to wait a few iterations before first frame comes available + time.sleep(.1) + continue + + try: + prediction_frame: Frame = self.prediction_sock.recv_pyobj(zmq.NOBLOCK) + except zmq.ZMQError as e: + logger.debug(f'reuse prediction') + + try: + tracker_frame: Frame = self.tracker_sock.recv_pyobj(zmq.NOBLOCK) + except zmq.ZMQError as e: + logger.debug(f'reuse tracks') + + if first_time is None: + first_time = frame.time + + img = decorate_frame(frame, tracker_frame, prediction_frame, first_time, self.config) + + img_path = (self.config.output_dir / f"{i:05d}.png").resolve() + + logger.debug(f"write frame {frame.time - first_time:.3f}s") + if self.out_writer: + self.out_writer.write(img) + if self.streaming_process: + self.streaming_process.stdin.write(img.tobytes()) + if self.config.render_window: + cv2.imshow('frame',img) + cv2.waitKey(1) + logger.info('Stopping') + + # if i>2: + if self.streaming_process: + self.streaming_process.stdin.close() + if self.out_writer: + self.out_writer.release() + if self.streaming_process: + # oddly wrapped, because both close and release() take time. + logger.info('wait for closing stream') + self.streaming_process.wait() + + logger.info('stopped') +# colorset = itertools.product([0,255], repeat=3) # but remove white +# colorset = [(0, 0, 0), +# (0, 0, 255), +# (0, 255, 0), +# (0, 255, 255), +# (255, 0, 0), +# (255, 0, 255), +# (255, 255, 0) +# ] +colorset = [ + (255,255,100), + (255,100,255), + (100,255,255), + ] +# colorset = [ +# (0,0,0), +# ] + +def convert_world_space_to_img_space(H: cv2.Mat): + """Transform the given matrix so that it immediately converts + the points to img space""" + new_H = H.copy() + new_H[:2] = H[:2] * 100 + return new_H + +def convert_world_points_to_img_points(points: Iterable): + """Transform the given matrix so that it immediately converts + the points to img space""" + if isinstance(points, np.ndarray): + return np.array(points) * 100 + return [[p[0]*100, p[1]*100] for p in points] + +# Deprecated +def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame, first_time: float, config: Namespace) -> np.array: + # TODO: replace opencv with QPainter to support alpha? 
https://doc.qt.io/qtforpython-5/PySide2/QtGui/QPainter.html#PySide2.QtGui.PySide2.QtGui.QPainter.drawImage + # or https://github.com/pygobject/pycairo?tab=readme-ov-file + # or https://pyglet.readthedocs.io/en/latest/programming_guide/shapes.html + # and use http://code.astraw.com/projects/motmot/pygarrayimage.html or https://gist.github.com/nkymut/1cb40ea6ae4de0cf9ded7332f1ca0d55 + # or https://api.arcade.academy/en/stable/index.html (supports gradient color in line -- "Arcade is built on top of Pyglet and OpenGL.") + + undistorted_img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx) + dst_img = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H),(config.camera.w,config.camera.h)) + + overlay = np.zeros(dst_img.shape, np.uint8) + # Fill image with red color(set each pixel to red) + overlay[:] = (0, 0, 0) + + img = cv2.addWeighted(dst_img, .7, overlay, .3, 0) + # img = frame.img.copy() + + # all not working: + # if i == 1: + # # thanks to GpG for fixing scaling issue: https://stackoverflow.com/a/39668864 + # scale_factor = 1./20 # from 10m to 1000px + # S = np.array([[scale_factor, 0,0],[0,scale_factor,0 ],[ 0,0,1 ]]) + # new_H = S * self.H * np.linalg.inv(S) + # warpedFrame = cv2.warpPerspective(img, new_H, (1000,1000)) + # cv2.imwrite(str(self.config.output_dir / "orig.png"), warpedFrame) + cv2.rectangle(img, (0,0), (img.shape[1],25), (0,0,0), -1) + + if not tracker_frame: + cv2.putText(img, f"and track", (650,17), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,0), 1) + else: + for track_id, track in tracker_frame.tracks.items(): + inv_H = np.linalg.pinv(tracker_frame.H) + draw_track_projected(img, track, int(track_id), config.camera, convert_world_points_to_img_points) + + if not prediction_frame: + cv2.putText(img, f"Waiting for prediction...", (500,17), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,0), 1) + # continue + else: + for track_id, track in prediction_frame.tracks.items(): + inv_H = np.linalg.pinv(prediction_frame.H) + # draw_track(img, track, int(track_id)) + draw_trackjectron_history(img, track, int(track_id), convert_world_points_to_img_points) + draw_track_predictions(img, track, int(track_id)+1, config.camera, convert_world_points_to_img_points) + cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1) + + base_color = (255,)*3 + info_color = (255,255,0) + + cv2.putText(img, f"{frame.index:06d}", (20,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + cv2.putText(img, f"{frame.time - first_time:.3f}s", (120,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + + if prediction_frame: + # render Δt and Δ frames + cv2.putText(img, f"{prediction_frame.index - frame.index}", (90,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + cv2.putText(img, f"{prediction_frame.time - time.time():.2f}s", (200,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + cv2.putText(img, f"{len(prediction_frame.tracks)} tracks", (500,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) + cv2.putText(img, f"h: {np.average([len(t.history or []) for t in prediction_frame.tracks.values()]):.2f}", (580,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + cv2.putText(img, f"ph: {np.average([len(t.predictor_history or []) for t in prediction_frame.tracks.values()]):.2f}", (660,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1) + cv2.putText(img, f"p: {np.average([len(t.predictions or []) for t in prediction_frame.tracks.values()]):.2f}", 
(740,17), cv2.FONT_HERSHEY_PLAIN, 1, info_color, 1)
+
+    options = []
+    for option in ['prediction_horizon','num_samples','full_dist','gmm_mode','z_mode', 'model_dir']:
+        options.append(f"{option}: {config.__dict__[option]}")
+
+
+    cv2.putText(img, options.pop(-1), (20,img.shape[0]-30), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
+    cv2.putText(img, " | ".join(options), (20,img.shape[0]-10), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
+
+    return img
+
+
+def run_cv_renderer(config: Namespace, is_running: BaseEvent):
+    renderer = CvRenderer(config, is_running)
+    renderer.run()
\ No newline at end of file
diff --git a/trap/frame_emitter.py b/trap/frame_emitter.py
index 3b2650a..4845f36 100644
--- a/trap/frame_emitter.py
+++ b/trap/frame_emitter.py
@@ -166,8 +166,9 @@ class Track:
     predictor_history: Optional[list] = None # in image space
     predictions: Optional[list] = None
     fps: int = 12
+    source: Optional[int] = None # to keep track of processed tracks
 
-    def get_projected_history(self, H, camera: Optional[Camera]= None) -> np.array:
+    def get_projected_history(self, H: Optional[cv2.Mat] = None, camera: Optional[Camera]= None) -> np.array:
         foot_coordinates = [d.get_foot_coords() for d in self.history]
         # TODO)) Undistort points before perspective transform
         if len(foot_coordinates):
@@ -188,11 +189,15 @@ class Track:
         #     new_history = [Detection(d.track_id, l, t, w, h, d.conf, d.state, d.frame_nr, d.det_class) for l, t, w, h, d in zip(ls,ts,ws,hs, track.history)]
         #     new_track = Track(track.track_id, new_history, track.predictor_history, track.predictions)
         new_history = []
-        for j in range(len(self.history)-1):
+        for j in range(len(self.history)):
             a = self.history[j]
+            new_history.append(Detection(a.track_id, a.l, a.t, a.w, a.h, a.conf, a.state, a.frame_nr, a.det_class))
+
+            if j+1 >= len(self.history):
+                break
+
             b = self.history[j+1]
             gap = b.frame_nr - a.frame_nr
-            new_history.append(Detection(a.track_id, a.l, a.t, a.w, a.h, a.conf, a.state, a.frame_nr, a.det_class))
             if gap < 1:
                 logger.error(f"WARNING, gap between frames {a.frame_nr} -> {b.frame_nr} is negative?")
             if gap > 1:
@@ -213,11 +218,29 @@ class Track:
             self.predictions,
             self.fps)
 
+    def is_complete(self):
+        diffs = [(b.frame_nr - a.frame_nr) for a,b in zip(self.history[:-1], self.history[1:])]
+        return all([d == 1 for d in diffs])
+
+
+    def get_sampled(self, step_size = 1, offset=0):
+        if not self.is_complete():
+            t = self.get_with_interpolated_history()
+        else:
+            t = self
+
+        return Track(
+            t.track_id,
+            t.history[offset::step_size],
+            t.predictor_history,
+            t.predictions,
+            t.fps/step_size)
+
     def to_trajectron_node(self, camera: Camera, env: Environment) -> Node:
         positions = self.get_projected_history(None, camera)
-        velocity = np.gradient(positions, self.fps, axis=0)
-        acceleration = np.gradient(velocity, self.fps, axis=0)
+        velocity = np.gradient(positions, 1/self.fps, axis=0)
+        acceleration = np.gradient(velocity, 1/self.fps, axis=0)
 
         new_first_idx = self.history[0].frame_nr
 
diff --git a/trap/plumber.py b/trap/plumber.py
index e91c0e4..e1a7b83 100644
--- a/trap/plumber.py
+++ b/trap/plumber.py
@@ -7,6 +7,7 @@ import signal
 import sys
 import time
 from trap.config import parser
+from trap.cv_renderer import run_cv_renderer
 from trap.frame_emitter import run_frame_emitter
 from trap.prediction_server import run_prediction_server
 from trap.preview_renderer import run_preview_renderer
@@ -94,11 +95,13 @@ def start():
     if args.render_file or args.render_url or args.render_window:
         if not args.render_no_preview or args.render_file or args.render_url:
procs.append( - ExceptionHandlingProcess(target=run_preview_renderer, kwargs={'config': args, 'is_running': isRunning}, name='preview') + # ExceptionHandlingProcess(target=run_cv_renderer, kwargs={'config': args, 'is_running': isRunning}, name='preview') + ExceptionHandlingProcess(target=run_cv_renderer, kwargs={'config': args, 'is_running': isRunning}, name='preview') + ) + if args.render_no_preview: + procs.append( + ExceptionHandlingProcess(target=run_animation_renderer, kwargs={'config': args, 'is_running': isRunning}, name='renderer') ) - procs.append( - ExceptionHandlingProcess(target=run_animation_renderer, kwargs={'config': args, 'is_running': isRunning}, name='renderer') - ) if not args.bypass_prediction: procs.append( diff --git a/trap/prediction_server.py b/trap/prediction_server.py index c3434df..cfdcaf2 100644 --- a/trap/prediction_server.py +++ b/trap/prediction_server.py @@ -289,6 +289,8 @@ class PredictionServer: # on no data loop so that is_running is checked continue + t_init = time.time() + data = self.trajectory_socket.recv() # print('recv tracker frame') frame: Frame = pickle.loads(data) @@ -308,39 +310,55 @@ class PredictionServer: # TODO: modify this into a mapping function between JS data an the expected Node format # node = FakeNode(online_env.NodeType.PEDESTRIAN) - history = [[h['x'], h['y']] for h in track.get_projected_history_as_dict(frame.H, self.config.camera)] - if self.config.cm_to_m: - history = history_cm_to_m(history) + + if self.config.step_size > 1: + if (len(track.history) % self.config.step_size) != 0: + # only add when having a new step + continue + track = track.get_sampled(self.config.step_size) - history = np.array(history) - x = history[:, 0] #- cx # we can create bigger steps by doing history[::5,0] - y = history[:, 1] #- cy # history[::5,1] - if self.config.center_data: - x -= cx - y -= cy - # TODO: calculate dt based on input - vx = derivative_of(x, .1) #eval_scene.dt - vy = derivative_of(y, .1) - ax = derivative_of(vx, .1) - ay = derivative_of(vy, .1) + if len(track.history) < 2: + continue + + node = track.to_trajectron_node(self.config.camera, online_env) + # print(node.data.data[-1]) + input_dict[node] = np.array(object=node.data.data[-1]) + # print("history", node.data.data[-10:]) + # print("get", node.get(np.array([frame.index-10,frame.index]), {'position': ['x', 'y']})) + + # history = [[h['x'], h['y']] for h in track.get_projected_history_as_dict(frame.H, self.config.camera)] + # if self.config.cm_to_m: + # history = history_cm_to_m(history) + + # history = np.array(history) + # x = history[:, 0] #- cx # we can create bigger steps by doing history[::5,0] + # y = history[:, 1] #- cy # history[::5,1] + # if self.config.center_data: + # x -= cx + # y -= cy + # # TODO: calculate dt based on input + # vx = derivative_of(x, .1) #eval_scene.dt + # vy = derivative_of(y, .1) + # ax = derivative_of(vx, .1) + # ay = derivative_of(vy, .1) - data_dict = {('position', 'x'): x[:], # [-10:-1] - ('position', 'y'): y[:], # [-10:-1] - ('velocity', 'x'): vx[:], # [-10:-1] - ('velocity', 'y'): vy[:], # [-10:-1] - ('acceleration', 'x'): ax[:], # [-10:-1] - ('acceleration', 'y'): ay[:]} # [-10:-1] - data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']]) + # data_dict = {('position', 'x'): x[:], # [-10:-1] + # ('position', 'y'): y[:], # [-10:-1] + # ('velocity', 'x'): vx[:], # [-10:-1] + # ('velocity', 'y'): vy[:], # [-10:-1] + # ('acceleration', 'x'): ax[:], # [-10:-1] + # ('acceleration', 'y'): ay[:]} # [-10:-1] + 
# data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']]) - node_data = pd.DataFrame(data_dict, columns=data_columns) - node = Node( - node_type=online_env.NodeType.PEDESTRIAN, - node_id=identifier, - data=node_data, - first_timestep=timestep - ) + # node_data = pd.DataFrame(data_dict, columns=data_columns) + # node = Node( + # node_type=online_env.NodeType.PEDESTRIAN, + # node_id=identifier, + # data=node_data, + # first_timestep=timestep + # ) - input_dict[node] = np.array(object=[x[-1],y[-1],vx[-1],vy[-1],ax[-1],ay[-1]]) + # input_dict[node] = np.array(object=[x[-1],y[-1],vx[-1],vy[-1],ax[-1],ay[-1]]) # print(input_dict) @@ -350,7 +368,8 @@ class PredictionServer: # And want to update the network # data = json.dumps({}) - self.send_frame(frame) + # TODO)) signal doing nothing + # self.send_frame(frame) continue @@ -375,6 +394,7 @@ class PredictionServer: # in the OnlineMultimodalGenerativeCVAE (see trajectron.model.online_mgcvae.py) each node's distribution # is put stored in self.latent.p_dist by OnlineMultimodalGenerativeCVAE.p_z_x(). Type: torch.distributions.OneHotCategorical # Later sampling in discrete_latent.py: DiscreteLatent.sample_p() + # print(input_dict) dists, preds = trajectron.incremental_forward(input_dict, maps, prediction_horizon=self.config.prediction_horizon, # TODO: make variable @@ -383,11 +403,7 @@ class PredictionServer: gmm_mode=self.config.gmm_mode, # "If True: The mode of the Gaussian Mixture Model (GMM) is sampled (see trajectron.model.mgcvae.py)" z_mode=self.config.z_mode # "Predictions from the model’s most-likely high-level latent behavior mode" (see trajecton.models.components.discrete_latent:sample_p(most_likely_z=z_mode)) ) - end = time.time() - logger.debug("took %.2f s (= %.2f Hz) w/ %d nodes and %d edges" % (end - start, - 1. / (end - start), len(trajectron.nodes), - trajectron.scene_graph.get_num_edges())) - + # unsure what this bit from online_prediction.py does: # detailed_preds_dict = dict() # for node in eval_scene.nodes: @@ -399,16 +415,22 @@ class PredictionServer: # histories_dict provides the trajectory used for prediction # futures_dict is the Ground Truth, which is unvailable in an online setting - prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories({timestep: preds}, + prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories({frame.index: preds}, eval_scene.dt, hyperparams['maximum_history_length'], hyperparams['prediction_horizon'] ) + - + end = time.time() + logger.debug("took %.2f s (= %.2f Hz) w/ %d nodes and %d edges -- init: %.2f s" % (end - start, + 1. 
/ (end - start), len(trajectron.nodes), + trajectron.scene_graph.get_num_edges(), start-t_init)) + # if self.config.center_data: # prediction_dict, histories_dict, futures_dict = offset_trajectron_dict(prediction_dict, cx, cy), offset_trajectron_dict(histories_dict, cx, cy), offset_trajectron_dict(futures_dict, cx, cy) - + print('pred timesteps', list(prediction_dict.keys())) + print('histories', [n.data.data.shape[0] for n in prediction_dict[frame.index].keys()]) if self.config.cm_to_m: # convert back to fit homography prediction_dict, histories_dict, futures_dict = prediction_m_to_cm(prediction_dict), prediction_m_to_cm(histories_dict), prediction_m_to_cm(futures_dict) @@ -425,13 +447,14 @@ class PredictionServer: response = {} logger.debug(f"{histories_dict=}") - for node in histories_dict: history = histories_dict[node] # future = futures_dict[node] # ground truth dict predictions = prediction_dict[node] + # print('preds', len(predictions[0][0])) if not len(history) or np.isnan(history[-1]).any(): + logger.warning('skip for no history') continue # response[node.id] = { @@ -442,7 +465,8 @@ class PredictionServer: # 'predictions': predictions[0].tolist() # use batch 0 # } - frame.tracks[node.id].predictor_history = history.tolist() + + frame.tracks[node.id].predictor_history = history.tolist() #node.data[:,{'position': ['x', 'y']}].tolist() frame.tracks[node.id].predictions = predictions[0].tolist() # use batch 0 # data = json.dumps(response) @@ -455,7 +479,7 @@ class PredictionServer: frame = self.smoother.smooth_frame_predictions(frame) self.send_frame(frame) - time.sleep(.5) + logger.info('Stopping') diff --git a/trap/preview_renderer.py b/trap/preview_renderer.py index a5c8331..f95695c 100644 --- a/trap/preview_renderer.py +++ b/trap/preview_renderer.py @@ -71,12 +71,15 @@ class DrawnTrack: self.renderer = renderer self.camera = camera self.H = H # TODO)) Move H to Camera object - self.set_track(track, H) - self.set_predictions(track, H) self.drawn_positions = [] self.drawn_predictions = [] + self.drawn_pred_history = [] self.shapes: list[pyglet.shapes.Line] = [] self.pred_shapes: list[list[pyglet.shapes.Line]] = [] + self.pred_history_shapes: list[pyglet.shapes.Line] = [] + + self.set_track(track, H) + self.set_predictions(track, H) def set_track(self, track: Track, H = None): self.update_at = time.time() @@ -91,14 +94,20 @@ class DrawnTrack: def set_predictions(self, track: Track, H = None): pred_coords = [] + pred_history_coords = [] if track.predictions: if self.draw_projection == PROJECTION_IMG: for pred_i, pred in enumerate(track.predictions): pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist()) + + pred_history_coords = cv2.perspectiveTransform(np.array([track.predictor_history]), self.inv_H)[0].tolist() elif self.draw_projection == PROJECTION_MAP: pred_coords = [pred for pred in track.predictions] + pred_history_coords = track.predictor_history + self.pred_track = track self.pred_coords = pred_coords + self.pred_history_coords = pred_history_coords # color = (128,0,128) if pred_i else (128, @@ -107,6 +116,8 @@ class DrawnTrack: use dt to lerp the drawn positions in the direction of current prediction ''' # TODO: make lerp, currently quick way to get results + + # 1. 
track history
         for i, pos in enumerate(self.drawn_positions):
             self.drawn_positions[i][0] = int(exponentialDecay(self.drawn_positions[i][0], self.coords[i][0], 16, dt))
             self.drawn_positions[i][1] = int(exponentialDecay(self.drawn_positions[i][1], self.coords[i][1], 16, dt))
@@ -114,6 +125,16 @@
         if len(self.coords) > len(self.drawn_positions):
             self.drawn_positions.extend(self.coords[len(self.drawn_positions):])
 
+        # 2. history as seen by predictor (Trajectron)
+        for i, pos in enumerate(self.drawn_pred_history):
+            if len(self.pred_history_coords) > i:
+                self.drawn_pred_history[i][0] = int(exponentialDecay(self.drawn_pred_history[i][0], self.pred_history_coords[i][0], 16, dt))
+                self.drawn_pred_history[i][1] = int(exponentialDecay(self.drawn_pred_history[i][1], self.pred_history_coords[i][1], 16, dt))
+
+        if len(self.pred_history_coords) > len(self.drawn_pred_history):
+            self.drawn_pred_history.extend(self.pred_history_coords[len(self.drawn_pred_history):])
+
+        # 3. predictions
         if len(self.pred_coords):
             for a, drawn_prediction in enumerate(self.drawn_predictions):
                 for i, pos in enumerate(drawn_prediction):
@@ -137,6 +158,8 @@
         #     self.drawn_predictions.extend(self.pred_coords[len(self.drawn_predictions):])
 
         # self.drawn_positions = self.coords
+
+        # finally: update shapes from coordinates
         self.update_shapes(dt)
         return self.drawn_positions
 
@@ -167,11 +190,37 @@
                 line.radius = int(exponentialDecay(line.radius, 1.5, 3, dt))
                 line.color = color
                 line.opacity = int(exponentialDecay(line.opacity, 180, 8, dt))
-        
+
         # TODO: basically a duplication of the above, do this smarter?
         # TODO: add intermediate segment
         color = colorset[self.track_id % len(colorset)]
 
+        if len(self.pred_history_shapes) > len(self.drawn_pred_history):
+            self.pred_history_shapes = self.pred_history_shapes[:len(self.drawn_pred_history)]
+
+        # for i, pos in self.drawn_pred_history.enumerate():
+        for ci in range(1, len(self.drawn_pred_history)):
+            x, y = [int(p) for p in self.drawn_pred_history[ci-1]]
+            x2, y2 = [int(p) for p in self.drawn_pred_history[ci]]
+
+            y, y2 = self.renderer.window.height - y, self.renderer.window.height - y2
+
+            if ci >= len(self.pred_history_shapes):
+                # line = self.renderer.gradientLine(x, y, x2, y2, 3, color, color, batch=self.renderer.batch_anim)
+                line = pyglet.shapes.Line(x,y ,x2, y2, 2.5, color, batch=self.renderer.batch_anim)
+                # line = pyglet.shapes.Arc(x2, y2, 10, thickness=2, color=color, batch=self.renderer.batch_anim)
+                line.opacity = 120
+                self.pred_history_shapes.append(line)
+
+            else:
+                line = self.pred_history_shapes[ci-1]
+                line.x, line.y = x, y
+                line.x2, line.y2 = x2, y2
+                # line.radius = int(exponentialDecay(line.radius, 1.5, 3, dt))
+                line.color = color
+                line.opacity = int(exponentialDecay(line.opacity, 180, 8, dt))
+
+
         for a, drawn_predictions in enumerate(self.drawn_predictions):
             if len(self.pred_shapes) <= a:
                 self.pred_shapes.append([])
@@ -694,14 +743,14 @@ class PreviewRenderer:
 #     (255, 0, 255),
 #     (255, 255, 0)
 # ]
-# colorset = [
-#     (255,255,100),
-#     (255,100,255),
-#     (100,255,255),
-# ]
 colorset = [
-    (0,0,0),
+    (255,255,100),
+    (255,100,255),
+    (100,255,255),
 ]
+# colorset = [
+#     (0,0,0),
+# ]
 
 # Deprecated
 def decorate_frame(frame: Frame, prediction_frame: Frame, first_time: float, config: Namespace) -> np.array:
diff --git a/trap/process_data.py b/trap/process_data.py
index 5120653..f5c99c6 100644
--- a/trap/process_data.py
+++ b/trap/process_data.py
@@ -1,6 +1,10 @@
+from collections import defaultdict
+import datetime
 from pathlib import Path
import sys import os +import time +from attr import dataclass import numpy as np import pandas as pd import dill @@ -8,7 +12,9 @@ import tqdm import argparse from typing import List -from trap.tracker import Smoother +from trap.config import CameraAction, HomographyAction +from trap.frame_emitter import Camera +from trap.tracker import Smoother, TrackReader #sys.path.append("../../") from trajectron.environment import Environment, Scene, Node @@ -23,7 +29,7 @@ frame_diff = 10 desired_frame_diff = 1 dt = 1/FPS # dt per frame (e.g. 1/FPS) smooth_window = FPS * 1.5 # see also tracker.py -min_track_length = 10 +min_track_length = 20 standardization = { 'PEDESTRIAN': { @@ -43,54 +49,39 @@ standardization = { } -def augment_scene(scene, angle): - def rotate_pc(pc, alpha): - M = np.array([[np.cos(alpha), -np.sin(alpha)], - [np.sin(alpha), np.cos(alpha)]]) - return M @ pc - data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']]) +class RollingAverage(): + def __init__(self): + self.v = 0 + self.n = 0 + + def add(self, v): + self.v = (self.v * self.n + v) / (self.n +1) + self.n += 1 - scene_aug = Scene(timesteps=scene.timesteps, dt=scene.dt, name=scene.name) - - alpha = angle * np.pi / 180 - - for node in scene.nodes: - x = node.data.position.x.copy() - y = node.data.position.y.copy() - - x, y = rotate_pc(np.array([x, y]), alpha) - - vx = derivative_of(x, scene.dt) - vy = derivative_of(y, scene.dt) - ax = derivative_of(vx, scene.dt) - ay = derivative_of(vy, scene.dt) - - data_dict = {('position', 'x'): x, - ('position', 'y'): y, - ('velocity', 'x'): vx, - ('velocity', 'y'): vy, - ('acceleration', 'x'): ax, - ('acceleration', 'y'): ay} - - node_data = pd.DataFrame(data_dict, columns=data_columns) - - node = Node(node_type=node.type, node_id=node.id, data=node_data, first_timestep=node.first_timestep) - - scene_aug.nodes.append(node) - return scene_aug + return self.v -def augment(scene): - scene_aug = np.random.choice(scene.augmented) - scene_aug.temporal_scene_graph = scene.temporal_scene_graph - return scene_aug +@dataclass +class TrackIteration: + smooth: bool + step_size: int + step_offset: int + @classmethod + def iteration_variations(cls, smooth = True, toggle_smooth=True, sample_step_size=1): + iterations: List[TrackIteration] = [] + for i in range(sample_step_size): + iterations.append(TrackIteration(smooth, sample_step_size, i)) + if toggle_smooth: + iterations.append(TrackIteration(not smooth, sample_step_size, i)) + return iterations # maybe_makedirs('trajectron-data') # for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']: -def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool): +def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, cm_to_m: bool, center_data: bool, bin_positions: bool, camera: Camera, step_size: int): + name += f"-{datetime.date.today()}" print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}") nl = 0 @@ -101,36 +92,32 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c smoother = Smoother(window_len=smooth_window, convolution=False) if smooth_tracks else None + reader = TrackReader(src_dir, camera.fps) - files = list(src_dir.glob("*/*.txt")) - print(files) - all_data = pd.concat((pd.read_csv(f, sep='\t', index_col=False, header=None) for f in files), axis=0, ignore_index=True) - print(all_data.shape) - if 
all_data.shape[1] == 8: - all_data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y'] - elif all_data.shape[1] == 9: - all_data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y', 'state'] - else: - raise Exception("Unknown data format. Check column count") - - if cm_to_m: - all_data['pos_x'] /= 100 - all_data['pos_y'] /= 100 - - - mean_x, mean_y = all_data['pos_x'].mean(), all_data['pos_y'].mean() - cx = .5 * all_data['pos_x'].min() + .5 * all_data['pos_x'].max() - cy = .5 * all_data['pos_y'].min() + .5 * all_data['pos_y'].max() - # bins of .5 meter - # print(np.ceil(all_data['pos_x'].max())*2)) - if bin_positions: - space_x = np.linspace(0, np.ceil(all_data['pos_x'].max()), int(np.ceil(all_data['pos_x'].max())*2)+1) - space_y = np.linspace(0, np.ceil(all_data['pos_y'].max()), int(np.ceil(all_data['pos_y'].max())*2)+1) + total = len(reader) + bar = tqdm.tqdm(total=total) - print(f"Dataset means: {mean_x=} {mean_y=}, (min: ({all_data['pos_x'].min()}, {all_data['pos_y'].min()}), max: ({all_data['pos_x'].max()}, {all_data['pos_y'].max()}))") - print(f"Dataset centers: {cx=} {cy=}") + destinations = { + 'train': int(total * .8), + 'val': int(total * .12), + 'test': int(total * .08), + } - for data_class in ['train', 'val', 'test']: + max_track = reader.get(str(max([int(k) for k in reader._tracks.keys()]))) + max_frame_nr = max_track.history[-1].frame_nr + print(max_frame_nr) + + # separate call so cursor is kept during multiple loops + track_iterator = iter(reader) + + dt1 = RollingAverage() + dt2 = RollingAverage() + dt3 = RollingAverage() + dt4 = RollingAverage() + + print(f"Camera FPS: {camera.fps}, actual fps: {camera.fps/step_size} (or {(1/camera.fps)*step_size})") + + for data_class, nr_of_items in destinations.items(): env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization) attention_radius = dict() attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0 @@ -139,144 +126,98 @@ def process_data(src_dir: Path, dst_dir: Path, name: str, smooth_tracks: bool, c scenes = [] split_id = f"{name}_{data_class}" data_dict_path = dst_dir / (split_id + '.pkl') - subpath = src_dir / data_class + # subpath = src_dir / data_class - print(data_dict_path) + # prev_src_file = None + # scene = None + + scene_nodes = defaultdict(lambda: []) + iterations = TrackIteration.iteration_variations(smooth_tracks, False, step_size) + + for i, track in zip(range(nr_of_items), track_iterator): + bar.update() + + track_source = track.source + + + + # if track.source != prev_src_file: + # scene = - - for file in subpath.glob("*.txt"): - print(file) + + tot = (dt1.v+dt2.v+dt3.v+dt4.v) + if tot: + bar.set_description(f"{data_dict_path.name} {track_source} ({dt1.v/tot:.4f}, {dt2.v/tot:.4f}, {dt3.v/tot:.4f}, {dt4.v/tot:.4f}) - {len(scene_nodes)}") + + # for file in subpath.glob("*.txt"):] input_data_dict = dict() - - data = pd.read_csv(file, sep='\t', index_col=False, header=None) - - if data.shape[1] == 8: - data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y'] - elif data.shape[1] == 9: - data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y', 'state'] - else: - raise Exception("Unknown data format. 
Check column count") - # data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer') - data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer') + if len(track.history) < min_track_length: + continue - data['frame_id'] = (data['frame_id'] // frame_diff).astype(int) + a = time.time() + interpolated_track = track.get_with_interpolated_history() + b = time.time() + for i_nr, iteration_settings in enumerate(iterations): - data['frame_id'] -= data['frame_id'].min() + if iteration_settings.smooth: + track = smoother.smooth_track(interpolated_track) + # track = Smoother(smooth_window, False).smooth_track(track) + else: + track = interpolated_track # TODO)) Copy & move smooth outside iter loop + c = time.time() - data['node_type'] = 'PEDESTRIAN' - data['node_id'] = data['track_id'].astype(str) - data.sort_values('frame_id', inplace=True) - - # cm to m - if cm_to_m: - data['pos_x'] /= 100 - data['pos_y'] /= 100 - - if center_data: - data['pos_x'] -= cx - data['pos_y'] -= cy - - - if bin_positions: - data['pos_x'] =np.digitize(data['pos_x'], bins=space_x) - data['pos_y'] =np.digitize(data['pos_y'], bins=space_y) - print(data['pos_x']) - - # Mean Position - - print("Means: x:", data['pos_x'].mean(), "y:", data['pos_y'].mean()) - - # TODO)) If this normalization is here, it should also be in prediction_server.py - # data['pos_x'] = data['pos_x'] - data['pos_x'].mean() - # data['pos_y'] = data['pos_y'] - data['pos_y'].mean() - # data['pos_x'] = data['pos_x'] - cx - # data['pos_y'] = data['pos_y'] - cy - - max_timesteps = data['frame_id'].max() - - scene = Scene(timesteps=max_timesteps+1, dt=dt, name=split_id, aug_func=augment if data_class == 'train' else None) - - for node_id in tqdm.tqdm(pd.unique(data['node_id'])): - node_df = data[data['node_id'] == node_id] - if not np.all(np.diff(node_df['frame_id']) == 1): - # print(f"Interval in {node_id} not always 1") - # print(node_df['frame_id']) - # print(np.diff(node_df['frame_id']) != 1) - # mask=np.append(False, np.diff(node_df['frame_id']) != 1) - # print(node_df[mask]['frame_id']) - skipped_for_error += 1 - continue + if iteration_settings.step_size > 1: + track = track.get_sampled(iteration_settings.step_size, iteration_settings.step_offset) + # redo test, it might fall out again + if len(track.history) < min_track_length: + continue - # without repeats, there will mostli likely only be straight movements - # better to filter by time - # only_diff = node_df[['pos_x', 'pos_y']].diff().fillna(1).any(axis=1) - # # print(node_df[['pos_x', 'pos_y']], ) - # # exit() + # track.get_projected_history(H=None, camera=self.config.camera) + node = track.to_trajectron_node(camera, env) + + d = time.time() + # if center_data: + # data['pos_x'] -= cx + # data['pos_y'] -= cy - # # mask positions - # node_values = node_df[only_diff][['pos_x', 'pos_y']].values - # print(node_values) - if bin_positions: - node_values = node_df.iloc[::5, :][['pos_x', 'pos_y']].values - else: - node_values = node_df[['pos_x', 'pos_y']].values - # print(node_values) + # if bin_positions: + # data['pos_x'] =np.digitize(data['pos_x'], bins=space_x) + # data['pos_y'] =np.digitize(data['pos_y'], bins=space_y) + # print(data['pos_x']) - if node_values.shape[0] < min_track_length: - continue - - new_first_idx = node_df['frame_id'].iloc[0] - - x = node_values[:, 0] - y = node_values[:, 1] - if smoother: - x = smoother.smooth(x) - y = smoother.smooth(y) - - vx = derivative_of(x, scene.dt) - vy = derivative_of(y, scene.dt) - ax = derivative_of(vx, scene.dt) - ay = 
derivative_of(vy, scene.dt) - - data_dict = {('position', 'x'): x, - ('position', 'y'): y, - ('velocity', 'x'): vx, - ('velocity', 'y'): vy, - ('acceleration', 'x'): ax, - ('acceleration', 'y'): ay} - - node_data = pd.DataFrame(data_dict, columns=data_columns) - node = Node(node_type=env.NodeType.PEDESTRIAN, node_id=node_id, data=node_data) - node.first_timestep = new_first_idx - - scene.nodes.append(node) + scene_nodes[f"{track_source}_{i_nr}"].append(node) created+=1 - # if data_class == 'train': - # scene.augmented = list() - # angles = np.arange(0, 360, 15) if data_class == 'train' else [0] - # for angle in angles: - # scene.augmented.append(augment_scene(scene, angle)) - # print(scene) - scenes.append(scene) + e = time.time() + + dt1.add(b-a) + dt2.add(c-b) + dt3.add(d-c) + dt4.add(e-d) + + for scene_nr, nodes in scene_nodes.items(): + scene = Scene(timesteps=nodes[-1].last_timestep, dt=(1/camera.fps)*step_size, name=f'{split_id}_{scene_nr}', aug_func=None) + scene.nodes.extend(nodes) + scenes.append(scene) + print(f'Processed {len(scenes):.2f} scene for data class {data_class}') env.scenes = scenes - print(env.scenes) + # print(env.scenes) if len(scenes) > 0: with open(data_dict_path, 'wb') as f: dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL) - print(f"Linear: {l}") - print(f"Non-Linear: {nl}") + # print(f"Linear: {l}") + # print(f"Non-Linear: {nl}") print(f"error: {skipped_for_error}, used: {created}") def main(): @@ -288,7 +229,34 @@ def main(): parser.add_argument("--cm-to-m", action='store_true', help=f"If homography is in cm, convert tracked points to meter for beter results") parser.add_argument("--center-data", action='store_true', help=f"Normalise around center") parser.add_argument("--bin-positions", action='store_true', help=f"Experiment to put round positions to a grid") + parser.add_argument("--step-size", type=int, default=1, help=f"Take only every n-th point") + parser.add_argument("--camera-fps", + help="Camera FPS", + type=int, + default=12) + parser.add_argument("--homography", + help="File with homography params", + type=Path, + default='../DATASETS/VIRAT_subset_0102x/VIRAT_0102_homography_img2world.txt', + action=HomographyAction) + parser.add_argument("--calibration", + help="File with camera intrinsics and lens distortion params (calibration.json)", + # type=Path, + default=None, + action=CameraAction) args = parser.parse_args() - process_data(**args.__dict__) + + # process_data(**args.__dict__) + process_data( + args.src_dir, + args.dst_dir, + args.name, + args.smooth_tracks, + args.cm_to_m, + args.center_data, + args.bin_positions, + args.camera, + args.step_size + ) diff --git a/trap/tools.py b/trap/tools.py index 77b819f..8c6b634 100644 --- a/trap/tools.py +++ b/trap/tools.py @@ -10,13 +10,13 @@ import numpy as np import pandas as pd import trap.tracker from trap.config import parser -from trap.frame_emitter import Detection, DetectionState, video_src_from_config, Frame -from trap.tracker import DETECTOR_YOLOv8, Smoother, _yolov8_track, Track, TrainingDataWriter, Tracker, read_tracks_json +from trap.frame_emitter import Camera, Detection, DetectionState, video_src_from_config, Frame +from trap.tracker import DETECTOR_YOLOv8, Smoother, TrackReader, _yolov8_track, Track, TrainingDataWriter, Tracker, read_tracks_json from collections import defaultdict import logging import cv2 -from typing import List, Iterable, Optional +from typing import Callable, List, Iterable, Optional from ultralytics import YOLO from ultralytics.engine.results import Results as 
YOLOResult @@ -185,6 +185,79 @@ def tracker_compare(): bar.set_description(f"[{frames.video_nr}/{len(frames.video_srcs)}] [{frames.frame_idx}/{frames.frame_count}] {str(frames.video_path)}") +def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera:Camera, convert_points: Optional[Callable]): + if not track.predictions: + return + + current_point = track.get_projected_history(camera=camera)[-1] + + if convert_points: + current_point = convert_points([current_point])[0] + + for pred_i, pred in enumerate(track.predictions): + pred_coords = pred #cv2.perspectiveTransform(np.array([pred]), inv_H)[0].tolist() + if convert_points: + pred_coords = convert_points(pred_coords) + # color = (128,0,128) if pred_i else (128,128,0) + + color = bgr_colors[color_index % len(bgr_colors)] + + for ci in range(0, len(pred_coords)): + if ci == 0: + # TODO)) prev point + # continue + start = [int(p) for p in current_point] + # start = [int(p) for p in coords[-1]] + # start = [0,0]? + # print(start) + else: + start = [int(p) for p in pred_coords[ci-1]] + end = [int(p) for p in pred_coords[ci]] + cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA) + # cv2.circle(img, end, 2, color, 1, lineType=cv2.LINE_AA) + +def draw_trackjectron_history(img: cv2.Mat, track: Track, color_index: int, convert_points: Optional[Callable]): + if not track.predictor_history: + return + + coords = track.predictor_history #cv2.perspectiveTransform(np.array([track.predictor_history]), inv_H)[0].tolist() + if convert_points: + coords = convert_points(coords) + # color = (128,0,128) if pred_i else (128,128,0) + + color = tuple(b/2 for b in bgr_colors[color_index % len(bgr_colors)]) + + for ci in range(0, len(coords)): + if ci == 0: + # TODO)) prev point + continue + # start = [int(p) for p in coords[-1]] + # start = [0,0]? 
+ # print(start) + else: + start = [int(p) for p in coords[ci-1]] + end = [int(p) for p in coords[ci]] + cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA) + cv2.circle(img, end, 4, color, 1, lineType=cv2.LINE_AA) + +def draw_track_projected(img: cv2.Mat, track: Track, color_index: int, camera: Camera, convert_points: Optional[Callable]): + history = track.get_projected_history(camera=camera) + + if convert_points: + history = convert_points(history) + + cv2.putText(img, f"{track.track_id} ({len(history)})", to_point(history[0]), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[color_index % len(bgr_colors)]) + + point_color = bgr_colors[color_index % len(bgr_colors)] + cv2.circle(img, to_point(history[0]), 3, point_color, 2) + + for j in range(len(history)-1): + a = history[j] + b = history[j+1] + + cv2.line(img, to_point(a), to_point(b), point_color, 1) + cv2.circle(img, to_point(b), 3, point_color, 2) + def draw_track(img: cv2.Mat, track: Track, color_index: int): history = track.history @@ -197,16 +270,16 @@ def draw_track(img: cv2.Mat, track: Track, color_index: int): a = history[j] b = history[j+1] # TODO)) replace with Track.get_with_interpolated_history() - gap = b.frame_nr - a.frame_nr - 1 - if gap < 0: - print(f"WARNING, gap between frames {a.frame_nr} -> {b.frame_nr} is negative?") - if gap > 0: - for g in range(gap): - p1 = a.get_foot_coords() - p2 = b.get_foot_coords() - point = (lerp(p1[0], p2[0], g/gap), lerp(p1[1], p2[1], g/gap)) + # gap = b.frame_nr - a.frame_nr - 1 + # if gap < 0: + # print(f"WARNING, gap between frames {a.frame_nr} -> {b.frame_nr} is negative?") + # if gap > 0: + # for g in range(gap): + # p1 = a.get_foot_coords() + # p2 = b.get_foot_coords() + # point = (lerp(p1[0], p2[0], g/gap), lerp(p1[1], p2[1], g/gap)) - cv2.circle(img, to_point(point), 3, (0,0,255), 1) + # cv2.circle(img, to_point(point), 3, (0,0,255), 1) color = detection_color(b, color_index, a) cv2.line(img, to_point(a.get_foot_coords()), to_point(b.get_foot_coords()), color, 1) @@ -222,27 +295,30 @@ def blacklist_tracks(): backdrop = cv2.imread('../DATASETS/hof3/output.png') blacklist = [] path: Path = config.save_for_training - blacklist_file = path / "blacklist.jsonl" - whitelist_file = path / "whitelist.jsonl" # for skipping - tracks_file = path / "tracks.json" - FPS = 12 # TODO)) From config + reader = TrackReader(path, config.camera.fps, exclude_whitelisted = True) + # blacklist_file = path / "blacklist.jsonl" + # whitelist_file = path / "whitelist.jsonl" # for skipping + # tracks_file = path / "tracks.json" - if whitelist_file.exists(): - # with whitelist_file.open('r') as fp: - with jsonlines.open(whitelist_file, 'r') as reader: - whitelist = [l for l in reader.iter(type=str)] - else: - whitelist = [] - + # FPS = 12 # TODO)) From config + + # if whitelist_file.exists(): + # # with whitelist_file.open('r') as fp: + # with jsonlines.open(whitelist_file, 'r') as reader: + # whitelist = [l for l in reader.iter(type=str)] + # else: + # whitelist = [] + smoother = Smoother() try: - for track in read_tracks_json(tracks_file, blacklist_file, FPS): - if track.track_id in whitelist: - logger.info(f'skip whitelisted {track.track_id}') + for track in reader: + + if len(track.history) < 5: continue img = backdrop.copy() - draw_track(img, track, 0) + draw_track(img, track.get_with_interpolated_history(), 0) + draw_track(img, smoother.smooth_track(track.get_with_interpolated_history()).get_sampled(5), 1) imgS = cv2.resize(img, (1920, 1080)) cv2.imshow('frame', imgS) @@ -253,14 +329,15 @@ def 
blacklist_tracks(): elif k == ord('s'): break # skip for now elif k == ord('y'): - with jsonlines.open(whitelist_file, mode='a') as writer: + print('whitelist', track.track_id) + with jsonlines.open(reader.whitelist_file, mode='a') as writer: # skip next time around writer.write(track.track_id) break elif k == ord('n'): print('blacklist', track.track_id) - logger.info(f"Append {len(blacklist)} items to {str(blacklist_file)}") - with jsonlines.open(blacklist_file, mode='a') as writer: + # logger.info(f"Append {len(track)} items to {str(reader.blacklist_file)}") + with jsonlines.open(reader.blacklist_file, mode='a') as writer: writer.write(track.track_id) break else: diff --git a/trap/tracker.py b/trap/tracker.py index 0fcf2e5..9c059e8 100644 --- a/trap/tracker.py +++ b/trap/tracker.py @@ -83,7 +83,7 @@ class Multifile(): def __iter__(self): for path in self.srcs: - self.current_file = path.name + self.current_file = path with path.open('r') as fp: for l in fp: yield l @@ -91,28 +91,46 @@ class Multifile(): def readline(self): return self.g.__next__() -FIELDNAMES = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'] +FIELDNAMES = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state', 'source'] -def read_tracks_json(path: Path, blacklist_path: Path, fps): - """ - Reader for tracks.json produced by TrainingDataWriter - """ - with path.open('r') as fp: - tracks_dict: dict = json.load(fp) +class TrackReader: + def __init__(self, path: Path, fps: int, include_blacklisted = False, exclude_whitelisted = False): + self.blacklist_file = path / "blacklist.jsonl" + self.whitelist_file = path / "whitelist.jsonl" # for skipping + self.tracks_file = path / "tracks.json" + + with self.tracks_file.open('r') as fp: + tracks_dict: dict = json.load(fp) + + if self.blacklist_file.exists(): + with jsonlines.open(self.blacklist_file, 'r') as reader: + blacklist = [track_id for track_id in reader.iter(type=str)] + else: + blacklist = [] + + + if self.whitelist_file.exists(): + with jsonlines.open(self.whitelist_file, 'r') as reader: + whitelist = [track_id for track_id in reader.iter(type=str)] + else: + whitelist = [] + + + self._tracks = { track_id: detection_values + for track_id, detection_values in tracks_dict.items() + if (include_blacklisted or track_id not in blacklist) and + (not exclude_whitelisted or track_id not in whitelist) + } + self.fps = fps - if blacklist_path.exists(): - with jsonlines.open(blacklist_path, 'r') as reader: - blacklist = [track_id for track_id in reader.iter(type=str)] - - else: - blacklist = [] - - for track_id, detection_values in tracks_dict.items(): - if track_id in blacklist: - continue - + def __len__(self): + return len(self._tracks) + + def get(self, track_id): + detection_values = self._tracks[track_id] history = [] # for detection_values in + source = None for detection_items in detection_values: d = dict(zip(FIELDNAMES, detection_items)) history.append(Detection( @@ -124,10 +142,23 @@ def read_tracks_json(path: Path, blacklist_path: Path, fps): nan, d['state'], d['frame_id'], - 1 + 1, )) + source = int(d['source']) - yield Track(track_id, history, fps=fps) + return Track(track_id, history, fps=self.fps, source=source) + + def __iter__(self): + for track_id in self._tracks: + yield self.get(track_id) + +def read_tracks_json(path: Path, fps): + """ + Reader for tracks.json produced by TrainingDataWriter + """ + reader = TrackReader(path, fps) + for t in reader: + yield t class TrainingDataWriter: def __init__(self, training_path: Optional[Path]): @@ 
-210,7 +241,6 @@ def rewrite_raw_track_files(path: Path): tracks_file = path / 'tracks.json' tracks = defaultdict(lambda: []) - offset = 0 max_track_id = 0 prev_file = None @@ -218,6 +248,8 @@ def rewrite_raw_track_files(path: Path): # all-2024-11-12T13:30.txt file_date = None + src_file_nr = 0 + for name, line_nrs in destinations.items(): dir_path = path / name dir_path.mkdir(exist_ok=True) @@ -233,17 +265,23 @@ def rewrite_raw_track_files(path: Path): logger.info(f'{name} - update offset {offset} ({sources.current_file})') prev_file = current_file + src_file_nr += 1 - file_date = datetime.strptime(current_file.name, 'all-%Y-%m-%dT%H:%M.txt') + try: + file_date = datetime.strptime(current_file.name, 'all-%Y-%m-%dT%H:%M.txt') + except ValueError as e: + logger.error(str(e)) + file_date = None - if file_date: - frame_date = file_date + timedelta(seconds = int(parts[0])//10) - else: - frame_date = None parts = line.split('\t') track_id = int(parts[1]) + offset + if file_date: + frame_date = file_date + timedelta(seconds = int(float(parts[0]))//10) + else: + frame_date = None + if track_id > max_track_id: max_track_id = track_id @@ -254,7 +292,7 @@ def rewrite_raw_track_files(path: Path): tracks[track_id].append([ int(parts[0] / 10), track_id, - ] + parts[2:8] + [int(parts[8])]) + ] + parts[2:8] + [int(parts[8]), src_file_nr]) with tracks_file.open('w') as fp: logger.info(f"Write {len(tracks)} tracks to {str(tracks_file)}") @@ -492,7 +530,9 @@ class Tracker: # "history": [{"x":c[0], "y":c[1]} for c in coords[0]] if not self.config.bypass_prediction else coords[0].tolist() # already doubles nested, fine for test # } active_track_ids = [d.track_id for d in detections] - active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids} + active_tracks = {t.track_id: t.get_with_interpolated_history() for t in self.tracks.values() if t.track_id in active_track_ids} + # active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids} + # active_tracks = {t.track_id: t for t in self.tracks.values() if t.track_id in active_track_ids} # logger.info(f"{trajectories}") frame.tracks = active_tracks @@ -627,7 +667,7 @@ class Smoother: self.smoother.smooth(hs) hs = self.smoother.smooth_data[0] new_history = [Detection(d.track_id, l, t, w, h, d.conf, d.state, d.frame_nr, d.det_class) for l, t, w, h, d in zip(ls,ts,ws,hs, track.history)] - new_track = Track(track.track_id, new_history, track.predictor_history, track.predictions, track.fps) + return Track(track.track_id, new_history, track.predictor_history, track.predictions, track.fps) def smooth_frame_tracks(self, frame: Frame) -> Frame: new_tracks = []