# used for "Forward Referencing of type annotations" from __future__ import annotations import datetime import json import logging from pathlib import Path import time from argparse import ArgumentParser, Namespace from multiprocessing.synchronize import Event as BaseEvent from typing import Dict, List, Optional from charset_normalizer import detect import cv2 import ffmpeg import numpy as np import pyglet import zmq from pyglet import shapes from trap.base import Detection from trap.counter import CounterListerner from trap.frame_emitter import Frame, Track from trap.lines import load_lines_from_svg from trap.node import Node from trap.preview_renderer import FrameWriter from trap.tools import draw_track_predictions, draw_track_projected, to_point from trap.utils import convert_world_points_to_img_points logger = logging.getLogger("trap.simple_renderer") class CvRenderer(Node): def setup(self): self.prediction_sock = self.sub(self.config.zmq_prediction_addr) self.tracker_sock = self.sub(self.config.zmq_trajectory_addr) self.detector_sock = self.sub(self.config.zmq_detection_addr) self.frame_sock = self.sub(self.config.zmq_frame_addr) # self.H = self.config.H # self.inv_H = np.linalg.pinv(self.H) # TODO: get FPS from frame_emitter # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720)) self.fps = 60 self.frame_size = None # configure on first frame recv # self.frame_size = (self.config.camera.projected_w,self.config.camera.projected_h) self.out_writer = self.start_writer() if self.config.render_file else None self.streaming_process = self.start_streaming() if self.config.render_url else None self.first_time: float|None = None self.frame: Frame|None= None self.tracker_frame: Frame|None = None self.prediction_frame: Frame|None = None self.detections: List[Detection]|None = None self.tracks: Dict[str, Track] = {} self.predictions: Dict[str, Track] = {} self.scale = 100 self.debug_lines = debug_lines = load_lines_from_svg(self.config.debug_map, self.scale, '') if self.config.debug_map else [] def refresh_labels(self, dt: float): """Every frame""" if self.frame: self.labels['frame_idx'].text = f"{self.frame.index:06d}" self.labels['frame_time'].text = f"{self.frame.time - self.first_time: >10.2f}s" self.labels['frame_latency'].text = f"{self.frame.time - time.time():.2f}s" if self.tracker_frame: self.labels['tracker_idx'].text = f"{self.tracker_frame.index - self.frame.index}" self.labels['tracker_time'].text = f"{self.tracker_frame.time - time.time():.3f}s" self.labels['track_len'].text = f"{len(self.tracker_frame.tracks)} tracks" if self.prediction_frame: self.labels['pred_idx'].text = f"{self.prediction_frame.index - self.frame.index}" self.labels['pred_time'].text = f"{self.prediction_frame.time - time.time():.3f}s" # self.labels['track_len'].text = f"{len(self.prediction_frame.tracks)} tracks" def start_writer(self): if not self.config.output_dir.exists(): raise FileNotFoundError("Path does not exist") date_str = datetime.datetime.now().isoformat(timespec="minutes") filename = self.config.output_dir / f"render_predictions-{date_str}-{self.config.detector}.mp4" logger.info(f"Write to {filename}") return FrameWriter(str(filename), self.fps, None) # fourcc = cv2.VideoWriter_fourcc(*'vp09') # return cv2.VideoWriter(str(filename), fourcc, self.fps, self.frame_size) def start_streaming(self, frame_size=(1920,1080)): return ( ffmpeg .input('pipe:', format='rawvideo',codec="rawvideo", pix_fmt='bgr24', s='{}x{}'.format(*frame_size)) .output( self.config.render_url, #codec = "copy", # 
use same codecs of the original video codec='libx264', listen=1, # enables HTTP server pix_fmt="yuv420p", preset="ultrafast", tune="zerolatency", # g=f"{self.fps*2}", g=f"{60*2}", analyzeduration="2000000", probesize="1000000", f='mpegts' ) .overwrite_output() .run_async(pipe_stdin=True) ) # return process def run(self): self.frame = None prediction_frame = None tracker_frame = None i=0 first_time = None cv2.namedWindow("frame", cv2.WINDOW_NORMAL) # https://gist.github.com/ronekko/dc3747211543165108b11073f929b85e cv2.moveWindow("frame", 0, -1) if self.config.full_screen: cv2.setWindowProperty("frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN) cv2.setMouseCallback('frame',self.click_print_position) # bgsub = cv2.createBackgroundSubtractorMOG2(120, 50, detectShadows=True) while self.run_loop(): i += 1 # zmq_ev = self.frame_sock.poll(timeout=2000) # if not zmq_ev: # # when no data comes in, loop so that is_running is checked # continue try: self.frame: Frame = self.frame_sock.recv_pyobj(zmq.NOBLOCK) except zmq.ZMQError as e: # idx = frame.index if frame else "NONE" # logger.debug(f"reuse video frame {idx}") pass # else: # logger.debug(f'new video frame {frame.index}') if self.frame is None: # might need to wait a few iterations before first frame comes available time.sleep(.1) continue try: prediction_frame: Frame = self.prediction_sock.recv_pyobj(zmq.NOBLOCK) for track_id, track in prediction_frame.tracks.items(): prediction_id = f"{track_id}-{track.history[-1].frame_nr}" self.predictions[prediction_id] = track except zmq.ZMQError as e: logger.debug(f'reuse prediction') try: tracker_frame: Frame = self.tracker_sock.recv_pyobj(zmq.NOBLOCK) for track_id, track in tracker_frame.tracks.items(): self.tracks[track_id] = track except zmq.ZMQError as e: logger.debug(f'reuse tracks') try: self.detections = self.detector_sock.recv_pyobj(zmq.NOBLOCK) # print('detections') except zmq.ZMQError as e: # print('no detections') # idx = frame.index if frame else "NONE" # logger.debug(f"reuse video frame {idx}") pass if first_time is None: first_time = self.frame.time # img = frame.img # save_file = Path("videos/snap.png") # if not save_file.exists(): # img = frame.camera.img_to_world(frame.img, 100) # cv2.imwrite(save_file, img) img = decorate_frame(self.frame, tracker_frame, prediction_frame, first_time, self.config, self.tracks, self.predictions, self.detections, self.config.render_clusters, self.debug_lines, self.scale) logger.debug(f"write frame {self.frame.time - first_time:.3f}s") if self.out_writer: self.out_writer.write(img) if self.streaming_process: self.streaming_process.stdin.write(img.tobytes()) if not self.config.no_window: cv2.imshow('frame',cv2.resize(img, (1920, 1080))) # cv2.imshow('frame',img) cv2.waitKey(10) # clear out old tracks & predictions: for track_id, track in list(self.tracks.items()): if get_animation_position(track, self.frame) == 1: self.tracks.pop(track_id) for prediction_id, track in list(self.predictions.items()): if get_animation_position(track, self.frame) == 1: self.predictions.pop(prediction_id) logger.info('Stopping') # if i>2: if self.streaming_process: self.streaming_process.stdin.close() if self.out_writer: self.out_writer.release() if self.streaming_process: # oddly wrapped, because both close and release() take time. 
logger.info('wait for closing stream') self.streaming_process.wait() logger.info('stopped') @classmethod def arg_parser(cls): render_parser = ArgumentParser() render_parser.add_argument('--zmq-frame-addr', help='Manually specity communication addr for the frame messages', type=str, default="ipc:///tmp/feeds_frame") render_parser.add_argument('--zmq-trajectory-addr', help='Manually specity communication addr for the trajectory messages', type=str, default="ipc:///tmp/feeds_traj") render_parser.add_argument('--zmq-detection-addr', help='Manually specity communication addr for the detection messages', type=str, default="ipc:///tmp/feeds_dets") render_parser.add_argument('--zmq-prediction-addr', help='Manually specity communication addr for the prediction messages', type=str, default="ipc:///tmp/feeds_preds") render_parser.add_argument("--render-file", help="Render a video file previewing the prediction, and its delay compared to the current frame", action='store_true') render_parser.add_argument("--no-window", help="Disable a previewing to a window", action='store_true') render_parser.add_argument("--full-screen", help="Set Window full screen", action='store_true') render_parser.add_argument("--render-clusters", help="renders arrowd clusters instead of individual predictions", action='store_true') render_parser.add_argument("--render-url", help="""Stream renderer on given URL. Two easy approaches: - using zmq wrapper one can specify the LISTENING ip. To listen to any incoming connection: zmq:tcp://0.0.0.0:5556 - alternatively, using e.g. UDP one needs to specify the IP of the client. E.g. udp://100.69.123.91:5556/stream Note that with ZMQ you can have multiple clients connecting simultaneously. E.g. using `ffplay zmq:tcp://100.109.175.82:5556` When using udp, connecting can be done using `ffplay udp://100.109.175.82:5556/stream` """, type=str, default=None) render_parser.add_argument('--debug-map', help='specify a map (svg-file) from which to load lines which will be overlayed', type=str, default="../DATASETS/hof3/map_hof.svg") return render_parser def click_print_position(self, event,x,y,flags,param): # if event == cv2.EVENT_LBUTTONDBLCLK: if event == cv2.EVENT_LBUTTONUP: if not self.frame: return scale = 100 print("click position:", x/scale, y/scale) # self.frame.camera.points_img_to_world([[x, y]], 1) # cv2.circle(img,(x,y),100,(255,0,0),-1) mouseX,mouseY = x,y # colorset = itertools.product([0,255], repeat=3) # but remove white # colorset = [(0, 0, 0), # (0, 0, 255), # (0, 255, 0), # (0, 255, 255), # (255, 0, 0), # (255, 0, 255), # (255, 255, 0) # ] colorset = [ (255,255,100), (255,100,255), (100,255,255), ] # colorset = [ # (0,0,0), # ] def get_animation_position(track: Track, current_frame: Frame): fade_duration = current_frame.camera.fps * 3 diff = current_frame.index - track.history[-1].frame_nr return max(0, min(1, diff / fade_duration)) # track.history[-1].frame_nr < (current_frame.index - current_frame.camera.fps * 3) # track.history[-1].frame_nr < (current_frame.index - current_frame.camera.fps * 3) def decorate_frame(frame: Frame, tracker_frame: Frame, prediction_frame: Frame, first_time: float, config: Namespace, tracks: Dict[str, Track], predictions: Dict[str, Track], detections: Optional[List[Detection]], as_clusters = True, debug_lines = [], scale: float = 100) -> np.array: # TODO: replace opencv with QPainter to support alpha? 
https://doc.qt.io/qtforpython-5/PySide2/QtGui/QPainter.html#PySide2.QtGui.PySide2.QtGui.QPainter.drawImage # or https://github.com/pygobject/pycairo?tab=readme-ov-file # or https://pyglet.readthedocs.io/en/latest/programming_guide/shapes.html # and use http://code.astraw.com/projects/motmot/pygarrayimage.html or https://gist.github.com/nkymut/1cb40ea6ae4de0cf9ded7332f1ca0d55 # or https://api.arcade.academy/en/stable/index.html (supports gradient color in line -- "Arcade is built on top of Pyglet and OpenGL.") dst_img = frame.camera.img_to_world(frame.img, scale) # mask = bg_subtractor.apply(dst_img) # mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB).astype(float) / 255 # dst_img = dst_img * mask # undistorted_img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx) # dst_img = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H),(config.camera.w,config.camera.h)) # dst_img2 = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H), None) # cv2.imwrite('/home/ruben/suspicion/DATASETS/hof3/camera2.png', dst_img2) overlay = np.zeros(dst_img.shape, np.uint8) # Fill image with red color(set each pixel to red) overlay[:] = (0, 0, 0) # img = cv2.addWeighted(dst_img, .2, overlay, .3, 0) img = dst_img.copy() # all not working: # if i == 1: # # thanks to GpG for fixing scaling issue: https://stackoverflow.com/a/39668864 # scale_factor = 1./20 # from 10m to 1000px # S = np.array([[scale_factor, 0,0],[0,scale_factor,0 ],[ 0,0,1 ]]) # new_H = S * self.H * np.linalg.inv(S) # warpedFrame = cv2.warpPerspective(img, new_H, (1000,1000)) # cv2.imwrite(str(self.config.output_dir / "orig.png"), warpedFrame) cv2.rectangle(img, (0,0), (img.shape[1],25), (0,0,0), -1) if detections: for detection in detections: points = [ detection.get_foot_coords(), [detection.l, detection.t], [detection.l + detection.w, detection.t + detection.h], ] points = tracker_frame.camera.points_img_to_world(points, scale) points = [to_point(p) for p in points] # to int w = points[1][0]-points[2][0] feet = [int(points[2][0] + .5 * w), points[2][1]] cv2.rectangle(img, points[1], points[2], (255,255,0), 2) cv2.circle(img, points[0], 5, (255,255,0), 2) cv2.putText(img, f"{detection.conf:.02f}", (points[0][0], points[0][1]+20), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,0), 1) def conversion(points): return convert_world_points_to_img_points(points, scale) if not tracker_frame: cv2.putText(img, f"and track", (650,17), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,0), 1) else: for track_id, track in tracks.items(): inv_H = np.linalg.pinv(tracker_frame.H) draw_track_projected(img, track, int(track_id), tracker_frame.camera, conversion) for line in debug_lines: for rp1, rp2 in zip(line.points, line.points[1:]): p1 = ( int(rp1.position[0]*scale), int(rp1.position[1]*scale), ) p2 = ( int(rp2.position[0]*scale), int(rp2.position[1]*scale), ) cv2.line(img, p1, p2, (255,0,0), 2) # points = [(int(point[0]*scale), int(point[1]*scale)) for point in points] # for num, points in enumerate(frame.camera.debug_lines): # cv2.line(img, points[0], points[1], (255,0,0), 2) # if hasattr(frame.camera, 'debug_points'): # for num, point in enumerate(frame.camera.debug_points): # cv2.circle(img, (int(point[0]*scale), int(point[1]*scale)), 5, (255,0,0), 2) # cv2.putText(img, f"{num}", (int(point[0]*scale)+20, int(point[1]*scale)), cv2.FONT_HERSHEY_PLAIN, 1, (255,0,0), 1) if not prediction_frame: cv2.putText(img, f"Waiting for prediction...", (500,17), cv2.FONT_HERSHEY_PLAIN, 1, 
(255,255,0), 1) # continue else: for track_id, track in predictions.items(): inv_H = np.linalg.pinv(prediction_frame.H) # For debugging: # draw_trackjectron_history(img, track, int(track.track_id), conversion) anim_position = get_animation_position(track, frame) draw_track_predictions(img, track, int(track.track_id)+1, frame.camera, conversion, anim_position=anim_position, as_clusters=as_clusters) cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1) if prediction_frame.maps: for i, m in enumerate(prediction_frame.maps): map_img = np.ascontiguousarray(np.flipud(np.transpose(m[0], (2, 1, 0))*255), np.uint8) cv2.circle(map_img, (10,50), 5, (0,255,0), 2) cv2.line(map_img, (10,50), (10+15, 50), (0,0,255), 2) cv2.rectangle(map_img, (0,0), (map_img.shape[1]-1, map_img.shape[0]-1), (255,255,255), 1) height, width, _ = map_img.shape padding= 50 y = img.shape[0] - padding - height x = width*i if x+width > img.shape[1]: break # stop drawing maps when there's a lot of them img[y:y+height,x:x+width] = map_img base_color = (255,)*3 info_color = (255,255,0) predictor_color = (255,0,255) tracker_color = (0,255,255) cv2.putText(img, f"{frame.index:06d}", (20,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) cv2.putText(img, f"{frame.time - first_time: >10.2f}s", (150,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) cv2.putText(img, f"{frame.time - time.time():.2f}s", (250,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) options = [] if prediction_frame: # render Δt and Δ frames cv2.putText(img, f"{tracker_frame.index - frame.index}", (90,17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1) cv2.putText(img, f"{prediction_frame.index - frame.index}", (120,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1) cv2.putText(img, f"{tracker_frame.time - time.time():.2f}s", (310,17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1) cv2.putText(img, f"{prediction_frame.time - time.time():.2f}s", (380,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1) cv2.putText(img, f"{len(tracker_frame.tracks)} tracks", (620,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) cv2.putText(img, f"h: {np.average([len(t.history or []) for t in prediction_frame.tracks.values()]):.2f}", (700,17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1) cv2.putText(img, f"ph: {np.average([len(t.predictor_history or []) for t in prediction_frame.tracks.values()]):.2f}", (780,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1) cv2.putText(img, f"p: {np.average([len(t.predictions or []) for t in prediction_frame.tracks.values()]):.2f}", (860,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1) for option, value in prediction_frame.log['predictor'].items(): options.append(f"{option}: {value}") if len(options): cv2.putText(img, options.pop(-1), (20,img.shape[0]-30), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) cv2.putText(img, " | ".join(options), (20,img.shape[0]-10), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1) return img def run_cv_renderer(config: Namespace, is_running: BaseEvent, timer_counter): renderer = CvRenderer(config, is_running) renderer.run(timer_counter)
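

# A minimal, hypothetical entry point -- not part of the original module -- sketching how the
# renderer might be started stand-alone. It assumes the (config, is_running) constructor used by
# run_cv_renderer() above, and that the namespace produced by CvRenderer.arg_parser() is enough
# for this run (options such as output_dir/detector, needed for --render-file, would normally
# come from the application's combined parser).
if __name__ == "__main__":
    import multiprocessing

    args = CvRenderer.arg_parser().parse_args()
    running = multiprocessing.Event()
    running.set()  # clear this event elsewhere (e.g. a signal handler) to stop the render loop
    run_cv_renderer(args, running, timer_counter=None)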