trap/trap/cv_renderer.py

# used for "Forward Referencing of type annotations"
from __future__ import annotations
import datetime
import json
import logging
from pathlib import Path
import time
from argparse import ArgumentParser, Namespace
from multiprocessing.synchronize import Event as BaseEvent
from typing import Dict, List, Optional
from charset_normalizer import detect
import cv2
import ffmpeg
import numpy as np
import pyglet
import zmq
from pyglet import shapes
from trap.base import Detection
from trap.counter import CounterListerner
from trap.frame_emitter import Frame, Track
from trap.lines import load_lines_from_svg
from trap.node import Node
from trap.preview_renderer import FrameWriter
from trap.tools import draw_track_predictions, draw_track_projected, to_point
from trap.utils import convert_world_points_to_img_points
logger = logging.getLogger("trap.simple_renderer")
class CvRenderer(Node):
def setup(self):
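        """Subscribe to the frame, trajectory, detection and prediction streams,
        prepare the optional file writer / ffmpeg streamer, and initialise the
        render state (fps, frame size, track/prediction caches, debug lines)."""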
self.prediction_sock = self.sub(self.config.zmq_prediction_addr)
self.tracker_sock = self.sub(self.config.zmq_trajectory_addr)
self.detector_sock = self.sub(self.config.zmq_detection_addr)
self.frame_sock = self.sub(self.config.zmq_frame_addr)
# self.H = self.config.H
# self.inv_H = np.linalg.pinv(self.H)
# TODO: get FPS from frame_emitter
# self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
self.fps = 60
self.frame_size = None # configure on first frame recv
# self.frame_size = (self.config.camera.projected_w,self.config.camera.projected_h)
self.out_writer = self.start_writer() if self.config.render_file else None
self.streaming_process = self.start_streaming() if self.config.render_url else None
self.first_time: float|None = None
        self.frame: Frame|None = None
self.tracker_frame: Frame|None = None
self.prediction_frame: Frame|None = None
self.detections: List[Detection]|None = None
self.tracks: Dict[str, Track] = {}
self.predictions: Dict[str, Track] = {}
        self.scale = 100  # pixels per world unit when projecting onto the render image
        self.debug_lines = load_lines_from_svg(self.config.debug_map, self.scale, '') if self.config.debug_map else []
def refresh_labels(self, dt: float):
"""Every frame"""
if self.frame:
self.labels['frame_idx'].text = f"{self.frame.index:06d}"
self.labels['frame_time'].text = f"{self.frame.time - self.first_time: >10.2f}s"
self.labels['frame_latency'].text = f"{self.frame.time - time.time():.2f}s"
if self.tracker_frame:
self.labels['tracker_idx'].text = f"{self.tracker_frame.index - self.frame.index}"
self.labels['tracker_time'].text = f"{self.tracker_frame.time - time.time():.3f}s"
self.labels['track_len'].text = f"{len(self.tracker_frame.tracks)} tracks"
if self.prediction_frame:
self.labels['pred_idx'].text = f"{self.prediction_frame.index - self.frame.index}"
self.labels['pred_time'].text = f"{self.prediction_frame.time - time.time():.3f}s"
# self.labels['track_len'].text = f"{len(self.prediction_frame.tracks)} tracks"
def start_writer(self):
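        """Open a FrameWriter that records the rendered output to an .mp4 in the
        configured output directory (enabled with --render-file)."""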
if not self.config.output_dir.exists():
            raise FileNotFoundError(f"Output directory does not exist: {self.config.output_dir}")
date_str = datetime.datetime.now().isoformat(timespec="minutes")
filename = self.config.output_dir / f"render_predictions-{date_str}-{self.config.detector}.mp4"
logger.info(f"Write to {filename}")
return FrameWriter(str(filename), self.fps, None)
# fourcc = cv2.VideoWriter_fourcc(*'vp09')
# return cv2.VideoWriter(str(filename), fourcc, self.fps, self.frame_size)
def start_streaming(self, frame_size=(1920,1080)):
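        """Spawn an ffmpeg subprocess that reads raw BGR frames from stdin and
        encodes them as a low-latency H.264 MPEG-TS stream served on --render-url."""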
return (
ffmpeg
.input('pipe:', format='rawvideo',codec="rawvideo", pix_fmt='bgr24', s='{}x{}'.format(*frame_size))
.output(
self.config.render_url,
#codec = "copy", # use same codecs of the original video
codec='libx264',
listen=1, # enables HTTP server
pix_fmt="yuv420p",
preset="ultrafast",
tune="zerolatency",
# g=f"{self.fps*2}",
g=f"{60*2}",
analyzeduration="2000000",
probesize="1000000",
f='mpegts'
)
.overwrite_output()
.run_async(pipe_stdin=True)
)
# return process
def run(self):
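        """Main render loop: poll all sockets non-blockingly, keep the most recent
        frame/tracks/predictions, compose the output with decorate_frame() and
        send it to the preview window, the file writer and/or the stream."""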
self.frame = None
prediction_frame = None
tracker_frame = None
i=0
first_time = None
cv2.namedWindow("frame", cv2.WINDOW_NORMAL)
# https://gist.github.com/ronekko/dc3747211543165108b11073f929b85e
cv2.moveWindow("frame", 0, -1)
if self.config.full_screen:
cv2.setWindowProperty("frame",cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
cv2.setMouseCallback('frame',self.click_print_position)
# bgsub = cv2.createBackgroundSubtractorMOG2(120, 50, detectShadows=True)
while self.run_loop():
i += 1
# zmq_ev = self.frame_sock.poll(timeout=2000)
# if not zmq_ev:
# # when no data comes in, loop so that is_running is checked
# continue
try:
self.frame: Frame = self.frame_sock.recv_pyobj(zmq.NOBLOCK)
except zmq.ZMQError as e:
# idx = frame.index if frame else "NONE"
# logger.debug(f"reuse video frame {idx}")
pass
# else:
# logger.debug(f'new video frame {frame.index}')
if self.frame is None:
# might need to wait a few iterations before first frame comes available
time.sleep(.1)
continue
try:
prediction_frame: Frame = self.prediction_sock.recv_pyobj(zmq.NOBLOCK)
for track_id, track in prediction_frame.tracks.items():
prediction_id = f"{track_id}-{track.history[-1].frame_nr}"
self.predictions[prediction_id] = track
except zmq.ZMQError as e:
logger.debug(f'reuse prediction')
try:
tracker_frame: Frame = self.tracker_sock.recv_pyobj(zmq.NOBLOCK)
for track_id, track in tracker_frame.tracks.items():
self.tracks[track_id] = track
except zmq.ZMQError as e:
logger.debug(f'reuse tracks')
try:
self.detections = self.detector_sock.recv_pyobj(zmq.NOBLOCK)
# print('detections')
except zmq.ZMQError as e:
# print('no detections')
# idx = frame.index if frame else "NONE"
# logger.debug(f"reuse video frame {idx}")
pass
if first_time is None:
first_time = self.frame.time
# img = frame.img
# save_file = Path("videos/snap.png")
# if not save_file.exists():
# img = frame.camera.img_to_world(frame.img, 100)
# cv2.imwrite(save_file, img)
img = decorate_frame(self.frame, tracker_frame, prediction_frame, first_time, self.config, self.tracks, self.predictions, self.detections, self.config.render_clusters, self.debug_lines, self.scale)
logger.debug(f"write frame {self.frame.time - first_time:.3f}s")
if self.out_writer:
self.out_writer.write(img)
if self.streaming_process:
self.streaming_process.stdin.write(img.tobytes())
if not self.config.no_window:
cv2.imshow('frame',cv2.resize(img, (1920, 1080)))
# cv2.imshow('frame',img)
cv2.waitKey(10)
# clear out old tracks & predictions:
for track_id, track in list(self.tracks.items()):
if get_animation_position(track, self.frame) == 1:
self.tracks.pop(track_id)
for prediction_id, track in list(self.predictions.items()):
if get_animation_position(track, self.frame) == 1:
self.predictions.pop(prediction_id)
logger.info('Stopping')
# if i>2:
if self.streaming_process:
self.streaming_process.stdin.close()
if self.out_writer:
self.out_writer.release()
if self.streaming_process:
# oddly wrapped, because both close and release() take time.
logger.info('wait for closing stream')
self.streaming_process.wait()
logger.info('stopped')
@classmethod
def arg_parser(cls):
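        """Command-line arguments specific to this renderer node."""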
render_parser = ArgumentParser()
render_parser.add_argument('--zmq-frame-addr',
            help='Manually specify the communication address for the frame messages',
type=str,
default="ipc:///tmp/feeds_frame")
render_parser.add_argument('--zmq-trajectory-addr',
            help='Manually specify the communication address for the trajectory messages',
type=str,
default="ipc:///tmp/feeds_traj")
render_parser.add_argument('--zmq-detection-addr',
            help='Manually specify the communication address for the detection messages',
type=str,
default="ipc:///tmp/feeds_dets")
render_parser.add_argument('--zmq-prediction-addr',
            help='Manually specify the communication address for the prediction messages',
type=str,
default="ipc:///tmp/feeds_preds")
render_parser.add_argument("--render-file",
help="Render a video file previewing the prediction, and its delay compared to the current frame",
action='store_true')
render_parser.add_argument("--no-window",
help="Disable a previewing to a window",
action='store_true')
render_parser.add_argument("--full-screen",
help="Set Window full screen",
action='store_true')
render_parser.add_argument("--render-clusters",
help="renders arrowd clusters instead of individual predictions",
action='store_true')
render_parser.add_argument("--render-url",
help="""Stream renderer on given URL. Two easy approaches:
- using zmq wrapper one can specify the LISTENING ip. To listen to any incoming connection: zmq:tcp://0.0.0.0:5556
- alternatively, using e.g. UDP one needs to specify the IP of the client. E.g. udp://100.69.123.91:5556/stream
Note that with ZMQ you can have multiple clients connecting simultaneously. E.g. using `ffplay zmq:tcp://100.109.175.82:5556`
When using udp, connecting can be done using `ffplay udp://100.109.175.82:5556/stream`
""",
type=str,
default=None)
render_parser.add_argument('--debug-map',
            help='Specify a map (SVG file) from which to load lines that will be overlaid on the rendered image',
type=str,
default="../DATASETS/hof3/map_hof.svg")
return render_parser
def click_print_position(self, event,x,y,flags,param):
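        """OpenCV mouse callback: on left-button release, print the clicked position
        divided by the render scale (i.e. approximately world coordinates)."""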
# if event == cv2.EVENT_LBUTTONDBLCLK:
if event == cv2.EVENT_LBUTTONUP:
if not self.frame:
return
scale = 100
print("click position:", x/scale, y/scale)
# self.frame.camera.points_img_to_world([[x, y]], 1)
# cv2.circle(img,(x,y),100,(255,0,0),-1)
mouseX,mouseY = x,y
# colorset = itertools.product([0,255], repeat=3) # but remove white
# colorset = [(0, 0, 0),
# (0, 0, 255),
# (0, 255, 0),
# (0, 255, 255),
# (255, 0, 0),
# (255, 0, 255),
# (255, 255, 0)
# ]
colorset = [
(255,255,100),
(255,100,255),
(100,255,255),
]
# colorset = [
# (0,0,0),
# ]
def get_animation_position(track: Track, current_frame: Frame):
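    """Return how far a track has faded out, as a value in [0, 1]: 0 while the
    track was updated in the current frame, 1 once it has not been seen for three
    seconds (at which point the caller removes it).
    For example, at 12 fps a track last seen 18 frames ago gives 18 / 36 = 0.5."""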
fade_duration = current_frame.camera.fps * 3
diff = current_frame.index - track.history[-1].frame_nr
return max(0, min(1, diff / fade_duration))
    # track.history[-1].frame_nr < (current_frame.index - current_frame.camera.fps * 3)
def decorate_frame(frame: Frame, tracker_frame: Optional[Frame], prediction_frame: Optional[Frame], first_time: float, config: Namespace, tracks: Dict[str, Track], predictions: Dict[str, Track], detections: Optional[List[Detection]], as_clusters = True, debug_lines = [], scale: float = 100) -> np.ndarray:
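    """Compose the output image for one video frame: warp the camera image onto
    the world plane at `scale` px per unit, then draw detections, tracks,
    predictions (optionally clustered), debug lines, the prediction map
    thumbnails and a status bar with timing information."""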
# TODO: replace opencv with QPainter to support alpha? https://doc.qt.io/qtforpython-5/PySide2/QtGui/QPainter.html#PySide2.QtGui.PySide2.QtGui.QPainter.drawImage
# or https://github.com/pygobject/pycairo?tab=readme-ov-file
# or https://pyglet.readthedocs.io/en/latest/programming_guide/shapes.html
# and use http://code.astraw.com/projects/motmot/pygarrayimage.html or https://gist.github.com/nkymut/1cb40ea6ae4de0cf9ded7332f1ca0d55
# or https://api.arcade.academy/en/stable/index.html (supports gradient color in line -- "Arcade is built on top of Pyglet and OpenGL.")
dst_img = frame.camera.img_to_world(frame.img, scale)
# mask = bg_subtractor.apply(dst_img)
# mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB).astype(float) / 255
# dst_img = dst_img * mask
# undistorted_img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx)
# dst_img = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H),(config.camera.w,config.camera.h))
# dst_img2 = cv2.warpPerspective(undistorted_img,convert_world_space_to_img_space(config.camera.H), None)
# cv2.imwrite('/home/ruben/suspicion/DATASETS/hof3/camera2.png', dst_img2)
overlay = np.zeros(dst_img.shape, np.uint8)
# Fill image with red color(set each pixel to red)
overlay[:] = (0, 0, 0)
# img = cv2.addWeighted(dst_img, .2, overlay, .3, 0)
img = dst_img.copy()
# all not working:
# if i == 1:
# # thanks to GpG for fixing scaling issue: https://stackoverflow.com/a/39668864
# scale_factor = 1./20 # from 10m to 1000px
# S = np.array([[scale_factor, 0,0],[0,scale_factor,0 ],[ 0,0,1 ]])
# new_H = S * self.H * np.linalg.inv(S)
# warpedFrame = cv2.warpPerspective(img, new_H, (1000,1000))
# cv2.imwrite(str(self.config.output_dir / "orig.png"), warpedFrame)
cv2.rectangle(img, (0,0), (img.shape[1],25), (0,0,0), -1)
if detections:
for detection in detections:
points = [
detection.get_foot_coords(),
[detection.l, detection.t],
[detection.l + detection.w, detection.t + detection.h],
]
            points = frame.camera.points_img_to_world(points, scale)  # use the video frame's camera; tracker_frame may still be None here
points = [to_point(p) for p in points] # to int
w = points[1][0]-points[2][0]
feet = [int(points[2][0] + .5 * w), points[2][1]]
cv2.rectangle(img, points[1], points[2], (255,255,0), 2)
cv2.circle(img, points[0], 5, (255,255,0), 2)
cv2.putText(img, f"{detection.conf:.02f}", (points[0][0], points[0][1]+20), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,0), 1)
def conversion(points):
return convert_world_points_to_img_points(points, scale)
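    # conversion() maps world coordinates to pixels in the rendered image;
    # with scale=100 a world point (1.5, 2.0) lands around pixel (150, 200).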
if not tracker_frame:
cv2.putText(img, f"and track", (650,17), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,0), 1)
else:
for track_id, track in tracks.items():
draw_track_projected(img, track, int(track_id), tracker_frame.camera, conversion)
for line in debug_lines:
for rp1, rp2 in zip(line.points, line.points[1:]):
p1 = (
int(rp1.position[0]*scale),
int(rp1.position[1]*scale),
)
p2 = (
int(rp2.position[0]*scale),
int(rp2.position[1]*scale),
)
cv2.line(img, p1, p2, (255,0,0), 2)
# points = [(int(point[0]*scale), int(point[1]*scale)) for point in points]
# for num, points in enumerate(frame.camera.debug_lines):
# cv2.line(img, points[0], points[1], (255,0,0), 2)
# if hasattr(frame.camera, 'debug_points'):
# for num, point in enumerate(frame.camera.debug_points):
# cv2.circle(img, (int(point[0]*scale), int(point[1]*scale)), 5, (255,0,0), 2)
# cv2.putText(img, f"{num}", (int(point[0]*scale)+20, int(point[1]*scale)), cv2.FONT_HERSHEY_PLAIN, 1, (255,0,0), 1)
if not prediction_frame:
cv2.putText(img, f"Waiting for prediction...", (500,17), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,0), 1)
# continue
else:
for track_id, track in predictions.items():
# For debugging:
# draw_trackjectron_history(img, track, int(track.track_id), conversion)
anim_position = get_animation_position(track, frame)
draw_track_predictions(img, track, int(track.track_id)+1, frame.camera, conversion, anim_position=anim_position, as_clusters=as_clusters)
cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255,255,255), 1)
if prediction_frame.maps:
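            # Draw small thumbnails of the predictor's input maps along the bottom
            # edge: each map tensor is transposed/flipped into an HWC uint8 image,
            # given a marker and border, and blitted side by side until they no
            # longer fit in the frame width.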
for i, m in enumerate(prediction_frame.maps):
map_img = np.ascontiguousarray(np.flipud(np.transpose(m[0], (2, 1, 0))*255), np.uint8)
cv2.circle(map_img, (10,50), 5, (0,255,0), 2)
cv2.line(map_img, (10,50), (10+15, 50), (0,0,255), 2)
cv2.rectangle(map_img, (0,0), (map_img.shape[1]-1, map_img.shape[0]-1), (255,255,255), 1)
height, width, _ = map_img.shape
padding= 50
y = img.shape[0] - padding - height
x = width*i
if x+width > img.shape[1]:
break # stop drawing maps when there's a lot of them
img[y:y+height,x:x+width] = map_img
base_color = (255,)*3
info_color = (255,255,0)
predictor_color = (255,0,255)
tracker_color = (0,255,255)
cv2.putText(img, f"{frame.index:06d}", (20,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
cv2.putText(img, f"{frame.time - first_time: >10.2f}s", (150,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
cv2.putText(img, f"{frame.time - time.time():.2f}s", (250,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
options = []
    if prediction_frame and tracker_frame:
        # render Δt and Δ frames (requires both the tracker and prediction frame)
cv2.putText(img, f"{tracker_frame.index - frame.index}", (90,17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1)
cv2.putText(img, f"{prediction_frame.index - frame.index}", (120,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)
cv2.putText(img, f"{tracker_frame.time - time.time():.2f}s", (310,17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1)
cv2.putText(img, f"{prediction_frame.time - time.time():.2f}s", (380,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)
cv2.putText(img, f"{len(tracker_frame.tracks)} tracks", (620,17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
cv2.putText(img, f"h: {np.average([len(t.history or []) for t in prediction_frame.tracks.values()]):.2f}", (700,17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1)
cv2.putText(img, f"ph: {np.average([len(t.predictor_history or []) for t in prediction_frame.tracks.values()]):.2f}", (780,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)
cv2.putText(img, f"p: {np.average([len(t.predictions or []) for t in prediction_frame.tracks.values()]):.2f}", (860,17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)
for option, value in prediction_frame.log['predictor'].items():
options.append(f"{option}: {value}")
if len(options):
cv2.putText(img, options.pop(-1), (20,img.shape[0]-30), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
cv2.putText(img, " | ".join(options), (20,img.shape[0]-10), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
return img
def run_cv_renderer(config: Namespace, is_running: BaseEvent, timer_counter):
renderer = CvRenderer(config, is_running)
    renderer.run()  # run() takes no extra arguments; timer_counter is currently unused
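

# Minimal usage sketch (assuming the defaults from arg_parser() and a plain
# multiprocessing Event; the project's real entry point lives elsewhere):
#
#   from multiprocessing import Event
#
#   config = CvRenderer.arg_parser().parse_args()
#   run_cv_renderer(config, is_running=Event(), timer_counter=None)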