# used for "Forward Referencing of type annotations"
|
|
from __future__ import annotations
|
|
|
|
import datetime
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
import time
|
|
from argparse import ArgumentParser, Namespace
|
|
from multiprocessing.synchronize import Event as BaseEvent
|
|
from typing import Dict, List, Optional
|
|
|
|
from charset_normalizer import detect
|
|
import cv2
|
|
import ffmpeg
|
|
import numpy as np
|
|
import pyglet
|
|
import zmq
|
|
from pyglet import shapes
|
|
|
|
from trap.base import Detection
|
|
from trap.counter import CounterListerner
|
|
from trap.frame_emitter import Frame, Track
|
|
from trap.lines import load_lines_from_svg
|
|
from trap.node import Node
|
|
from trap.preview_renderer import FrameWriter
|
|
from trap.tools import draw_track_predictions, draw_track_projected, to_point
|
|
from trap.utils import convert_world_points_to_img_points
|
|
|
|
logger = logging.getLogger("trap.simple_renderer")
|
|
|
|
class CvRenderer(Node):
    def setup(self):
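        # Subscribe to the four upstream ZMQ feeds: predictions, tracker
        # trajectories, raw detections and camera frames.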
        self.prediction_sock = self.sub(self.config.zmq_prediction_addr)
        self.tracker_sock = self.sub(self.config.zmq_trajectory_addr)
        self.detector_sock = self.sub(self.config.zmq_detection_addr)
        self.frame_sock = self.sub(self.config.zmq_frame_addr)

        # self.H = self.config.H
        # self.inv_H = np.linalg.pinv(self.H)

        # TODO: get FPS from frame_emitter
        # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
        self.fps = 60
        self.frame_size = None  # configured on first frame recv
        # self.frame_size = (self.config.camera.projected_w, self.config.camera.projected_h)

        self.out_writer = self.start_writer() if self.config.render_file else None
        self.streaming_process = self.start_streaming() if self.config.render_url else None

        self.first_time: float | None = None
        self.frame: Frame | None = None
        self.tracker_frame: Frame | None = None
        self.prediction_frame: Frame | None = None
        self.detections: List[Detection] | None = None

        self.tracks: Dict[str, Track] = {}
        self.predictions: Dict[str, Track] = {}

        self.scale = 100
        self.debug_lines = load_lines_from_svg(self.config.debug_map, self.scale, '') if self.config.debug_map else []

    def refresh_labels(self, dt: float):
        """Called every frame to refresh the on-screen statistics labels."""
        if self.frame:
            self.labels['frame_idx'].text = f"{self.frame.index:06d}"
            self.labels['frame_time'].text = f"{self.frame.time - self.first_time: >10.2f}s"
            self.labels['frame_latency'].text = f"{self.frame.time - time.time():.2f}s"

        if self.tracker_frame:
            self.labels['tracker_idx'].text = f"{self.tracker_frame.index - self.frame.index}"
            self.labels['tracker_time'].text = f"{self.tracker_frame.time - time.time():.3f}s"
            self.labels['track_len'].text = f"{len(self.tracker_frame.tracks)} tracks"

        if self.prediction_frame:
            self.labels['pred_idx'].text = f"{self.prediction_frame.index - self.frame.index}"
            self.labels['pred_time'].text = f"{self.prediction_frame.time - time.time():.3f}s"
            # self.labels['track_len'].text = f"{len(self.prediction_frame.tracks)} tracks"

    def start_writer(self):
        if not self.config.output_dir.exists():
            raise FileNotFoundError(f"Path does not exist: {self.config.output_dir}")

        date_str = datetime.datetime.now().isoformat(timespec="minutes")
        filename = self.config.output_dir / f"render_predictions-{date_str}-{self.config.detector}.mp4"
        logger.info(f"Write to {filename}")

        return FrameWriter(str(filename), self.fps, None)

        # fourcc = cv2.VideoWriter_fourcc(*'vp09')
        # return cv2.VideoWriter(str(filename), fourcc, self.fps, self.frame_size)

    def start_streaming(self, frame_size=(1920, 1080)):
        return (
            ffmpeg
            .input('pipe:', format='rawvideo', codec="rawvideo", pix_fmt='bgr24', s='{}x{}'.format(*frame_size))
            .output(
                self.config.render_url,
                # codec="copy",  # use the same codecs as the original video
                codec='libx264',
                listen=1,  # enables HTTP server
                pix_fmt="yuv420p",
                preset="ultrafast",
                tune="zerolatency",
                # g=f"{self.fps*2}",
                g=f"{60*2}",
                analyzeduration="2000000",
                probesize="1000000",
                f='mpegts'
            )
            .overwrite_output()
            .run_async(pipe_stdin=True)
        )
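        # The subprocess above reads raw BGR frames from stdin; run() feeds it
        # via img.tobytes(), so rendered frames must match frame_size exactly
        # or ffmpeg will misinterpret the byte stream.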
        # return process

    def run(self):
        self.frame = None
        prediction_frame = None
        tracker_frame = None

        i = 0
        first_time = None

        cv2.namedWindow("frame", cv2.WINDOW_NORMAL)
        # https://gist.github.com/ronekko/dc3747211543165108b11073f929b85e
        cv2.moveWindow("frame", 0, -1)
        if self.config.full_screen:
            cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

        cv2.setMouseCallback('frame', self.click_print_position)
        # bgsub = cv2.createBackgroundSubtractorMOG2(120, 50, detectShadows=True)

        while self.run_loop():
            i += 1
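            # Drain each socket without blocking; on zmq.ZMQError we simply
            # keep the last received value, so rendering continues at display
            # rate even when upstream nodes publish more slowly.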
            # zmq_ev = self.frame_sock.poll(timeout=2000)
            # if not zmq_ev:
            #     # when no data comes in, loop so that is_running is checked
            #     continue

            try:
                self.frame: Frame = self.frame_sock.recv_pyobj(zmq.NOBLOCK)
            except zmq.ZMQError:
                # no new frame arrived; reuse the previous one
                # idx = frame.index if frame else "NONE"
                # logger.debug(f"reuse video frame {idx}")
                pass
            # else:
            #     logger.debug(f'new video frame {frame.index}')

            if self.frame is None:
                # might need to wait a few iterations before the first frame comes available
                time.sleep(.1)
                continue
            try:
                prediction_frame: Frame = self.prediction_sock.recv_pyobj(zmq.NOBLOCK)
                for track_id, track in prediction_frame.tracks.items():
                    prediction_id = f"{track_id}-{track.history[-1].frame_nr}"
                    self.predictions[prediction_id] = track
            except zmq.ZMQError:
                logger.debug('reuse prediction')

            try:
                tracker_frame: Frame = self.tracker_sock.recv_pyobj(zmq.NOBLOCK)
                for track_id, track in tracker_frame.tracks.items():
                    self.tracks[track_id] = track
            except zmq.ZMQError:
                logger.debug('reuse tracks')

            try:
                self.detections = self.detector_sock.recv_pyobj(zmq.NOBLOCK)
                # print('detections')
            except zmq.ZMQError:
                # print('no detections')
                # no new detections; keep the previous ones
                pass
            if first_time is None:
                first_time = self.frame.time

            # img = frame.img
            # save_file = Path("videos/snap.png")
            # if not save_file.exists():
            #     img = frame.camera.img_to_world(frame.img, 100)
            #     cv2.imwrite(save_file, img)
            img = decorate_frame(self.frame, tracker_frame, prediction_frame, first_time, self.config, self.tracks, self.predictions, self.detections, self.config.render_clusters, self.debug_lines, self.scale)

            logger.debug(f"write frame {self.frame.time - first_time:.3f}s")
            if self.out_writer:
                self.out_writer.write(img)
            if self.streaming_process:
                self.streaming_process.stdin.write(img.tobytes())
            if not self.config.no_window:
                cv2.imshow('frame', cv2.resize(img, (1920, 1080)))
                # cv2.imshow('frame', img)
                cv2.waitKey(10)
# clear out old tracks & predictions:
|
|
|
|
for track_id, track in list(self.tracks.items()):
|
|
if get_animation_position(track, self.frame) == 1:
|
|
self.tracks.pop(track_id)
|
|
for prediction_id, track in list(self.predictions.items()):
|
|
if get_animation_position(track, self.frame) == 1:
|
|
self.predictions.pop(prediction_id)
|
|
|
|
logger.info('Stopping')
|
|
|
|
# if i>2:
|
|
if self.streaming_process:
|
|
self.streaming_process.stdin.close()
|
|
if self.out_writer:
|
|
self.out_writer.release()
|
|
if self.streaming_process:
|
|
# oddly wrapped, because both close and release() take time.
|
|
logger.info('wait for closing stream')
|
|
self.streaming_process.wait()
|
|
|
|
logger.info('stopped')
|
|
|
|
|
|
    @classmethod
    def arg_parser(cls):
        render_parser = ArgumentParser()
        render_parser.add_argument('--zmq-frame-addr',
                                   help='Manually specify the communication addr for the frame messages',
                                   type=str,
                                   default="ipc:///tmp/feeds_frame")
        render_parser.add_argument('--zmq-trajectory-addr',
                                   help='Manually specify the communication addr for the trajectory messages',
                                   type=str,
                                   default="ipc:///tmp/feeds_traj")

        render_parser.add_argument('--zmq-detection-addr',
                                   help='Manually specify the communication addr for the detection messages',
                                   type=str,
                                   default="ipc:///tmp/feeds_dets")

        render_parser.add_argument('--zmq-prediction-addr',
                                   help='Manually specify the communication addr for the prediction messages',
                                   type=str,
                                   default="ipc:///tmp/feeds_preds")

        render_parser.add_argument("--render-file",
                                   help="Render a video file previewing the prediction, and its delay compared to the current frame",
                                   action='store_true')
        render_parser.add_argument("--no-window",
                                   help="Disable previewing in a window",
                                   action='store_true')

        render_parser.add_argument("--full-screen",
                                   help="Set window to full screen",
                                   action='store_true')
        render_parser.add_argument("--render-clusters",
                                   help="Render arrowed clusters instead of individual predictions",
                                   action='store_true')

        render_parser.add_argument("--render-url",
                                   help="""Stream renderer on given URL. Two easy approaches:
                                   - using the zmq wrapper one can specify the LISTENING ip. To listen to any incoming connection: zmq:tcp://0.0.0.0:5556
                                   - alternatively, using e.g. UDP one needs to specify the IP of the client. E.g. udp://100.69.123.91:5556/stream
                                   Note that with ZMQ you can have multiple clients connecting simultaneously. E.g. using `ffplay zmq:tcp://100.109.175.82:5556`
                                   When using UDP, connecting can be done using `ffplay udp://100.109.175.82:5556/stream`
                                   """,
                                   type=str,
                                   default=None)
        render_parser.add_argument('--debug-map',
                                   help='Specify a map (an SVG file) from which to load lines which will be overlaid',
                                   type=str,
                                   default="../DATASETS/hof3/map_hof.svg")
        return render_parser

    def click_print_position(self, event, x, y, flags, param):
        # if event == cv2.EVENT_LBUTTONDBLCLK:
        if event == cv2.EVENT_LBUTTONUP:
            if not self.frame:
                return
            scale = 100
            print("click position:", x / scale, y / scale)
            # self.frame.camera.points_img_to_world([[x, y]], 1)
            # cv2.circle(img, (x, y), 100, (255, 0, 0), -1)
            mouseX, mouseY = x, y


# colorset = itertools.product([0,255], repeat=3)  # but remove white
# colorset = [(0, 0, 0),
#             (0, 0, 255),
#             (0, 255, 0),
#             (0, 255, 255),
#             (255, 0, 0),
#             (255, 0, 255),
#             (255, 255, 0)
#             ]
colorset = [
    (255, 255, 100),
    (255, 100, 255),
    (100, 255, 255),
]
# colorset = [
#     (0, 0, 0),
# ]


def get_animation_position(track: Track, current_frame: Frame) -> float:
    """Fade progress of a track: 0 while fresh, 1 once its last-seen frame is
    three seconds (in camera frames) behind the current frame."""
    fade_duration = current_frame.camera.fps * 3
    diff = current_frame.index - track.history[-1].frame_nr
    return max(0, min(1, diff / fade_duration))
    # track.history[-1].frame_nr < (current_frame.index - current_frame.camera.fps * 3)
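# For example, with a 60 fps camera the fade window is 180 frames: a track
# last seen 90 frames ago yields 90/180 = 0.5, and anything 180 or more
# frames old is clamped to 1 (fully faded, so run() drops it).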


def decorate_frame(frame: Frame, tracker_frame: Optional[Frame], prediction_frame: Optional[Frame], first_time: float, config: Namespace, tracks: Dict[str, Track], predictions: Dict[str, Track], detections: Optional[List[Detection]], as_clusters=True, debug_lines=[], scale: float = 100) -> np.ndarray:
    # TODO: replace opencv with QPainter to support alpha? https://doc.qt.io/qtforpython-5/PySide2/QtGui/QPainter.html#PySide2.QtGui.PySide2.QtGui.QPainter.drawImage
    # or https://github.com/pygobject/pycairo?tab=readme-ov-file
    # or https://pyglet.readthedocs.io/en/latest/programming_guide/shapes.html
    # and use http://code.astraw.com/projects/motmot/pygarrayimage.html or https://gist.github.com/nkymut/1cb40ea6ae4de0cf9ded7332f1ca0d55
    # or https://api.arcade.academy/en/stable/index.html (supports gradient color in line -- "Arcade is built on top of Pyglet and OpenGL.")
    dst_img = frame.camera.img_to_world(frame.img, scale)
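    # img_to_world re-projects the camera image onto the ground plane at
    # `scale` pixels per world unit (100 px per unit here, presumably metres),
    # so all drawing below happens in top-down world coordinates.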
    # mask = bg_subtractor.apply(dst_img)
    # mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB).astype(float) / 255
    # dst_img = dst_img * mask

    # undistorted_img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx)
    # dst_img = cv2.warpPerspective(undistorted_img, convert_world_space_to_img_space(config.camera.H), (config.camera.w, config.camera.h))
    # dst_img2 = cv2.warpPerspective(undistorted_img, convert_world_space_to_img_space(config.camera.H), None)
    # cv2.imwrite('/home/ruben/suspicion/DATASETS/hof3/camera2.png', dst_img2)

    overlay = np.zeros(dst_img.shape, np.uint8)
    # Fill image with black (set each pixel to black)
    overlay[:] = (0, 0, 0)

    # img = cv2.addWeighted(dst_img, .2, overlay, .3, 0)
    img = dst_img.copy()

    # all not working:
    # if i == 1:
    #     # thanks to GpG for fixing scaling issue: https://stackoverflow.com/a/39668864
    #     scale_factor = 1./20  # from 10m to 1000px
    #     S = np.array([[scale_factor, 0, 0], [0, scale_factor, 0], [0, 0, 1]])
    #     new_H = S * self.H * np.linalg.inv(S)
    #     warpedFrame = cv2.warpPerspective(img, new_H, (1000, 1000))
    #     cv2.imwrite(str(self.config.output_dir / "orig.png"), warpedFrame)
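    # Black status bar across the top of the frame; the timing and statistics
    # text further below is drawn onto it.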
    cv2.rectangle(img, (0, 0), (img.shape[1], 25), (0, 0, 0), -1)

    if detections:
        for detection in detections:
            points = [
                detection.get_foot_coords(),
                [detection.l, detection.t],
                [detection.l + detection.w, detection.t + detection.h],
            ]
            points = tracker_frame.camera.points_img_to_world(points, scale)
            points = [to_point(p) for p in points]  # to int

            w = points[1][0] - points[2][0]
            feet = [int(points[2][0] + .5 * w), points[2][1]]
            cv2.rectangle(img, points[1], points[2], (255, 255, 0), 2)
            cv2.circle(img, points[0], 5, (255, 255, 0), 2)
            cv2.putText(img, f"{detection.conf:.02f}", (points[0][0], points[0][1] + 20), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0), 1)

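    # Helper passed into the draw_* utilities below so they can map
    # world-space track coordinates into this image's pixel space.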
    def conversion(points):
        return convert_world_points_to_img_points(points, scale)

    if not tracker_frame:
        cv2.putText(img, "and track", (650, 17), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0), 1)
    else:
        for track_id, track in tracks.items():
            inv_H = np.linalg.pinv(tracker_frame.H)
            draw_track_projected(img, track, int(track_id), tracker_frame.camera, conversion)

    for line in debug_lines:
        for rp1, rp2 in zip(line.points, line.points[1:]):
            p1 = (
                int(rp1.position[0] * scale),
                int(rp1.position[1] * scale),
            )
            p2 = (
                int(rp2.position[0] * scale),
                int(rp2.position[1] * scale),
            )
            cv2.line(img, p1, p2, (255, 0, 0), 2)
            # points = [(int(point[0]*scale), int(point[1]*scale)) for point in points]

    # for num, points in enumerate(frame.camera.debug_lines):
    #     cv2.line(img, points[0], points[1], (255, 0, 0), 2)

    # if hasattr(frame.camera, 'debug_points'):
    #     for num, point in enumerate(frame.camera.debug_points):
    #         cv2.circle(img, (int(point[0]*scale), int(point[1]*scale)), 5, (255, 0, 0), 2)
    #         cv2.putText(img, f"{num}", (int(point[0]*scale)+20, int(point[1]*scale)), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 1)

    if not prediction_frame:
        cv2.putText(img, "Waiting for prediction...", (500, 17), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0), 1)
        # continue
    else:
        for track_id, track in predictions.items():
            inv_H = np.linalg.pinv(prediction_frame.H)
            # For debugging:
            # draw_trackjectron_history(img, track, int(track.track_id), conversion)
            anim_position = get_animation_position(track, frame)
            draw_track_predictions(img, track, int(track.track_id) + 1, frame.camera, conversion, anim_position=anim_position, as_clusters=as_clusters)
            cv2.putText(img, f"{len(track.predictor_history) if track.predictor_history else 'none'}", to_point(track.history[0].get_foot_coords()), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 1)
        if prediction_frame.maps:
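            # Blit each input map the predictor received as a small thumbnail
            # along the bottom edge of the frame, for debugging.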
            for i, m in enumerate(prediction_frame.maps):
                map_img = np.ascontiguousarray(np.flipud(np.transpose(m[0], (2, 1, 0)) * 255), np.uint8)
                cv2.circle(map_img, (10, 50), 5, (0, 255, 0), 2)
                cv2.line(map_img, (10, 50), (10 + 15, 50), (0, 0, 255), 2)
                cv2.rectangle(map_img, (0, 0), (map_img.shape[1] - 1, map_img.shape[0] - 1), (255, 255, 255), 1)

                height, width, _ = map_img.shape
                padding = 50
                y = img.shape[0] - padding - height
                x = width * i

                if x + width > img.shape[1]:
                    break  # stop drawing maps when there are a lot of them

                img[y:y + height, x:x + width] = map_img

    base_color = (255,) * 3
    info_color = (255, 255, 0)
    predictor_color = (255, 0, 255)
    tracker_color = (0, 255, 255)

    cv2.putText(img, f"{frame.index:06d}", (20, 17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
    cv2.putText(img, f"{frame.time - first_time: >10.2f}s", (150, 17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
    cv2.putText(img, f"{frame.time - time.time():.2f}s", (250, 17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)

    options = []

    if prediction_frame:
        # render Δt and Δ frames
        cv2.putText(img, f"{tracker_frame.index - frame.index}", (90, 17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1)
        cv2.putText(img, f"{prediction_frame.index - frame.index}", (120, 17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)
        cv2.putText(img, f"{tracker_frame.time - time.time():.2f}s", (310, 17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1)
        cv2.putText(img, f"{prediction_frame.time - time.time():.2f}s", (380, 17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)
        cv2.putText(img, f"{len(tracker_frame.tracks)} tracks", (620, 17), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
        cv2.putText(img, f"h: {np.average([len(t.history or []) for t in prediction_frame.tracks.values()]):.2f}", (700, 17), cv2.FONT_HERSHEY_PLAIN, 1, tracker_color, 1)
        cv2.putText(img, f"ph: {np.average([len(t.predictor_history or []) for t in prediction_frame.tracks.values()]):.2f}", (780, 17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)
        cv2.putText(img, f"p: {np.average([len(t.predictions or []) for t in prediction_frame.tracks.values()]):.2f}", (860, 17), cv2.FONT_HERSHEY_PLAIN, 1, predictor_color, 1)

        for option, value in prediction_frame.log['predictor'].items():
            options.append(f"{option}: {value}")

    if len(options):
        cv2.putText(img, options.pop(-1), (20, img.shape[0] - 30), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)
        cv2.putText(img, " | ".join(options), (20, img.shape[0] - 10), cv2.FONT_HERSHEY_PLAIN, 1, base_color, 1)

    return img


def run_cv_renderer(config: Namespace, is_running: BaseEvent, timer_counter):
    renderer = CvRenderer(config, is_running)
    renderer.run()  # CvRenderer.run() takes no arguments; timer_counter is unused here
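

# Minimal standalone launch sketch (an assumption, not part of the original
# pipeline, which wires CvRenderer up through trap's own process management).
if __name__ == "__main__":
    from multiprocessing import Event

    cli_config = CvRenderer.arg_parser().parse_args()
    running = Event()  # hypothetical stop flag; trap normally supplies this
    running.set()
    run_cv_renderer(cli_config, running, None)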