# trap/trap/frame_emitter.py
from __future__ import annotations
from argparse import Namespace
from dataclasses import dataclass, field
import dataclasses
from enum import IntFlag
from itertools import cycle
import json
import logging
from multiprocessing import Event
from pathlib import Path
import pickle
import sys
import time
from typing import Iterable, List, Optional
import numpy as np
import cv2
import pandas as pd
import zmq
import os
from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack
from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState
from bytetracker.byte_tracker import STrack as ByteTrackTrack
from bytetracker.basetrack import TrackState as ByteTrackTrackState
from trajectron.environment import Environment, Node, Scene
from urllib.parse import urlparse
from trap.utils import lerp
logger = logging.getLogger('trap.frame_emitter')
class DataclassJSONEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, np.ndarray):
return o.tolist()
if dataclasses.is_dataclass(o):
d = dataclasses.asdict(o)
if isinstance(o, Frame):
# Don't send images over JSON
del d['img']
return d
return super().default(o)
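
# Usage sketch (hypothetical frame object; Frame is defined below): nested
# dataclasses and numpy arrays serialize transparently, and the 'img' field of
# a Frame is stripped so no raw image data ends up in the JSON.
#     payload = json.dumps(frame, cls=DataclassJSONEncoder)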
class UrlOrPath():
def __init__(self, string):
self.url = urlparse(str(string))
def __str__(self) -> str:
return self.url.geturl()
def is_url(self) -> bool:
return len(self.url.netloc) > 0
def path(self) -> Path:
if self.is_url():
return Path(self.url.path)
return Path(self.url.geturl()) # can include scheme, such as C:/
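
# Example (sketch): both URLs and bare paths are accepted.
#     UrlOrPath('rtsp://example.com/stream').is_url()  # True
#     UrlOrPath('videos/test.mp4').is_url()            # False
#     UrlOrPath('videos/test.mp4').path()              # Path('videos/test.mp4')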
class DetectionState(IntFlag):
Tentative = 1 # state before n_init (see DeepsortTrack)
Confirmed = 2 # after tentative
Lost = 4 # lost when DeepsortTrack.time_since_update > 0 but not Deleted
@classmethod
def from_deepsort_track(cls, track: DeepsortTrack):
if track.state == DeepsortTrackState.Tentative:
return cls.Tentative
if track.state == DeepsortTrackState.Confirmed:
if track.time_since_update > 0:
return cls.Lost
return cls.Confirmed
raise RuntimeError("Should not run into Deleted entries here")
@classmethod
def from_bytetrack_track(cls, track: ByteTrackTrack):
if track.state == ByteTrackTrackState.New:
return cls.Tentative
if track.state == ByteTrackTrackState.Lost:
return cls.Lost
# if track.time_since_update > 0:
if track.state == ByteTrackTrackState.Tracked:
return cls.Confirmed
raise RuntimeError("Should not run into Deleted entries here")
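
# Because DetectionState is an IntFlag, members combine and test bitwise, e.g.
# to accept anything that is no longer tentative (sketch):
#     if detection.state & (DetectionState.Confirmed | DetectionState.Lost):
#         ...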
@dataclass
class Camera:
    mtx: cv2.Mat
    dist: cv2.Mat
    w: float
    h: float
    H: cv2.Mat  # homography
    fps: float
    newcameramtx: cv2.Mat = field(init=False)
    roi: cv2.typing.Rect = field(init=False)

    def __post_init__(self):
        # getOptimalNewCameraMatrix expects integer pixel dimensions
        self.newcameramtx, self.roi = cv2.getOptimalNewCameraMatrix(self.mtx, self.dist, (int(self.w), int(self.h)), 1, (int(self.w), int(self.h)))
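
# Construction sketch with placeholder intrinsics (identity camera matrix, no
# lens distortion; values are hypothetical):
#     cam = Camera(mtx=np.eye(3), dist=np.zeros((1, 5)), w=1280, h=720, H=np.eye(3), fps=12)
#     cam.newcameramtx, cam.roi  # refined matrix and crop, set in __post_init__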
@dataclass
class Detection:
track_id: str # deepsort track id association
l: int # left - image space
t: int # top - image space
w: int # width - image space
h: int # height - image space
    conf: float # object detector probability
state: DetectionState
frame_nr: int
det_class: str
    def get_foot_coords(self) -> list[float]:
        # bottom-centre of the bounding box, in image space
        return [self.l + 0.5 * self.w, self.t + self.h]
@classmethod
def from_deepsort(cls, dstrack: DeepsortTrack, frame_nr: int):
return cls(dstrack.track_id, *dstrack.to_ltwh(), dstrack.det_conf, DetectionState.from_deepsort_track(dstrack), frame_nr, dstrack.det_class)
@classmethod
def from_bytetrack(cls, bstrack: ByteTrackTrack, frame_nr: int):
return cls(bstrack.track_id, *bstrack.tlwh, bstrack.score, DetectionState.from_bytetrack_track(bstrack), frame_nr, bstrack.cls)
def get_scaled(self, scale: float = 1):
if scale == 1:
return self
return Detection(
self.track_id,
self.l*scale,
self.t*scale,
self.w*scale,
self.h*scale,
self.conf,
self.state,
self.frame_nr,
self.det_class)
def to_ltwh(self):
return (int(self.l), int(self.t), int(self.w), int(self.h))
def to_ltrb(self):
return (int(self.l), int(self.t), int(self.l+self.w), int(self.t+self.h))
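
# Coordinate conventions (sketch): a detection with l=10, t=20, w=30, h=40 gives
#     d.to_ltwh()          # (10, 20, 30, 40)
#     d.to_ltrb()          # (10, 20, 40, 60)
#     d.get_foot_coords()  # [25.0, 60] -- bottom-centre, used for ground projection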
@dataclass
class Track:
"""A bit of an haphazardous wrapper around the 'real' tracker to provide
a history, with which the predictor can work, as we then can deduce velocity
and acceleration.
"""
track_id: str = None
history: List[Detection] = field(default_factory=lambda: [])
predictor_history: Optional[list] = None # in image space
predictions: Optional[list] = None
fps: int = 12
    def get_projected_history(self, H, camera: Optional[Camera] = None) -> np.ndarray:
foot_coordinates = [d.get_foot_coords() for d in self.history]
# TODO)) Undistort points before perspective transform
if len(foot_coordinates):
if camera:
coords = cv2.undistortPoints(np.array([foot_coordinates]).astype('float32'), camera.mtx, camera.dist, None, camera.newcameramtx)
coords = cv2.perspectiveTransform(np.array(coords),camera.H)
return coords.reshape((coords.shape[0],2))
else:
coords = cv2.perspectiveTransform(np.array([foot_coordinates]),H)
return coords[0]
return np.array([])
    def get_projected_history_as_dict(self, H, camera: Optional[Camera] = None) -> list[dict]:
        coords = self.get_projected_history(H, camera)
        return [{"x": c[0], "y": c[1]} for c in coords]
    def get_with_interpolated_history(self) -> Track:
        # fill gaps in the history by linearly interpolating the bounding box
        # between consecutive detections
        new_history = []
        for j in range(len(self.history) - 1):
            a = self.history[j]
            b = self.history[j + 1]
            gap = b.frame_nr - a.frame_nr
            new_history.append(Detection(a.track_id, a.l, a.t, a.w, a.h, a.conf, a.state, a.frame_nr, a.det_class))
            if gap < 1:
                logger.error(f"WARNING, gap between frames {a.frame_nr} -> {b.frame_nr} is zero or negative?")
            if gap > 1:
                for g in range(1, gap):
                    l = lerp(a.l, b.l, g / gap)
                    t = lerp(a.t, b.t, g / gap)
                    w = lerp(a.w, b.w, g / gap)
                    h = lerp(a.h, b.h, g / gap)
                    conf = 0
                    state = DetectionState.Lost
                    frame_nr = a.frame_nr + g
                    new_history.append(Detection(a.track_id, l, t, w, h, conf, state, frame_nr, a.det_class))
        if len(self.history):
            # the loop above stops at the penultimate detection; keep the last one too
            last = self.history[-1]
            new_history.append(Detection(last.track_id, last.l, last.t, last.w, last.h, last.conf, last.state, last.frame_nr, last.det_class))
        return Track(
            self.track_id,
            new_history,
            self.predictor_history,
            self.predictions,
            self.fps)
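
    # Interpolation sketch (hypothetical detections at frame_nr 1 and 4): frames
    # 2 and 3 get synthetic boxes flagged DetectionState.Lost with conf=0, so
    # consumers can tell real detections from interpolated ones:
    #     dense = Track('t1', [det_f1, det_f4]).get_with_interpolated_history()
    #     [d.frame_nr for d in dense.history]  # [1, 2, 3, 4]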
    def to_trajectron_node(self, camera: Camera, env: Environment) -> Node:
        positions = self.get_projected_history(None, camera)
        # sample spacing is the frame interval dt = 1/fps, matching trajectron's
        # derivative_of(x, scene.dt)
        velocity = np.gradient(positions, 1 / self.fps, axis=0)
        acceleration = np.gradient(velocity, 1 / self.fps, axis=0)
        new_first_idx = self.history[0].frame_nr
        data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
data_dict = {
('position', 'x'): positions[:,0],
('position', 'y'): positions[:,1],
('velocity', 'x'): velocity[:,0],
('velocity', 'y'): velocity[:,1],
('acceleration', 'x'): acceleration[:,0],
('acceleration', 'y'): acceleration[:,1]
}
node_data = pd.DataFrame(data_dict, columns=data_columns)
return Node(node_type=env.NodeType.PEDESTRIAN, node_id=self.track_id, data=node_data, first_timestep=new_first_idx)
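
    # Node construction sketch (assumes an Environment whose NodeType includes
    # PEDESTRIAN): the node carries position/velocity/acceleration columns
    # indexed from the track's first frame number, ready for the predictor:
    #     node = track.to_trajectron_node(camera, env)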
@dataclass
class Frame:
    index: int
    img: np.ndarray
    time: float = field(default_factory=lambda: time.time())
    tracks: Optional[dict[str, Track]] = None
    H: Optional[np.ndarray] = None
    camera: Optional[Camera] = None
    def aslist(self) -> dict:
        # despite the name, this returns a dict keyed by track_id
        return {t.track_id:
            {
                'id': t.track_id,
                'history': t.get_projected_history(self.H).tolist(),
                'det_conf': t.history[-1].conf,
                'predictions': t.predictions
            } for t in self.tracks.values()
        }
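
# Per-frame serialization sketch: aslist() plus DataclassJSONEncoder yields a
# JSON-safe payload without the raw image (hypothetical frame object):
#     json.dumps(frame.aslist(), cls=DataclassJSONEncoder)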
def video_src_from_config(config) -> Iterable[UrlOrPath]:
    if config.video_loop:
        video_srcs: Iterable[UrlOrPath] = cycle(config.video_src)
    else:
        video_srcs: Iterable[UrlOrPath] = config.video_src
    return video_srcs
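
# With video_loop set, itertools.cycle repeats the playlist indefinitely
# (sketch; Namespace fields as used above):
#     config = Namespace(video_loop=True, video_src=[UrlOrPath('a.mp4'), UrlOrPath('b.mp4')])
#     video_src_from_config(config)  # yields a.mp4, b.mp4, a.mp4, b.mp4, ...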
class FrameEmitter:
    '''
    Emit frames in a separate thread so they can be throttled,
    or thrown away when the rest of the system cannot keep up
    '''
def __init__(self, config: Namespace, is_running: Event) -> None:
self.config = config
self.is_running = is_running
context = zmq.Context()
# TODO: to make things faster, a multiprocessing.Array might be a tad faster: https://stackoverflow.com/a/65201859
self.frame_sock = context.socket(zmq.PUB)
self.frame_sock.setsockopt(zmq.CONFLATE, 1) # only keep latest frame. make sure to set BEFORE connect/bind
self.frame_sock.bind(config.zmq_frame_addr)
        logger.info(f"Bound frame socket to {config.zmq_frame_addr}")
self.video_srcs = video_src_from_config(self.config)
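
    # Receiving-end sketch: a SUB socket with CONFLATE mirrors the PUB side, so
    # a slow consumer always unpickles only the latest Frame (address comes from
    # the same config):
    #     sock = zmq.Context().socket(zmq.SUB)
    #     sock.setsockopt(zmq.CONFLATE, 1)      # set before connect
    #     sock.connect(config.zmq_frame_addr)
    #     sock.setsockopt(zmq.SUBSCRIBE, b'')
    #     frame: Frame = pickle.loads(sock.recv())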
def emit_video(self):
i = 0
delay_generation = False
for video_path in self.video_srcs:
logger.info(f"Play from '{str(video_path)}'")
if str(video_path).isdigit():
# numeric input is a CV camera
video = cv2.VideoCapture(int(str(video_path)))
# TODO: make config variables
video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.camera.w))
video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.camera.h))
print("exposure!", video.get(cv2.CAP_PROP_AUTO_EXPOSURE))
video.set(cv2.CAP_PROP_FPS, 5)
fps=5
elif video_path.url.scheme == 'rtsp':
gst = f"rtspsrc location={video_path} latency=0 buffer-mode=auto ! decodebin ! videoconvert ! appsink max-buffers=1 drop=true"
logger.info(f"Capture gstreamer (gst-launch-1.0): {gst}")
video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
fps=12
else:
# os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "fflags;nobuffer|flags;low_delay|avioflags;direct|rtsp_transport;udp"
video = cv2.VideoCapture(str(video_path))
delay_generation = True
fps = video.get(cv2.CAP_PROP_FPS)
target_frame_duration = 1./fps
logger.info(f"Emit frames at {fps} fps")
if self.config.video_offset:
logger.info(f"Start at frame {self.config.video_offset}")
video.set(cv2.CAP_PROP_POS_FRAMES, self.config.video_offset)
i = self.config.video_offset
            video_H = self.config.camera.H  # NOTE: currently unused; Frame below is built with config.H
prev_time = time.time()
while self.is_running.is_set():
                ret, img = video.read()
                if not ret:
                    # this source is exhausted; move on to the next file (with
                    # video_loop, the cycled playlist supplies the next source)
                    break
if "DATASETS/hof/" in str(video_path):
# hack to mask out area
cv2.rectangle(img, (0,0), (800,200), (0,0,0), -1)
frame = Frame(index=i, img=img, H=self.config.H, camera=self.config.camera)
# TODO: this is very dirty, need to find another way.
# perhaps multiprocessing Array?
self.frame_sock.send(pickle.dumps(frame))
# only delay consuming the next frame when using a file.
# Otherwise, go ASAP
if delay_generation:
# defer next loop
now = time.time()
time_diff = (now - prev_time)
if time_diff < target_frame_duration:
time.sleep(target_frame_duration - time_diff)
now += target_frame_duration - time_diff
prev_time = now
i += 1
if not self.is_running.is_set():
# if not running, also break out of infinite generator loop
break
logger.info("Stopping")
def run_frame_emitter(config: Namespace, is_running: Event):
    emitter = FrameEmitter(config, is_running)
    emitter.emit_video()
    is_running.clear()
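
# Launch sketch (hypothetical config; zmq_frame_addr, camera, H and video_src
# must be populated as the code above expects):
#     from multiprocessing import Event, Process
#     is_running = Event(); is_running.set()
#     Process(target=run_frame_emitter, args=(config, is_running)).start()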