Gstreamer for rtsp

This commit is contained in:
Ruben van de Ven 2024-11-07 15:00:05 +01:00
parent 9284ce8849
commit 0af5030845
9 changed files with 335 additions and 62 deletions

22
README.md Normal file
View file

@ -0,0 +1,22 @@
# Trajectory Prediction Video installation
## Install
* Run `bash build_opencv_with_gstreamer.sh` to build opencv with gstreamer support
* Use pyenv + poetry to install
## How to
> See also the sibling repo [traptools](https://git.rubenvandeven.com/security_vision/traptools) for camera calibration and homography tools that are needed for this repo.
These are roughly the steps to go from datagathering to training
1. Make sure to have some recordings with a fixed camera.
* Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
2. Follow the steps in the auxilary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) camera matrix, lens distortion, image dimensions, and (2+3) homography
3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME`
5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data `
6. The run!
* On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/--smooth-predictions --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here to running over SSH connection and display on local monitor)
* or on the RTSP stream. Which uses gstreamer to substantially reduce latency compared to the default ffmpeg bindings in OpenCV.

22
poetry.lock generated
View file

@ -1801,27 +1801,25 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
[[package]]
name = "opencv-python"
version = "4.8.1.78"
version = "4.10.0.84"
description = "Wrapper package for OpenCV python bindings."
optional = false
python-versions = ">=3.6"
files = [
{file = "opencv-python-4.8.1.78.tar.gz", hash = "sha256:cc7adbbcd1112877a39274106cb2752e04984bc01a031162952e97450d6117f6"},
{file = "opencv_python-4.8.1.78-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:91d5f6f5209dc2635d496f6b8ca6573ecdad051a09e6b5de4c399b8e673c60da"},
{file = "opencv_python-4.8.1.78-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31f47e05447da8b3089faa0a07ffe80e114c91ce0b171e6424f9badbd1c5cd"},
{file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9814beca408d3a0eca1bae7e3e5be68b07c17ecceb392b94170881216e09b319"},
{file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c406bdb41eb21ea51b4e90dfbc989c002786c3f601c236a99c59a54670a394"},
{file = "opencv_python-4.8.1.78-cp37-abi3-win32.whl", hash = "sha256:a7aac3900fbacf55b551e7b53626c3dad4c71ce85643645c43e91fcb19045e47"},
{file = "opencv_python-4.8.1.78-cp37-abi3-win_amd64.whl", hash = "sha256:b983197f97cfa6fcb74e1da1802c7497a6f94ed561aba6980f1f33123f904956"},
{file = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl", hash = "sha256:c1f8e6ba7fd82517ba97d352f51d161c5be51495dc7b6c6f929a8546d650f4ea"},
]
[package.dependencies]
numpy = [
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
]
[package.source]
type = "file"
url = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"
[[package]]
name = "orjson"
version = "3.9.10"
@ -1939,8 +1937,8 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
{version = ">=1.23.2,<2", markers = "python_version == \"3.11\""},
{version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@ -3324,7 +3322,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,
[[package]]
name = "trajectron-plus-plus"
version = "0.1.1"
description = "Predict trajectories for anomaly detection"
description = "This repository contains the code for Trajectron++: Dynamically-Feasible Trajectory Forecasting With Heterogeneous Data by Tim Salzmann*, Boris Ivanovic*, Punarjay Chakravarty, and Marco Pavone (* denotes equal contribution)."
optional = false
python-versions = "^3.9,<3.12"
files = []
@ -3542,4 +3540,4 @@ watchdog = ["watchdog (>=2.3)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10,<3.12,"
content-hash = "bffa0878a620996b47aa5623b951f09ab010c267880c6dcd5a53741f244e675a"
content-hash = "e92dc4bbdd22d5a5ebe5910f6cef1a45c7796e632fb6cb3debfc16f7b89b4972"

View file

@ -8,6 +8,7 @@ readme = "README.md"
[tool.poetry.scripts]
trapserv = "trap.plumber:start"
tracker = "trap.tools:tracker_preprocess"
process_data = "trap.process_data:main"
[tool.poetry.dependencies]
@ -34,6 +35,7 @@ pandas-helper-calc = {git = "https://github.com/scls19fr/pandas-helper-calc"}
tsmoothie = "^1.0.5"
pyglet = "^2.0.15"
pyglet-cornerpin = "^0.2.0"
opencv-python = {file="./opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"}
[build-system]
requires = ["poetry-core"]

View file

@ -57,7 +57,7 @@ class AnimationRenderer:
# TODO: get FPS from frame_emitter
# self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
self.fps = 60
self.frame_size = (self.config.frame_width,self.config.frame_height)
self.frame_size = (self.config.camera.w,self.config.camera.h)
self.hide_stats = False
self.out_writer = None # self.start_writer() if self.config.render_file else None
self.streaming_process = None # self.start_streaming() if self.config.render_url else None
@ -246,7 +246,7 @@ class AnimationRenderer:
img = pyglet.image.ImageData(self.frame_size[0], self.frame_size[1], 'RGB', img.tobytes())
# don't draw in batch, so that it is the background
self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg)
self.video_sprite.opacity = 30
self.video_sprite.opacity = 100
except zmq.ZMQError as e:
# idx = frame.index if frame else "NONE"
# logger.debug(f"reuse video frame {idx}")

View file

@ -8,6 +8,7 @@ from trap.tracker import DETECTORS
from trap.frame_emitter import Camera
from pyparsing import Optional
from trap.frame_emitter import UrlOrPath
class LambdaParser(argparse.ArgumentParser):
"""Execute lambda functions
@ -85,7 +86,7 @@ class CameraAction(argparse.Action):
# 'camera_matrix': np.array(data['camera_matrix']),
# 'dist_coeff': np.array(data['dist_coeff']),
# }
camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), namespace.frame_width, namespace.frame_height)
camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), data['dim']['width'], data['dim']['height'], namespace.H)
setattr(namespace, 'camera', camera)
@ -253,10 +254,10 @@ connection_parser.add_argument('--bypass-prediction',
# Frame emitter
frame_emitter_parser.add_argument("--video-src",
help="source video to track from",
type=Path,
help="source video to track from can be either a relative or absolute path, or a url, like an RTSP resource",
type=UrlOrPath,
nargs='+',
default=lambda: list(Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')))
default=lambda: [UrlOrPath(p) for p in Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')])
frame_emitter_parser.add_argument("--video-offset",
help="Start playback from given frame. Note that when src is an array, this applies to all videos individually.",
default=None,
@ -292,14 +293,15 @@ tracker_parser.add_argument("--detector",
tracker_parser.add_argument("--smooth-tracks",
help="Smooth the tracker tracks before sending them to the predictor",
action='store_true')
tracker_parser.add_argument("--frame-width",
help="width of the frames",
type=int,
default=1280)
tracker_parser.add_argument("--frame-height",
help="height of the frames",
type=int,
default=720)
# now in calibration.json
# tracker_parser.add_argument("--frame-width",
# help="width of the frames",
# type=int,
# default=1280)
# tracker_parser.add_argument("--frame-height",
# help="height of the frames",
# type=int,
# default=720)
# Renderer

View file

@ -12,11 +12,30 @@ from typing import Iterable, Optional
import numpy as np
import cv2
import zmq
import os
from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack
from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState
from urllib.parse import urlparse
logger = logging.getLogger('trap.frame_emitter')
class UrlOrPath():
def __init__(self, str):
self.url = urlparse(str)
def __str__(self) -> str:
return self.url.geturl()
def is_url(self) -> bool:
return len(self.url.netloc) > 0
def path(self) -> Path:
if self.is_url():
return Path(self.url.path)
return Path(self.url.geturl()) # can include scheme, such as C:/
class DetectionState(IntFlag):
Tentative = 1 # state before n_init (see DeepsortTrack)
Confirmed = 2 # after tentative
@ -33,12 +52,13 @@ class DetectionState(IntFlag):
raise RuntimeError("Should not run into Deleted entries here")
class Camera:
def __init__(self, mtx, dist, w, h):
def __init__(self, mtx, dist, w, h, H):
self.mtx = mtx
self.dist = dist
self.w = w
self.h = h
self.newcameramtx, self.roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h))
self.H = H # homography
@dataclass
@ -135,11 +155,11 @@ class Frame:
} for t in self.tracks.values()
}
def video_src_from_config(config):
def video_src_from_config(config) -> UrlOrPath:
if config.video_loop:
video_srcs: Iterable[Path] = cycle(config.video_src)
video_srcs: Iterable[UrlOrPath] = cycle(config.video_src)
else:
video_srcs: Iterable[Path] = config.video_src
video_srcs: Iterable[UrlOrPath] = config.video_src
return video_srcs
class FrameEmitter:
@ -159,23 +179,32 @@ class FrameEmitter:
logger.info(f"Connection socket {config.zmq_frame_addr}")
self.video_srcs: video_src_from_config(self.config)
self.video_srcs = video_src_from_config(self.config)
def emit_video(self):
i = 0
delay_generation = False
for video_path in self.video_srcs:
logger.info(f"Play from '{str(video_path)}'")
if str(video_path).isdigit():
# numeric input is a CV camera
video = cv2.VideoCapture(int(str(video_path)))
# TODO: make config variables
video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.frame_width))
video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.frame_height))
video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.camera.w))
video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.camera.h))
print("exposure!", video.get(cv2.CAP_PROP_AUTO_EXPOSURE))
video.set(cv2.CAP_PROP_FPS, 5)
fps=5
elif video_path.url.scheme == 'rtsp':
gst = f"rtspsrc location={video_path} latency=0 buffer-mode=auto ! decodebin ! videoconvert ! appsink max-buffers=1 drop=true"
logger.info(f"Capture gstreamer (gst-launch-1.0): {gst}")
video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
fps=12
else:
# os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "fflags;nobuffer|flags;low_delay|avioflags;direct|rtsp_transport;udp"
video = cv2.VideoCapture(str(video_path))
delay_generation = True
fps = video.get(cv2.CAP_PROP_FPS)
target_frame_duration = 1./fps
logger.info(f"Emit frames at {fps} fps")
@ -186,18 +215,20 @@ class FrameEmitter:
i = self.config.video_offset
if '-' in video_path.stem:
path_stem = video_path.stem[:video_path.stem.rfind('-')]
else:
path_stem = video_path.stem
path_stem += "-homography"
homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
logger.info(f'check homography file {homography_path}')
if homography_path.exists():
logger.info(f'Found custom homography file! Using {homography_path}')
video_H = np.loadtxt(homography_path, delimiter=',')
else:
video_H = None
# if '-' in video_path.path().stem:
# path_stem = video_path.stem[:video_path.stem.rfind('-')]
# else:
# path_stem = video_path.stem
# path_stem += "-homography"
# homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
# logger.info(f'check homography file {homography_path}')
# if homography_path.exists():
# logger.info(f'Found custom homography file! Using {homography_path}')
# video_H = np.loadtxt(homography_path, delimiter=',')
# else:
# video_H = None
video_H = self.config.camera.H
prev_time = time.time()
@ -222,6 +253,9 @@ class FrameEmitter:
# perhaps multiprocessing Array?
self.frame_sock.send(pickle.dumps(frame))
# only delay consuming the next frame when using a file.
# Otherwise, go ASAP
if delay_generation:
# defer next loop
now = time.time()
time_diff = (now - prev_time)

View file

@ -88,6 +88,7 @@ class DrawnTrack:
self.inv_H = np.linalg.pinv(self.H)
pred_coords = []
if track.predictions:
if self.draw_projection == PROJECTION_IMG:
for pred_i, pred in enumerate(track.predictions):
pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist())
@ -282,7 +283,7 @@ class PreviewRenderer:
# TODO: get FPS from frame_emitter
# self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
self.fps = 60
self.frame_size = (self.config.frame_width,self.config.frame_height)
self.frame_size = (self.config.camera.w,self.config.camera.h)
self.hide_stats = False
self.out_writer = self.start_writer() if self.config.render_file else None
self.streaming_process = self.start_streaming() if self.config.render_url else None

214
trap/process_data.py Normal file
View file

@ -0,0 +1,214 @@
from pathlib import Path
import sys
import os
import numpy as np
import pandas as pd
import dill
import tqdm
import argparse
#sys.path.append("../../")
from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of
desired_max_time = 100
pred_indices = [2, 3]
state_dim = 6
frame_diff = 10
desired_frame_diff = 1
dt = 0.1 # dt per frame (e.g. 1/FPS)
standardization = {
'PEDESTRIAN': {
'position': {
'x': {'mean': 0, 'std': 1},
'y': {'mean': 0, 'std': 1}
},
'velocity': {
'x': {'mean': 0, 'std': 2},
'y': {'mean': 0, 'std': 2}
},
'acceleration': {
'x': {'mean': 0, 'std': 1},
'y': {'mean': 0, 'std': 1}
}
}
}
def augment_scene(scene, angle):
def rotate_pc(pc, alpha):
M = np.array([[np.cos(alpha), -np.sin(alpha)],
[np.sin(alpha), np.cos(alpha)]])
return M @ pc
data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
scene_aug = Scene(timesteps=scene.timesteps, dt=scene.dt, name=scene.name)
alpha = angle * np.pi / 180
for node in scene.nodes:
x = node.data.position.x.copy()
y = node.data.position.y.copy()
x, y = rotate_pc(np.array([x, y]), alpha)
vx = derivative_of(x, scene.dt)
vy = derivative_of(y, scene.dt)
ax = derivative_of(vx, scene.dt)
ay = derivative_of(vy, scene.dt)
data_dict = {('position', 'x'): x,
('position', 'y'): y,
('velocity', 'x'): vx,
('velocity', 'y'): vy,
('acceleration', 'x'): ax,
('acceleration', 'y'): ay}
node_data = pd.DataFrame(data_dict, columns=data_columns)
node = Node(node_type=node.type, node_id=node.id, data=node_data, first_timestep=node.first_timestep)
scene_aug.nodes.append(node)
return scene_aug
def augment(scene):
scene_aug = np.random.choice(scene.augmented)
scene_aug.temporal_scene_graph = scene.temporal_scene_graph
return scene_aug
# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:
def process_data(src_dir: Path, dst_dir: Path, name: str):
print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")
nl = 0
l = 0
data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
skipped_for_error = 0
created = 0
for data_class in ['train', 'val', 'test']:
env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
attention_radius = dict()
attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0
env.attention_radius = attention_radius
scenes = []
split_id = f"{name}_{data_class}"
data_dict_path = dst_dir / (split_id + '.pkl')
print(data_dict_path)
subpath = src_dir / data_class
for file in subpath.glob("*.txt"):
print(file)
input_data_dict = dict()
data = pd.read_csv(file, sep='\t', index_col=False, header=None)
if data.shape[1] == 8:
data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y']
elif data.shape[1] == 9:
data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y', 'state']
else:
raise Exception("Unknown data format. Check column count")
# data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')
data['frame_id'] = (data['frame_id'] // frame_diff).astype(int)
data['frame_id'] -= data['frame_id'].min()
data['node_type'] = 'PEDESTRIAN'
data['node_id'] = data['track_id'].astype(str)
data.sort_values('frame_id', inplace=True)
# Mean Position
print("Means: x:", data['pos_x'].mean(), "y:", data['pos_y'].mean())
data['pos_x'] = data['pos_x'] - data['pos_x'].mean()
data['pos_y'] = data['pos_y'] - data['pos_y'].mean()
max_timesteps = data['frame_id'].max()
scene = Scene(timesteps=max_timesteps+1, dt=dt, name=split_id, aug_func=augment if data_class == 'train' else None)
for node_id in tqdm.tqdm(pd.unique(data['node_id'])):
node_df = data[data['node_id'] == node_id]
if not np.all(np.diff(node_df['frame_id']) == 1):
# print(f"Interval in {node_id} not always 1")
# print(node_df['frame_id'])
# print(np.diff(node_df['frame_id']) != 1)
# mask=np.append(False, np.diff(node_df['frame_id']) != 1)
# print(node_df[mask]['frame_id'])
skipped_for_error += 1
continue
node_values = node_df[['pos_x', 'pos_y']].values
if node_values.shape[0] < 2:
continue
new_first_idx = node_df['frame_id'].iloc[0]
x = node_values[:, 0]
y = node_values[:, 1]
vx = derivative_of(x, scene.dt)
vy = derivative_of(y, scene.dt)
ax = derivative_of(vx, scene.dt)
ay = derivative_of(vy, scene.dt)
data_dict = {('position', 'x'): x,
('position', 'y'): y,
('velocity', 'x'): vx,
('velocity', 'y'): vy,
('acceleration', 'x'): ax,
('acceleration', 'y'): ay}
node_data = pd.DataFrame(data_dict, columns=data_columns)
node = Node(node_type=env.NodeType.PEDESTRIAN, node_id=node_id, data=node_data)
node.first_timestep = new_first_idx
scene.nodes.append(node)
created+=1
# if data_class == 'train':
# scene.augmented = list()
# angles = np.arange(0, 360, 15) if data_class == 'train' else [0]
# for angle in angles:
# scene.augmented.append(augment_scene(scene, angle))
# print(scene)
scenes.append(scene)
print(f'Processed {len(scenes):.2f} scene for data class {data_class}')
env.scenes = scenes
print(env.scenes)
if len(scenes) > 0:
with open(data_dict_path, 'wb') as f:
dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)
print(f"Linear: {l}")
print(f"Non-Linear: {nl}")
print(f"error: {skipped_for_error}, used: {created}")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--src-dir", "-s", type=Path, required=True, help="Directory with tracker output in .txt files")
parser.add_argument("--dst-dir", "-d", type=Path, required=True, help="Destination directory to store parsed .pkl files (typically 'trajectron-data')")
parser.add_argument("--name", "-n", type=str, required=True, help="Identifier to prefix the output .pkl files with (result is NAME-train.pkl, NAME-test.pkl)")
args = parser.parse_args()
process_data(**args.__dict__)

View file

@ -179,7 +179,7 @@ class Tracker:
# embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
)
elif self.config.detector == DETECTOR_YOLOv8:
self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0)
self.model = YOLO('EXPERIMENTS/yolov8x.pt')
else:
raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")
@ -253,7 +253,7 @@ class Tracker:
if self.config.detector == DETECTOR_YOLOv8:
detections: [Detection] = _yolov8_track(frame, self.model)
detections: [Detection] = _yolov8_track(frame, self.model, classes=[0])
else :
detections: [Detection] = self._resnet_track(frame.img, scale = 1)