GStreamer for RTSP

Ruben van de Ven 2024-11-07 15:00:05 +01:00
parent 9284ce8849
commit 0af5030845
9 changed files with 335 additions and 62 deletions

README.md Normal file

@@ -0,0 +1,22 @@
# Trajectory Prediction Video installation
## Install
* Run `bash build_opencv_with_gstreamer.sh` to build OpenCV with GStreamer support
* Use pyenv + poetry to install the dependencies
## How to
> See also the sibling repo [traptools](https://git.rubenvandeven.com/security_vision/traptools) for camera calibration and homography tools that are needed for this repo.
These are roughly the steps to go from data gathering to training:
1. Make sure to have some recordings with a fixed camera.
* Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
2. Follow the steps in the auxiliary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) the camera matrix, lens distortion and image dimensions, and (2+3) the homography
3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME`
5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data `
6. Then run it:
* On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/ --smooth-predictions --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here to run over an SSH connection while displaying on the local monitor)
* Or on the RTSP stream, which uses GStreamer to substantially reduce latency compared to OpenCV's default FFmpeg bindings. (A quick check that your OpenCV build has GStreamer enabled is sketched below.)
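To verify that the locally built wheel actually enables the GStreamer backend, a minimal check (run inside the poetry environment) is sketched below; `cv2.getBuildInformation()` should list `GStreamer: YES`:

```python
# Sanity check: the custom-built opencv-python wheel should report GStreamer
# support, otherwise the RTSP pipeline cannot be opened with CAP_GSTREAMER.
import re
import cv2

info = cv2.getBuildInformation()
match = re.search(r"GStreamer:\s*(\S+)", info)
print("OpenCV version:", cv2.__version__)
print("GStreamer support:", match.group(1) if match else "not listed")
```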

poetry.lock generated

@@ -1801,27 +1801,25 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 [[package]]
 name = "opencv-python"
-version = "4.8.1.78"
+version = "4.10.0.84"
 description = "Wrapper package for OpenCV python bindings."
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "opencv-python-4.8.1.78.tar.gz", hash = "sha256:cc7adbbcd1112877a39274106cb2752e04984bc01a031162952e97450d6117f6"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:91d5f6f5209dc2635d496f6b8ca6573ecdad051a09e6b5de4c399b8e673c60da"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31f47e05447da8b3089faa0a07ffe80e114c91ce0b171e6424f9badbd1c5cd"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9814beca408d3a0eca1bae7e3e5be68b07c17ecceb392b94170881216e09b319"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c406bdb41eb21ea51b4e90dfbc989c002786c3f601c236a99c59a54670a394"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-win32.whl", hash = "sha256:a7aac3900fbacf55b551e7b53626c3dad4c71ce85643645c43e91fcb19045e47"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-win_amd64.whl", hash = "sha256:b983197f97cfa6fcb74e1da1802c7497a6f94ed561aba6980f1f33123f904956"},
+    {file = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl", hash = "sha256:c1f8e6ba7fd82517ba97d352f51d161c5be51495dc7b6c6f929a8546d650f4ea"},
 ]

 [package.dependencies]
 numpy = [
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
     {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
     {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
-    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
 ]

+[package.source]
+type = "file"
+url = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"
+
 [[package]]
 name = "orjson"
 version = "3.9.10"
@@ -1939,8 +1937,8 @@ files = [
 [package.dependencies]
 numpy = [
-    {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
     {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""},
+    {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
@@ -3324,7 +3322,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,
 [[package]]
 name = "trajectron-plus-plus"
 version = "0.1.1"
-description = "Predict trajectories for anomaly detection"
+description = "This repository contains the code for Trajectron++: Dynamically-Feasible Trajectory Forecasting With Heterogeneous Data by Tim Salzmann*, Boris Ivanovic*, Punarjay Chakravarty, and Marco Pavone (* denotes equal contribution)."
 optional = false
 python-versions = "^3.9,<3.12"
 files = []
@@ -3542,4 +3540,4 @@ watchdog = ["watchdog (>=2.3)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10,<3.12,"
-content-hash = "bffa0878a620996b47aa5623b951f09ab010c267880c6dcd5a53741f244e675a"
+content-hash = "e92dc4bbdd22d5a5ebe5910f6cef1a45c7796e632fb6cb3debfc16f7b89b4972"

pyproject.toml

@@ -8,6 +8,7 @@ readme = "README.md"
 [tool.poetry.scripts]
 trapserv = "trap.plumber:start"
 tracker = "trap.tools:tracker_preprocess"
+process_data = "trap.process_data:main"

 [tool.poetry.dependencies]
@@ -34,6 +35,7 @@ pandas-helper-calc = {git = "https://github.com/scls19fr/pandas-helper-calc"}
 tsmoothie = "^1.0.5"
 pyglet = "^2.0.15"
 pyglet-cornerpin = "^0.2.0"
+opencv-python = {file="./opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"}

 [build-system]
 requires = ["poetry-core"]


@@ -57,7 +57,7 @@ class AnimationRenderer:
         # TODO: get FPS from frame_emitter
         # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
         self.fps = 60
-        self.frame_size = (self.config.frame_width,self.config.frame_height)
+        self.frame_size = (self.config.camera.w,self.config.camera.h)
         self.hide_stats = False
         self.out_writer = None # self.start_writer() if self.config.render_file else None
         self.streaming_process = None # self.start_streaming() if self.config.render_url else None
@@ -246,7 +246,7 @@ class AnimationRenderer:
             img = pyglet.image.ImageData(self.frame_size[0], self.frame_size[1], 'RGB', img.tobytes())
             # don't draw in batch, so that it is the background
             self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg)
-            self.video_sprite.opacity = 30
+            self.video_sprite.opacity = 100
         except zmq.ZMQError as e:
             # idx = frame.index if frame else "NONE"
             # logger.debug(f"reuse video frame {idx}")


@@ -8,6 +8,7 @@ from trap.tracker import DETECTORS
 from trap.frame_emitter import Camera

 from pyparsing import Optional
+from trap.frame_emitter import UrlOrPath

 class LambdaParser(argparse.ArgumentParser):
     """Execute lambda functions
@@ -85,7 +86,7 @@ class CameraAction(argparse.Action):
         #     'camera_matrix': np.array(data['camera_matrix']),
         #     'dist_coeff': np.array(data['dist_coeff']),
         # }
-        camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), namespace.frame_width, namespace.frame_height)
+        camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), data['dim']['width'], data['dim']['height'], namespace.H)

         setattr(namespace, 'camera', camera)
@@ -253,10 +254,10 @@ connection_parser.add_argument('--bypass-prediction',
 # Frame emitter

 frame_emitter_parser.add_argument("--video-src",
-                    help="source video to track from",
-                    type=Path,
+                    help="source video to track from can be either a relative or absolute path, or a url, like an RTSP resource",
+                    type=UrlOrPath,
                     nargs='+',
-                    default=lambda: list(Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')))
+                    default=lambda: [UrlOrPath(p) for p in Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')])
 frame_emitter_parser.add_argument("--video-offset",
                     help="Start playback from given frame. Note that when src is an array, this applies to all videos individually.",
                     default=None,
@@ -292,14 +293,15 @@ tracker_parser.add_argument("--detector",
 tracker_parser.add_argument("--smooth-tracks",
                     help="Smooth the tracker tracks before sending them to the predictor",
                     action='store_true')
-tracker_parser.add_argument("--frame-width",
-                    help="width of the frames",
-                    type=int,
-                    default=1280)
-tracker_parser.add_argument("--frame-height",
-                    help="height of the frames",
-                    type=int,
-                    default=720)
+# now in calibration.json
+# tracker_parser.add_argument("--frame-width",
+#                     help="width of the frames",
+#                     type=int,
+#                     default=1280)
+# tracker_parser.add_argument("--frame-height",
+#                     help="height of the frames",
+#                     type=int,
+#                     default=720)

 # Renderer
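For reference, a hedged sketch of the `calibration.json` structure that `CameraAction` now expects (key names taken from the diff above; the numbers are placeholders, not a real calibration), and how it maps onto the extended `Camera` constructor:

```python
# Sketch only: keys follow the code above (camera_matrix, dist_coeff, dim);
# values are made-up placeholders. H normally comes from the --homography file.
import numpy as np
from trap.frame_emitter import Camera

data = {
    "camera_matrix": [[1000.0, 0.0, 960.0], [0.0, 1000.0, 540.0], [0.0, 0.0, 1.0]],
    "dist_coeff": [[-0.1, 0.01, 0.0, 0.0, 0.0]],
    "dim": {"width": 1920, "height": 1080},
}
H = np.eye(3)  # placeholder homography

camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']),
                data['dim']['width'], data['dim']['height'], H)
print(camera.newcameramtx)  # undistortion matrix computed in Camera.__init__
```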

trap/frame_emitter.py

@@ -12,11 +12,30 @@ from typing import Iterable, Optional
 import numpy as np
 import cv2
 import zmq
+import os
 from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack
 from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState
+from urllib.parse import urlparse

 logger = logging.getLogger('trap.frame_emitter')

+class UrlOrPath():
+    def __init__(self, str):
+        self.url = urlparse(str)
+
+    def __str__(self) -> str:
+        return self.url.geturl()
+
+    def is_url(self) -> bool:
+        return len(self.url.netloc) > 0
+
+    def path(self) -> Path:
+        if self.is_url():
+            return Path(self.url.path)
+        return Path(self.url.geturl()) # can include scheme, such as C:/
+
 class DetectionState(IntFlag):
     Tentative = 1 # state before n_init (see DeepsortTrack)
     Confirmed = 2 # after tentative
@@ -33,12 +52,13 @@ class DetectionState(IntFlag):
         raise RuntimeError("Should not run into Deleted entries here")

 class Camera:
-    def __init__(self, mtx, dist, w, h):
+    def __init__(self, mtx, dist, w, h, H):
         self.mtx = mtx
         self.dist = dist
         self.w = w
         self.h = h
         self.newcameramtx, self.roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h))
+        self.H = H # homography

 @dataclass
@@ -135,11 +155,11 @@ class Frame:
             } for t in self.tracks.values()
         }

-def video_src_from_config(config):
+def video_src_from_config(config) -> UrlOrPath:
     if config.video_loop:
-        video_srcs: Iterable[Path] = cycle(config.video_src)
+        video_srcs: Iterable[UrlOrPath] = cycle(config.video_src)
     else:
-        video_srcs: Iterable[Path] = config.video_src
+        video_srcs: Iterable[UrlOrPath] = config.video_src
     return video_srcs

 class FrameEmitter:
@@ -159,24 +179,33 @@ class FrameEmitter:
         logger.info(f"Connection socket {config.zmq_frame_addr}")

-        self.video_srcs: video_src_from_config(self.config)
+        self.video_srcs = video_src_from_config(self.config)

     def emit_video(self):
         i = 0
+        delay_generation = False
         for video_path in self.video_srcs:
             logger.info(f"Play from '{str(video_path)}'")
             if str(video_path).isdigit():
                 # numeric input is a CV camera
                 video = cv2.VideoCapture(int(str(video_path)))
                 # TODO: make config variables
-                video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.frame_width))
-                video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.frame_height))
+                video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.camera.w))
+                video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.camera.h))
                 print("exposure!", video.get(cv2.CAP_PROP_AUTO_EXPOSURE))
                 video.set(cv2.CAP_PROP_FPS, 5)
+                fps=5
+            elif video_path.url.scheme == 'rtsp':
+                gst = f"rtspsrc location={video_path} latency=0 buffer-mode=auto ! decodebin ! videoconvert ! appsink max-buffers=1 drop=true"
+                logger.info(f"Capture gstreamer (gst-launch-1.0): {gst}")
+                video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
+                fps=12
             else:
+                # os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "fflags;nobuffer|flags;low_delay|avioflags;direct|rtsp_transport;udp"
                 video = cv2.VideoCapture(str(video_path))
-                fps = video.get(cv2.CAP_PROP_FPS)
+                delay_generation = True
+                fps = video.get(cv2.CAP_PROP_FPS)
             target_frame_duration = 1./fps
             logger.info(f"Emit frames at {fps} fps")
@@ -186,18 +215,20 @@ class FrameEmitter:
                 i = self.config.video_offset

-            if '-' in video_path.stem:
-                path_stem = video_path.stem[:video_path.stem.rfind('-')]
-            else:
-                path_stem = video_path.stem
-            path_stem += "-homography"
-            homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
-            logger.info(f'check homography file {homography_path}')
-            if homography_path.exists():
-                logger.info(f'Found custom homography file! Using {homography_path}')
-                video_H = np.loadtxt(homography_path, delimiter=',')
-            else:
-                video_H = None
+            # if '-' in video_path.path().stem:
+            #     path_stem = video_path.stem[:video_path.stem.rfind('-')]
+            # else:
+            #     path_stem = video_path.stem
+            # path_stem += "-homography"
+            # homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
+            # logger.info(f'check homography file {homography_path}')
+            # if homography_path.exists():
+            #     logger.info(f'Found custom homography file! Using {homography_path}')
+            #     video_H = np.loadtxt(homography_path, delimiter=',')
+            # else:
+            #     video_H = None
+            video_H = self.config.camera.H

             prev_time = time.time()
@@ -222,14 +253,17 @@ class FrameEmitter:
                 # perhaps multiprocessing Array?
                 self.frame_sock.send(pickle.dumps(frame))

-                # defer next loop
-                now = time.time()
-                time_diff = (now - prev_time)
-                if time_diff < target_frame_duration:
-                    time.sleep(target_frame_duration - time_diff)
-                    now += target_frame_duration - time_diff
+                # only delay consuming the next frame when using a file.
+                # Otherwise, go ASAP
+                if delay_generation:
+                    # defer next loop
+                    now = time.time()
+                    time_diff = (now - prev_time)
+                    if time_diff < target_frame_duration:
+                        time.sleep(target_frame_duration - time_diff)
+                        now += target_frame_duration - time_diff

                 prev_time = now
                 i += 1
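As a standalone illustration of the capture path added above, the same GStreamer pipeline can be opened directly with OpenCV (requires the GStreamer-enabled build; the URL is a placeholder):

```python
import cv2

rtsp_url = "rtsp://USER:PASS@IP:554/Streaming/Channels/1"  # placeholder
# Pipeline string as used in FrameEmitter.emit_video(): the appsink keeps only
# the newest frame (max-buffers=1 drop=true), which is what keeps latency low.
gst = (f"rtspsrc location={rtsp_url} latency=0 buffer-mode=auto "
       "! decodebin ! videoconvert ! appsink max-buffers=1 drop=true")

video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
if not video.isOpened():
    raise RuntimeError("Could not open GStreamer pipeline; check the OpenCV build")
ok, frame = video.read()
print("got frame:", ok, frame.shape if ok else None)
video.release()
```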


@@ -88,11 +88,12 @@ class DrawnTrack:
         self.inv_H = np.linalg.pinv(self.H)

         pred_coords = []
-        if self.draw_projection == PROJECTION_IMG:
-            for pred_i, pred in enumerate(track.predictions):
-                pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist())
-        elif self.draw_projection == PROJECTION_MAP:
-            pred_coords = [pred for pred in track.predictions]
+        if track.predictions:
+            if self.draw_projection == PROJECTION_IMG:
+                for pred_i, pred in enumerate(track.predictions):
+                    pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist())
+            elif self.draw_projection == PROJECTION_MAP:
+                pred_coords = [pred for pred in track.predictions]

         self.pred_coords = pred_coords
         # color = (128,0,128) if pred_i else (128,
@@ -282,7 +283,7 @@ class PreviewRenderer:
         # TODO: get FPS from frame_emitter
         # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
         self.fps = 60
-        self.frame_size = (self.config.frame_width,self.config.frame_height)
+        self.frame_size = (self.config.camera.w,self.config.camera.h)
         self.hide_stats = False
         self.out_writer = self.start_writer() if self.config.render_file else None
         self.streaming_process = self.start_streaming() if self.config.render_url else None

trap/process_data.py Normal file

@@ -0,0 +1,214 @@
from pathlib import Path
import sys
import os
import numpy as np
import pandas as pd
import dill
import tqdm
import argparse

#sys.path.append("../../")
from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of

desired_max_time = 100
pred_indices = [2, 3]
state_dim = 6
frame_diff = 10
desired_frame_diff = 1
dt = 0.1 # dt per frame (e.g. 1/FPS)

standardization = {
    'PEDESTRIAN': {
        'position': {
            'x': {'mean': 0, 'std': 1},
            'y': {'mean': 0, 'std': 1}
        },
        'velocity': {
            'x': {'mean': 0, 'std': 2},
            'y': {'mean': 0, 'std': 2}
        },
        'acceleration': {
            'x': {'mean': 0, 'std': 1},
            'y': {'mean': 0, 'std': 1}
        }
    }
}
def augment_scene(scene, angle):
    def rotate_pc(pc, alpha):
        M = np.array([[np.cos(alpha), -np.sin(alpha)],
                      [np.sin(alpha), np.cos(alpha)]])
        return M @ pc

    data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])

    scene_aug = Scene(timesteps=scene.timesteps, dt=scene.dt, name=scene.name)

    alpha = angle * np.pi / 180

    for node in scene.nodes:
        x = node.data.position.x.copy()
        y = node.data.position.y.copy()

        x, y = rotate_pc(np.array([x, y]), alpha)

        vx = derivative_of(x, scene.dt)
        vy = derivative_of(y, scene.dt)
        ax = derivative_of(vx, scene.dt)
        ay = derivative_of(vy, scene.dt)

        data_dict = {('position', 'x'): x,
                     ('position', 'y'): y,
                     ('velocity', 'x'): vx,
                     ('velocity', 'y'): vy,
                     ('acceleration', 'x'): ax,
                     ('acceleration', 'y'): ay}

        node_data = pd.DataFrame(data_dict, columns=data_columns)

        node = Node(node_type=node.type, node_id=node.id, data=node_data, first_timestep=node.first_timestep)

        scene_aug.nodes.append(node)
    return scene_aug


def augment(scene):
    scene_aug = np.random.choice(scene.augmented)
    scene_aug.temporal_scene_graph = scene.temporal_scene_graph
    return scene_aug
# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:

def process_data(src_dir: Path, dst_dir: Path, name: str):
    print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")

    nl = 0
    l = 0
    data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
    skipped_for_error = 0
    created = 0

    for data_class in ['train', 'val', 'test']:
        env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
        attention_radius = dict()
        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0
        env.attention_radius = attention_radius

        scenes = []
        split_id = f"{name}_{data_class}"
        data_dict_path = dst_dir / (split_id + '.pkl')
        print(data_dict_path)

        subpath = src_dir / data_class
        for file in subpath.glob("*.txt"):
            print(file)
            input_data_dict = dict()

            data = pd.read_csv(file, sep='\t', index_col=False, header=None)

            if data.shape[1] == 8:
                data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y']
            elif data.shape[1] == 9:
                data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y', 'state']
            else:
                raise Exception("Unknown data format. Check column count")

            # data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
            data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')

            data['frame_id'] = (data['frame_id'] // frame_diff).astype(int)
            data['frame_id'] -= data['frame_id'].min()

            data['node_type'] = 'PEDESTRIAN'
            data['node_id'] = data['track_id'].astype(str)
            data.sort_values('frame_id', inplace=True)

            # Mean Position
            print("Means: x:", data['pos_x'].mean(), "y:", data['pos_y'].mean())
            data['pos_x'] = data['pos_x'] - data['pos_x'].mean()
            data['pos_y'] = data['pos_y'] - data['pos_y'].mean()

            max_timesteps = data['frame_id'].max()

            scene = Scene(timesteps=max_timesteps+1, dt=dt, name=split_id, aug_func=augment if data_class == 'train' else None)

            for node_id in tqdm.tqdm(pd.unique(data['node_id'])):
                node_df = data[data['node_id'] == node_id]
                if not np.all(np.diff(node_df['frame_id']) == 1):
                    # print(f"Interval in {node_id} not always 1")
                    # print(node_df['frame_id'])
                    # print(np.diff(node_df['frame_id']) != 1)
                    # mask=np.append(False, np.diff(node_df['frame_id']) != 1)
                    # print(node_df[mask]['frame_id'])
                    skipped_for_error += 1
                    continue

                node_values = node_df[['pos_x', 'pos_y']].values

                if node_values.shape[0] < 2:
                    continue

                new_first_idx = node_df['frame_id'].iloc[0]

                x = node_values[:, 0]
                y = node_values[:, 1]
                vx = derivative_of(x, scene.dt)
                vy = derivative_of(y, scene.dt)
                ax = derivative_of(vx, scene.dt)
                ay = derivative_of(vy, scene.dt)

                data_dict = {('position', 'x'): x,
                             ('position', 'y'): y,
                             ('velocity', 'x'): vx,
                             ('velocity', 'y'): vy,
                             ('acceleration', 'x'): ax,
                             ('acceleration', 'y'): ay}

                node_data = pd.DataFrame(data_dict, columns=data_columns)
                node = Node(node_type=env.NodeType.PEDESTRIAN, node_id=node_id, data=node_data)
                node.first_timestep = new_first_idx

                scene.nodes.append(node)
                created += 1

            # if data_class == 'train':
            #     scene.augmented = list()
            #     angles = np.arange(0, 360, 15) if data_class == 'train' else [0]
            #     for angle in angles:
            #         scene.augmented.append(augment_scene(scene, angle))

            # print(scene)
            scenes.append(scene)
        print(f'Processed {len(scenes):.2f} scene for data class {data_class}')

        env.scenes = scenes

        print(env.scenes)

        if len(scenes) > 0:
            with open(data_dict_path, 'wb') as f:
                dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)

    print(f"Linear: {l}")
    print(f"Non-Linear: {nl}")
    print(f"error: {skipped_for_error}, used: {created}")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--src-dir", "-s", type=Path, required=True, help="Directory with tracker output in .txt files")
    parser.add_argument("--dst-dir", "-d", type=Path, required=True, help="Destination directory to store parsed .pkl files (typically 'trajectron-data')")
    parser.add_argument("--name", "-n", type=str, required=True, help="Identifier to prefix the output .pkl files with (result is NAME_train.pkl, NAME_val.pkl, NAME_test.pkl)")

    args = parser.parse_args()
    process_data(**args.__dict__)
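For clarity, a hedged sketch of the tab-separated tracker output that `process_data()` parses: 8 columns (`frame_id track_id l t w h pos_x pos_y`), optionally a 9th `state` column; frame ids advance in steps of `frame_diff` (10) so the downsampled ids become consecutive. All numbers below are made up.

```python
import pandas as pd

sample = "\n".join([
    # frame_id  track_id  l      t      w     h     pos_x  pos_y
    "0\t1\t100.0\t200.0\t40.0\t80.0\t1.20\t3.40",
    "10\t1\t102.0\t201.0\t40.0\t80.0\t1.25\t3.45",
    "20\t1\t104.0\t202.0\t40.0\t80.0\t1.30\t3.50",
])
with open("/tmp/example_track.txt", "w") as f:
    f.write(sample)

data = pd.read_csv("/tmp/example_track.txt", sep='\t', index_col=False, header=None)
data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y']
print(data)
```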


@@ -179,7 +179,7 @@ class Tracker:
                 # embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
                 )
         elif self.config.detector == DETECTOR_YOLOv8:
-            self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0)
+            self.model = YOLO('EXPERIMENTS/yolov8x.pt')
         else:
             raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")
@@ -253,7 +253,7 @@ class Tracker:
         if self.config.detector == DETECTOR_YOLOv8:
-            detections: [Detection] = _yolov8_track(frame, self.model)
+            detections: [Detection] = _yolov8_track(frame, self.model, classes=[0])
         else :
             detections: [Detection] = self._resnet_track(frame.img, scale = 1)
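A hedged reading of the tracker change above: ultralytics' `YOLO` constructor does not accept a `classes` argument, so the person-class filter (COCO class 0) is presumably moved to the per-call API instead. A minimal sketch (the frame is a placeholder; the `_yolov8_track` internals are not shown in this diff):

```python
import numpy as np
from ultralytics import YOLO

model = YOLO('EXPERIMENTS/yolov8x.pt')                # no classes= in the constructor
frame_img = np.zeros((720, 1280, 3), dtype=np.uint8)  # placeholder frame
# filter to persons (class 0) at inference time instead
results = model.track(frame_img, classes=[0], persist=True, verbose=False)
```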