From 0af50308455df4228b51e1f900680161b6482566 Mon Sep 17 00:00:00 2001
From: Ruben van de Ven
Date: Thu, 7 Nov 2024 15:00:05 +0100
Subject: [PATCH] Gstreamer for rtsp

---
 README.md                  |  22 ++++
 poetry.lock                |  22 ++--
 pyproject.toml             |   2 +
 trap/animation_renderer.py |   4 +-
 trap/config.py             |  26 ++--
 trap/frame_emitter.py      |  90 +++++++++++-----
 trap/preview_renderer.py   |  13 +--
 trap/process_data.py       | 214 +++++++++++++++++++++++++++++++++++++
 trap/tracker.py            |   4 +-
 9 files changed, 335 insertions(+), 62 deletions(-)
 create mode 100644 README.md
 create mode 100644 trap/process_data.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3499b35
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# Trajectory Prediction Video installation
+
+## Install
+
+* Run `bash build_opencv_with_gstreamer.sh` to build OpenCV with GStreamer support
+* Use pyenv + poetry to install
+
+## How to
+
+> See also the sibling repo [traptools](https://git.rubenvandeven.com/security_vision/traptools) for the camera calibration and homography tools that are needed for this repo.
+
+These are roughly the steps to go from data gathering to training:
+
+1. Make sure to have some recordings with a fixed camera.
+   * Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
+2. Follow the steps in the auxiliary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) the camera matrix, lens distortion, image dimensions, and (2+3) the homography
+3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
+4. Parse the tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME`
+5. Train the Trajectron model: `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data`
+6. Then run!
+   * On a video file (you can use a wildcard): `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/ --smooth-predictions --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is set here so the command can run over an SSH connection while rendering to the local monitor)
+   * Or on the RTSP stream, which uses GStreamer to substantially reduce latency compared to the default ffmpeg bindings in OpenCV.
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index caec189..ffc7e9b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1801,27 +1801,25 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 
 [[package]]
 name = "opencv-python"
-version = "4.8.1.78"
+version = "4.10.0.84"
 description = "Wrapper package for OpenCV python bindings."
optional = false python-versions = ">=3.6" files = [ - {file = "opencv-python-4.8.1.78.tar.gz", hash = "sha256:cc7adbbcd1112877a39274106cb2752e04984bc01a031162952e97450d6117f6"}, - {file = "opencv_python-4.8.1.78-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:91d5f6f5209dc2635d496f6b8ca6573ecdad051a09e6b5de4c399b8e673c60da"}, - {file = "opencv_python-4.8.1.78-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31f47e05447da8b3089faa0a07ffe80e114c91ce0b171e6424f9badbd1c5cd"}, - {file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9814beca408d3a0eca1bae7e3e5be68b07c17ecceb392b94170881216e09b319"}, - {file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c406bdb41eb21ea51b4e90dfbc989c002786c3f601c236a99c59a54670a394"}, - {file = "opencv_python-4.8.1.78-cp37-abi3-win32.whl", hash = "sha256:a7aac3900fbacf55b551e7b53626c3dad4c71ce85643645c43e91fcb19045e47"}, - {file = "opencv_python-4.8.1.78-cp37-abi3-win_amd64.whl", hash = "sha256:b983197f97cfa6fcb74e1da1802c7497a6f94ed561aba6980f1f33123f904956"}, + {file = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl", hash = "sha256:c1f8e6ba7fd82517ba97d352f51d161c5be51495dc7b6c6f929a8546d650f4ea"}, ] [package.dependencies] numpy = [ + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, ] +[package.source] +type = "file" +url = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl" + [[package]] name = "orjson" version = "3.9.10" @@ -1939,8 +1937,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, + {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3324,7 +3322,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "trajectron-plus-plus" version = "0.1.1" -description = "Predict trajectories for anomaly detection" +description = "This repository contains the code for Trajectron++: Dynamically-Feasible Trajectory Forecasting With Heterogeneous Data by Tim Salzmann*, Boris Ivanovic*, Punarjay Chakravarty, and Marco Pavone (* denotes equal contribution)." 
optional = false python-versions = "^3.9,<3.12" files = [] @@ -3542,4 +3540,4 @@ watchdog = ["watchdog (>=2.3)"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.12," -content-hash = "bffa0878a620996b47aa5623b951f09ab010c267880c6dcd5a53741f244e675a" +content-hash = "e92dc4bbdd22d5a5ebe5910f6cef1a45c7796e632fb6cb3debfc16f7b89b4972" diff --git a/pyproject.toml b/pyproject.toml index b82fdd9..9370cde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ readme = "README.md" [tool.poetry.scripts] trapserv = "trap.plumber:start" tracker = "trap.tools:tracker_preprocess" +process_data = "trap.process_data:main" [tool.poetry.dependencies] @@ -34,6 +35,7 @@ pandas-helper-calc = {git = "https://github.com/scls19fr/pandas-helper-calc"} tsmoothie = "^1.0.5" pyglet = "^2.0.15" pyglet-cornerpin = "^0.2.0" +opencv-python = {file="./opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"} [build-system] requires = ["poetry-core"] diff --git a/trap/animation_renderer.py b/trap/animation_renderer.py index 07f1af5..6f34dd0 100644 --- a/trap/animation_renderer.py +++ b/trap/animation_renderer.py @@ -57,7 +57,7 @@ class AnimationRenderer: # TODO: get FPS from frame_emitter # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720)) self.fps = 60 - self.frame_size = (self.config.frame_width,self.config.frame_height) + self.frame_size = (self.config.camera.w,self.config.camera.h) self.hide_stats = False self.out_writer = None # self.start_writer() if self.config.render_file else None self.streaming_process = None # self.start_streaming() if self.config.render_url else None @@ -246,7 +246,7 @@ class AnimationRenderer: img = pyglet.image.ImageData(self.frame_size[0], self.frame_size[1], 'RGB', img.tobytes()) # don't draw in batch, so that it is the background self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg) - self.video_sprite.opacity = 30 + self.video_sprite.opacity = 100 except zmq.ZMQError as e: # idx = frame.index if frame else "NONE" # logger.debug(f"reuse video frame {idx}") diff --git a/trap/config.py b/trap/config.py index bb6f749..d551f0e 100644 --- a/trap/config.py +++ b/trap/config.py @@ -8,6 +8,7 @@ from trap.tracker import DETECTORS from trap.frame_emitter import Camera from pyparsing import Optional +from trap.frame_emitter import UrlOrPath class LambdaParser(argparse.ArgumentParser): """Execute lambda functions @@ -85,7 +86,7 @@ class CameraAction(argparse.Action): # 'camera_matrix': np.array(data['camera_matrix']), # 'dist_coeff': np.array(data['dist_coeff']), # } - camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), namespace.frame_width, namespace.frame_height) + camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), data['dim']['width'], data['dim']['height'], namespace.H) setattr(namespace, 'camera', camera) @@ -253,10 +254,10 @@ connection_parser.add_argument('--bypass-prediction', # Frame emitter frame_emitter_parser.add_argument("--video-src", - help="source video to track from", - type=Path, + help="source video to track from can be either a relative or absolute path, or a url, like an RTSP resource", + type=UrlOrPath, nargs='+', - default=lambda: list(Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4'))) + default=lambda: [UrlOrPath(p) for p in Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')]) frame_emitter_parser.add_argument("--video-offset", help="Start playback from given frame. 
Note that when src is an array, this applies to all videos individually.", default=None, @@ -292,14 +293,15 @@ tracker_parser.add_argument("--detector", tracker_parser.add_argument("--smooth-tracks", help="Smooth the tracker tracks before sending them to the predictor", action='store_true') -tracker_parser.add_argument("--frame-width", - help="width of the frames", - type=int, - default=1280) -tracker_parser.add_argument("--frame-height", - help="height of the frames", - type=int, - default=720) +# now in calibration.json +# tracker_parser.add_argument("--frame-width", +# help="width of the frames", +# type=int, +# default=1280) +# tracker_parser.add_argument("--frame-height", +# help="height of the frames", +# type=int, +# default=720) # Renderer diff --git a/trap/frame_emitter.py b/trap/frame_emitter.py index 6d35d32..b3a5c81 100644 --- a/trap/frame_emitter.py +++ b/trap/frame_emitter.py @@ -12,11 +12,30 @@ from typing import Iterable, Optional import numpy as np import cv2 import zmq +import os from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState +from urllib.parse import urlparse + logger = logging.getLogger('trap.frame_emitter') + +class UrlOrPath(): + def __init__(self, str): + self.url = urlparse(str) + + def __str__(self) -> str: + return self.url.geturl() + + def is_url(self) -> bool: + return len(self.url.netloc) > 0 + + def path(self) -> Path: + if self.is_url(): + return Path(self.url.path) + return Path(self.url.geturl()) # can include scheme, such as C:/ + class DetectionState(IntFlag): Tentative = 1 # state before n_init (see DeepsortTrack) Confirmed = 2 # after tentative @@ -33,12 +52,13 @@ class DetectionState(IntFlag): raise RuntimeError("Should not run into Deleted entries here") class Camera: - def __init__(self, mtx, dist, w, h): + def __init__(self, mtx, dist, w, h, H): self.mtx = mtx self.dist = dist self.w = w self.h = h self.newcameramtx, self.roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h)) + self.H = H # homography @dataclass @@ -135,11 +155,11 @@ class Frame: } for t in self.tracks.values() } -def video_src_from_config(config): +def video_src_from_config(config) -> UrlOrPath: if config.video_loop: - video_srcs: Iterable[Path] = cycle(config.video_src) + video_srcs: Iterable[UrlOrPath] = cycle(config.video_src) else: - video_srcs: Iterable[Path] = config.video_src + video_srcs: Iterable[UrlOrPath] = config.video_src return video_srcs class FrameEmitter: @@ -159,24 +179,33 @@ class FrameEmitter: logger.info(f"Connection socket {config.zmq_frame_addr}") - self.video_srcs: video_src_from_config(self.config) + self.video_srcs = video_src_from_config(self.config) def emit_video(self): i = 0 + delay_generation = False for video_path in self.video_srcs: logger.info(f"Play from '{str(video_path)}'") if str(video_path).isdigit(): # numeric input is a CV camera video = cv2.VideoCapture(int(str(video_path))) # TODO: make config variables - video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.frame_width)) - video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.frame_height)) + video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.camera.w)) + video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.camera.h)) print("exposure!", video.get(cv2.CAP_PROP_AUTO_EXPOSURE)) video.set(cv2.CAP_PROP_FPS, 5) + fps=5 + elif video_path.url.scheme == 'rtsp': + gst = f"rtspsrc location={video_path} latency=0 buffer-mode=auto ! decodebin ! videoconvert ! 
appsink max-buffers=1 drop=true" + logger.info(f"Capture gstreamer (gst-launch-1.0): {gst}") + video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER) + fps=12 else: + # os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "fflags;nobuffer|flags;low_delay|avioflags;direct|rtsp_transport;udp" video = cv2.VideoCapture(str(video_path)) - fps = video.get(cv2.CAP_PROP_FPS) + delay_generation = True + fps = video.get(cv2.CAP_PROP_FPS) target_frame_duration = 1./fps logger.info(f"Emit frames at {fps} fps") @@ -186,18 +215,20 @@ class FrameEmitter: i = self.config.video_offset - if '-' in video_path.stem: - path_stem = video_path.stem[:video_path.stem.rfind('-')] - else: - path_stem = video_path.stem - path_stem += "-homography" - homography_path = video_path.with_stem(path_stem).with_suffix('.txt') - logger.info(f'check homography file {homography_path}') - if homography_path.exists(): - logger.info(f'Found custom homography file! Using {homography_path}') - video_H = np.loadtxt(homography_path, delimiter=',') - else: - video_H = None + + # if '-' in video_path.path().stem: + # path_stem = video_path.stem[:video_path.stem.rfind('-')] + # else: + # path_stem = video_path.stem + # path_stem += "-homography" + # homography_path = video_path.with_stem(path_stem).with_suffix('.txt') + # logger.info(f'check homography file {homography_path}') + # if homography_path.exists(): + # logger.info(f'Found custom homography file! Using {homography_path}') + # video_H = np.loadtxt(homography_path, delimiter=',') + # else: + # video_H = None + video_H = self.config.camera.H prev_time = time.time() @@ -222,14 +253,17 @@ class FrameEmitter: # perhaps multiprocessing Array? self.frame_sock.send(pickle.dumps(frame)) - # defer next loop - now = time.time() - time_diff = (now - prev_time) - if time_diff < target_frame_duration: - time.sleep(target_frame_duration - time_diff) - now += target_frame_duration - time_diff - - prev_time = now + # only delay consuming the next frame when using a file. 
+ # Otherwise, go ASAP + if delay_generation: + # defer next loop + now = time.time() + time_diff = (now - prev_time) + if time_diff < target_frame_duration: + time.sleep(target_frame_duration - time_diff) + now += target_frame_duration - time_diff + + prev_time = now i += 1 diff --git a/trap/preview_renderer.py b/trap/preview_renderer.py index 2d3eb12..a5b31cf 100644 --- a/trap/preview_renderer.py +++ b/trap/preview_renderer.py @@ -88,11 +88,12 @@ class DrawnTrack: self.inv_H = np.linalg.pinv(self.H) pred_coords = [] - if self.draw_projection == PROJECTION_IMG: - for pred_i, pred in enumerate(track.predictions): - pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist()) - elif self.draw_projection == PROJECTION_MAP: - pred_coords = [pred for pred in track.predictions] + if track.predictions: + if self.draw_projection == PROJECTION_IMG: + for pred_i, pred in enumerate(track.predictions): + pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist()) + elif self.draw_projection == PROJECTION_MAP: + pred_coords = [pred for pred in track.predictions] self.pred_coords = pred_coords # color = (128,0,128) if pred_i else (128, @@ -282,7 +283,7 @@ class PreviewRenderer: # TODO: get FPS from frame_emitter # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720)) self.fps = 60 - self.frame_size = (self.config.frame_width,self.config.frame_height) + self.frame_size = (self.config.camera.w,self.config.camera.h) self.hide_stats = False self.out_writer = self.start_writer() if self.config.render_file else None self.streaming_process = self.start_streaming() if self.config.render_url else None diff --git a/trap/process_data.py b/trap/process_data.py new file mode 100644 index 0000000..28ce966 --- /dev/null +++ b/trap/process_data.py @@ -0,0 +1,214 @@ +from pathlib import Path +import sys +import os +import numpy as np +import pandas as pd +import dill +import tqdm +import argparse + +#sys.path.append("../../") +from trajectron.environment import Environment, Scene, Node +from trajectron.utils import maybe_makedirs +from trajectron.environment import derivative_of + +desired_max_time = 100 +pred_indices = [2, 3] +state_dim = 6 +frame_diff = 10 +desired_frame_diff = 1 +dt = 0.1 # dt per frame (e.g. 
1/FPS) + +standardization = { + 'PEDESTRIAN': { + 'position': { + 'x': {'mean': 0, 'std': 1}, + 'y': {'mean': 0, 'std': 1} + }, + 'velocity': { + 'x': {'mean': 0, 'std': 2}, + 'y': {'mean': 0, 'std': 2} + }, + 'acceleration': { + 'x': {'mean': 0, 'std': 1}, + 'y': {'mean': 0, 'std': 1} + } + } +} + + +def augment_scene(scene, angle): + def rotate_pc(pc, alpha): + M = np.array([[np.cos(alpha), -np.sin(alpha)], + [np.sin(alpha), np.cos(alpha)]]) + return M @ pc + + data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']]) + + scene_aug = Scene(timesteps=scene.timesteps, dt=scene.dt, name=scene.name) + + alpha = angle * np.pi / 180 + + for node in scene.nodes: + x = node.data.position.x.copy() + y = node.data.position.y.copy() + + x, y = rotate_pc(np.array([x, y]), alpha) + + vx = derivative_of(x, scene.dt) + vy = derivative_of(y, scene.dt) + ax = derivative_of(vx, scene.dt) + ay = derivative_of(vy, scene.dt) + + data_dict = {('position', 'x'): x, + ('position', 'y'): y, + ('velocity', 'x'): vx, + ('velocity', 'y'): vy, + ('acceleration', 'x'): ax, + ('acceleration', 'y'): ay} + + node_data = pd.DataFrame(data_dict, columns=data_columns) + + node = Node(node_type=node.type, node_id=node.id, data=node_data, first_timestep=node.first_timestep) + + scene_aug.nodes.append(node) + return scene_aug + + +def augment(scene): + scene_aug = np.random.choice(scene.augmented) + scene_aug.temporal_scene_graph = scene.temporal_scene_graph + return scene_aug + + +# maybe_makedirs('trajectron-data') +# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']: + +def process_data(src_dir: Path, dst_dir: Path, name: str): + print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}") + + nl = 0 + l = 0 + data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']]) + skipped_for_error = 0 + created = 0 + + for data_class in ['train', 'val', 'test']: + env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization) + attention_radius = dict() + attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0 + env.attention_radius = attention_radius + + scenes = [] + split_id = f"{name}_{data_class}" + data_dict_path = dst_dir / (split_id + '.pkl') + + print(data_dict_path) + + + subpath = src_dir / data_class + for file in subpath.glob("*.txt"): + print(file) + input_data_dict = dict() + + data = pd.read_csv(file, sep='\t', index_col=False, header=None) + + if data.shape[1] == 8: + data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y'] + elif data.shape[1] == 9: + data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y', 'state'] + else: + raise Exception("Unknown data format. 
Check column count") + # data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer') + data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer') + + + data['frame_id'] = (data['frame_id'] // frame_diff).astype(int) + + + data['frame_id'] -= data['frame_id'].min() + + data['node_type'] = 'PEDESTRIAN' + data['node_id'] = data['track_id'].astype(str) + data.sort_values('frame_id', inplace=True) + + # Mean Position + + print("Means: x:", data['pos_x'].mean(), "y:", data['pos_y'].mean()) + + data['pos_x'] = data['pos_x'] - data['pos_x'].mean() + data['pos_y'] = data['pos_y'] - data['pos_y'].mean() + + max_timesteps = data['frame_id'].max() + + scene = Scene(timesteps=max_timesteps+1, dt=dt, name=split_id, aug_func=augment if data_class == 'train' else None) + + for node_id in tqdm.tqdm(pd.unique(data['node_id'])): + node_df = data[data['node_id'] == node_id] + if not np.all(np.diff(node_df['frame_id']) == 1): + # print(f"Interval in {node_id} not always 1") + # print(node_df['frame_id']) + # print(np.diff(node_df['frame_id']) != 1) + # mask=np.append(False, np.diff(node_df['frame_id']) != 1) + # print(node_df[mask]['frame_id']) + skipped_for_error += 1 + continue + + + node_values = node_df[['pos_x', 'pos_y']].values + + if node_values.shape[0] < 2: + continue + + new_first_idx = node_df['frame_id'].iloc[0] + + x = node_values[:, 0] + y = node_values[:, 1] + vx = derivative_of(x, scene.dt) + vy = derivative_of(y, scene.dt) + ax = derivative_of(vx, scene.dt) + ay = derivative_of(vy, scene.dt) + + data_dict = {('position', 'x'): x, + ('position', 'y'): y, + ('velocity', 'x'): vx, + ('velocity', 'y'): vy, + ('acceleration', 'x'): ax, + ('acceleration', 'y'): ay} + + node_data = pd.DataFrame(data_dict, columns=data_columns) + node = Node(node_type=env.NodeType.PEDESTRIAN, node_id=node_id, data=node_data) + node.first_timestep = new_first_idx + + scene.nodes.append(node) + created+=1 + # if data_class == 'train': + # scene.augmented = list() + # angles = np.arange(0, 360, 15) if data_class == 'train' else [0] + # for angle in angles: + # scene.augmented.append(augment_scene(scene, angle)) + + # print(scene) + scenes.append(scene) + print(f'Processed {len(scenes):.2f} scene for data class {data_class}') + + env.scenes = scenes + + print(env.scenes) + + if len(scenes) > 0: + with open(data_dict_path, 'wb') as f: + dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL) + + print(f"Linear: {l}") + print(f"Non-Linear: {nl}") + print(f"error: {skipped_for_error}, used: {created}") + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--src-dir", "-s", type=Path, required=True, help="Directory with tracker output in .txt files") + parser.add_argument("--dst-dir", "-d", type=Path, required=True, help="Destination directory to store parsed .pkl files (typically 'trajectron-data')") + parser.add_argument("--name", "-n", type=str, required=True, help="Identifier to prefix the output .pkl files with (result is NAME-train.pkl, NAME-test.pkl)") + + args = parser.parse_args() + process_data(**args.__dict__) \ No newline at end of file diff --git a/trap/tracker.py b/trap/tracker.py index 6e15471..300d4e1 100644 --- a/trap/tracker.py +++ b/trap/tracker.py @@ -179,7 +179,7 @@ class Tracker: # embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth" ) elif self.config.detector == DETECTOR_YOLOv8: - self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0) + self.model = YOLO('EXPERIMENTS/yolov8x.pt') else: raise RuntimeError(f"{self.config.detector} is not implemented 
yet. See --help") @@ -253,7 +253,7 @@ class Tracker: if self.config.detector == DETECTOR_YOLOv8: - detections: [Detection] = _yolov8_track(frame, self.model) + detections: [Detection] = _yolov8_track(frame, self.model, classes=[0]) else : detections: [Detection] = self._resnet_track(frame.img, scale = 1)