GStreamer for RTSP

Ruben van de Ven 2024-11-07 15:00:05 +01:00
parent 9284ce8849
commit 0af5030845
9 changed files with 335 additions and 62 deletions

README.md Normal file

@@ -0,0 +1,22 @@
# Trajectory Prediction Video installation
## Install
* Run `bash build_opencv_with_gstreamer.sh` to build OpenCV with GStreamer support
* Use pyenv + poetry to install the dependencies
## How to
> See also the sibling repo [traptools](https://git.rubenvandeven.com/security_vision/traptools) for camera calibration and homography tools that are needed for this repo.
These are roughly the steps to go from data gathering to training:
1. Make sure to have some recordings with a fixed camera.
* Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
2. Follow the steps in the auxiliary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) the camera matrix, lens distortion and image dimensions, and (2+3) the homography
3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME`
5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data `
6. Then run it:
* On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/ --smooth-predictions --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here to run over an SSH connection while displaying on the local monitor)
* Or on the RTSP stream, which uses GStreamer to substantially reduce latency compared to OpenCV's default FFmpeg bindings. (A quick check that your OpenCV build has GStreamer enabled is sketched below.)
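To verify that the locally built wheel actually enables the GStreamer backend, a minimal check (run inside the poetry environment) is sketched below; `cv2.getBuildInformation()` should list `GStreamer: YES`:

```python
# Sanity check: the custom-built opencv-python wheel should report GStreamer
# support, otherwise the RTSP pipeline cannot be opened with CAP_GSTREAMER.
import re
import cv2

info = cv2.getBuildInformation()
match = re.search(r"GStreamer:\s*(\S+)", info)
print("OpenCV version:", cv2.__version__)
print("GStreamer support:", match.group(1) if match else "not listed")
```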

poetry.lock generated

@@ -1801,27 +1801,25 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
 [[package]]
 name = "opencv-python"
-version = "4.8.1.78"
+version = "4.10.0.84"
 description = "Wrapper package for OpenCV python bindings."
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "opencv-python-4.8.1.78.tar.gz", hash = "sha256:cc7adbbcd1112877a39274106cb2752e04984bc01a031162952e97450d6117f6"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:91d5f6f5209dc2635d496f6b8ca6573ecdad051a09e6b5de4c399b8e673c60da"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31f47e05447da8b3089faa0a07ffe80e114c91ce0b171e6424f9badbd1c5cd"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9814beca408d3a0eca1bae7e3e5be68b07c17ecceb392b94170881216e09b319"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c406bdb41eb21ea51b4e90dfbc989c002786c3f601c236a99c59a54670a394"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-win32.whl", hash = "sha256:a7aac3900fbacf55b551e7b53626c3dad4c71ce85643645c43e91fcb19045e47"},
-    {file = "opencv_python-4.8.1.78-cp37-abi3-win_amd64.whl", hash = "sha256:b983197f97cfa6fcb74e1da1802c7497a6f94ed561aba6980f1f33123f904956"},
+    {file = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl", hash = "sha256:c1f8e6ba7fd82517ba97d352f51d161c5be51495dc7b6c6f929a8546d650f4ea"},
 ]

 [package.dependencies]
 numpy = [
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
     {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
     {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
-    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
 ]

+[package.source]
+type = "file"
+url = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"
+
 [[package]]
 name = "orjson"
 version = "3.9.10"
@@ -1939,8 +1937,8 @@ files = [
 [package.dependencies]
 numpy = [
-    {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
     {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""},
+    {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
@@ -3324,7 +3322,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,
 [[package]]
 name = "trajectron-plus-plus"
 version = "0.1.1"
-description = "Predict trajectories for anomaly detection"
+description = "This repository contains the code for Trajectron++: Dynamically-Feasible Trajectory Forecasting With Heterogeneous Data by Tim Salzmann*, Boris Ivanovic*, Punarjay Chakravarty, and Marco Pavone (* denotes equal contribution)."
 optional = false
 python-versions = "^3.9,<3.12"
 files = []
@@ -3542,4 +3540,4 @@ watchdog = ["watchdog (>=2.3)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10,<3.12,"
-content-hash = "bffa0878a620996b47aa5623b951f09ab010c267880c6dcd5a53741f244e675a"
+content-hash = "e92dc4bbdd22d5a5ebe5910f6cef1a45c7796e632fb6cb3debfc16f7b89b4972"

pyproject.toml

@@ -8,6 +8,7 @@ readme = "README.md"
 [tool.poetry.scripts]
 trapserv = "trap.plumber:start"
 tracker = "trap.tools:tracker_preprocess"
+process_data = "trap.process_data:main"

 [tool.poetry.dependencies]
@@ -34,6 +35,7 @@ pandas-helper-calc = {git = "https://github.com/scls19fr/pandas-helper-calc"}
 tsmoothie = "^1.0.5"
 pyglet = "^2.0.15"
 pyglet-cornerpin = "^0.2.0"
+opencv-python = {file="./opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"}

 [build-system]
 requires = ["poetry-core"]


@@ -57,7 +57,7 @@ class AnimationRenderer:
         # TODO: get FPS from frame_emitter
         # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
         self.fps = 60
-        self.frame_size = (self.config.frame_width,self.config.frame_height)
+        self.frame_size = (self.config.camera.w,self.config.camera.h)
         self.hide_stats = False
         self.out_writer = None # self.start_writer() if self.config.render_file else None
         self.streaming_process = None # self.start_streaming() if self.config.render_url else None
@@ -246,7 +246,7 @@ class AnimationRenderer:
             img = pyglet.image.ImageData(self.frame_size[0], self.frame_size[1], 'RGB', img.tobytes())
             # don't draw in batch, so that it is the background
             self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg)
-            self.video_sprite.opacity = 30
+            self.video_sprite.opacity = 100
         except zmq.ZMQError as e:
             # idx = frame.index if frame else "NONE"
             # logger.debug(f"reuse video frame {idx}")


@@ -8,6 +8,7 @@ from trap.tracker import DETECTORS
 from trap.frame_emitter import Camera

 from pyparsing import Optional
+from trap.frame_emitter import UrlOrPath

 class LambdaParser(argparse.ArgumentParser):
     """Execute lambda functions
@@ -85,7 +86,7 @@ class CameraAction(argparse.Action):
         #     'camera_matrix': np.array(data['camera_matrix']),
         #     'dist_coeff': np.array(data['dist_coeff']),
         # }
-        camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), namespace.frame_width, namespace.frame_height)
+        camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), data['dim']['width'], data['dim']['height'], namespace.H)

         setattr(namespace, 'camera', camera)
@@ -253,10 +254,10 @@ connection_parser.add_argument('--bypass-prediction',
 # Frame emitter

 frame_emitter_parser.add_argument("--video-src",
-                    help="source video to track from",
-                    type=Path,
+                    help="source video to track from can be either a relative or absolute path, or a url, like an RTSP resource",
+                    type=UrlOrPath,
                     nargs='+',
-                    default=lambda: list(Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')))
+                    default=lambda: [UrlOrPath(p) for p in Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')])
 frame_emitter_parser.add_argument("--video-offset",
                     help="Start playback from given frame. Note that when src is an array, this applies to all videos individually.",
                     default=None,
@@ -292,14 +293,15 @@ tracker_parser.add_argument("--detector",
 tracker_parser.add_argument("--smooth-tracks",
                     help="Smooth the tracker tracks before sending them to the predictor",
                     action='store_true')
-tracker_parser.add_argument("--frame-width",
-                    help="width of the frames",
-                    type=int,
-                    default=1280)
-tracker_parser.add_argument("--frame-height",
-                    help="height of the frames",
-                    type=int,
-                    default=720)
+# now in calibration.json
+# tracker_parser.add_argument("--frame-width",
+#                     help="width of the frames",
+#                     type=int,
+#                     default=1280)
+# tracker_parser.add_argument("--frame-height",
+#                     help="height of the frames",
+#                     type=int,
+#                     default=720)

 # Renderer
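For reference, a hedged sketch of the `calibration.json` structure that `CameraAction` now expects (key names taken from the diff above; the numbers are placeholders, not a real calibration), and how it maps onto the extended `Camera` constructor:

```python
# Sketch only: keys follow the code above (camera_matrix, dist_coeff, dim);
# values are made-up placeholders. H normally comes from the --homography file.
import numpy as np
from trap.frame_emitter import Camera

data = {
    "camera_matrix": [[1000.0, 0.0, 960.0], [0.0, 1000.0, 540.0], [0.0, 0.0, 1.0]],
    "dist_coeff": [[-0.1, 0.01, 0.0, 0.0, 0.0]],
    "dim": {"width": 1920, "height": 1080},
}
H = np.eye(3)  # placeholder homography

camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']),
                data['dim']['width'], data['dim']['height'], H)
print(camera.newcameramtx)  # undistortion matrix computed in Camera.__init__
```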

trap/frame_emitter.py

@@ -12,11 +12,30 @@ from typing import Iterable, Optional
 import numpy as np
 import cv2
 import zmq
+import os
 from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack
 from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState
+from urllib.parse import urlparse

 logger = logging.getLogger('trap.frame_emitter')

+class UrlOrPath():
+    def __init__(self, str):
+        self.url = urlparse(str)
+
+    def __str__(self) -> str:
+        return self.url.geturl()
+
+    def is_url(self) -> bool:
+        return len(self.url.netloc) > 0
+
+    def path(self) -> Path:
+        if self.is_url():
+            return Path(self.url.path)
+        return Path(self.url.geturl()) # can include scheme, such as C:/
+
 class DetectionState(IntFlag):
     Tentative = 1 # state before n_init (see DeepsortTrack)
     Confirmed = 2 # after tentative
@@ -33,12 +52,13 @@ class DetectionState(IntFlag):
         raise RuntimeError("Should not run into Deleted entries here")

 class Camera:
-    def __init__(self, mtx, dist, w, h):
+    def __init__(self, mtx, dist, w, h, H):
         self.mtx = mtx
         self.dist = dist
         self.w = w
         self.h = h
         self.newcameramtx, self.roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h))
+        self.H = H # homography

 @dataclass
@@ -135,11 +155,11 @@ class Frame:
             } for t in self.tracks.values()
         }

-def video_src_from_config(config):
+def video_src_from_config(config) -> UrlOrPath:
     if config.video_loop:
-        video_srcs: Iterable[Path] = cycle(config.video_src)
+        video_srcs: Iterable[UrlOrPath] = cycle(config.video_src)
     else:
-        video_srcs: Iterable[Path] = config.video_src
+        video_srcs: Iterable[UrlOrPath] = config.video_src
     return video_srcs

 class FrameEmitter:
@@ -159,24 +179,33 @@ class FrameEmitter:
         logger.info(f"Connection socket {config.zmq_frame_addr}")

-        self.video_srcs: video_src_from_config(self.config)
+        self.video_srcs = video_src_from_config(self.config)

     def emit_video(self):
         i = 0
+        delay_generation = False
         for video_path in self.video_srcs:
             logger.info(f"Play from '{str(video_path)}'")
             if str(video_path).isdigit():
                 # numeric input is a CV camera
                 video = cv2.VideoCapture(int(str(video_path)))
                 # TODO: make config variables
-                video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.frame_width))
-                video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.frame_height))
+                video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.camera.w))
+                video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.camera.h))
                 print("exposure!", video.get(cv2.CAP_PROP_AUTO_EXPOSURE))
                 video.set(cv2.CAP_PROP_FPS, 5)
+                fps=5
+            elif video_path.url.scheme == 'rtsp':
+                gst = f"rtspsrc location={video_path} latency=0 buffer-mode=auto ! decodebin ! videoconvert ! appsink max-buffers=1 drop=true"
+                logger.info(f"Capture gstreamer (gst-launch-1.0): {gst}")
+                video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
+                fps=12
             else:
+                # os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "fflags;nobuffer|flags;low_delay|avioflags;direct|rtsp_transport;udp"
                 video = cv2.VideoCapture(str(video_path))
-                fps = video.get(cv2.CAP_PROP_FPS)
+                delay_generation = True
+                fps = video.get(cv2.CAP_PROP_FPS)
             target_frame_duration = 1./fps
             logger.info(f"Emit frames at {fps} fps")
@@ -186,18 +215,20 @@ class FrameEmitter:
                 i = self.config.video_offset

-            if '-' in video_path.stem:
-                path_stem = video_path.stem[:video_path.stem.rfind('-')]
-            else:
-                path_stem = video_path.stem
-            path_stem += "-homography"
-            homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
-            logger.info(f'check homography file {homography_path}')
-            if homography_path.exists():
-                logger.info(f'Found custom homography file! Using {homography_path}')
-                video_H = np.loadtxt(homography_path, delimiter=',')
-            else:
-                video_H = None
+            # if '-' in video_path.path().stem:
+            #     path_stem = video_path.stem[:video_path.stem.rfind('-')]
+            # else:
+            #     path_stem = video_path.stem
+            # path_stem += "-homography"
+            # homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
+            # logger.info(f'check homography file {homography_path}')
+            # if homography_path.exists():
+            #     logger.info(f'Found custom homography file! Using {homography_path}')
+            #     video_H = np.loadtxt(homography_path, delimiter=',')
+            # else:
+            #     video_H = None
+            video_H = self.config.camera.H

             prev_time = time.time()
@@ -222,14 +253,17 @@ class FrameEmitter:
                 # perhaps multiprocessing Array?
                 self.frame_sock.send(pickle.dumps(frame))

-                # defer next loop
-                now = time.time()
-                time_diff = (now - prev_time)
-                if time_diff < target_frame_duration:
-                    time.sleep(target_frame_duration - time_diff)
-                    now += target_frame_duration - time_diff
+                # only delay consuming the next frame when using a file.
+                # Otherwise, go ASAP
+                if delay_generation:
+                    # defer next loop
+                    now = time.time()
+                    time_diff = (now - prev_time)
+                    if time_diff < target_frame_duration:
+                        time.sleep(target_frame_duration - time_diff)
+                        now += target_frame_duration - time_diff

                 prev_time = now
                 i += 1
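As a standalone illustration of the capture path added above, the same GStreamer pipeline can be opened directly with OpenCV (requires the GStreamer-enabled build; the URL is a placeholder):

```python
import cv2

rtsp_url = "rtsp://USER:PASS@IP:554/Streaming/Channels/1"  # placeholder
# Pipeline string as used in FrameEmitter.emit_video(): the appsink keeps only
# the newest frame (max-buffers=1 drop=true), which is what keeps latency low.
gst = (f"rtspsrc location={rtsp_url} latency=0 buffer-mode=auto "
       "! decodebin ! videoconvert ! appsink max-buffers=1 drop=true")

video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
if not video.isOpened():
    raise RuntimeError("Could not open GStreamer pipeline; check the OpenCV build")
ok, frame = video.read()
print("got frame:", ok, frame.shape if ok else None)
video.release()
```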


@@ -88,11 +88,12 @@ class DrawnTrack:
         self.inv_H = np.linalg.pinv(self.H)

         pred_coords = []
-        if self.draw_projection == PROJECTION_IMG:
-            for pred_i, pred in enumerate(track.predictions):
-                pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist())
-        elif self.draw_projection == PROJECTION_MAP:
-            pred_coords = [pred for pred in track.predictions]
+        if track.predictions:
+            if self.draw_projection == PROJECTION_IMG:
+                for pred_i, pred in enumerate(track.predictions):
+                    pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist())
+            elif self.draw_projection == PROJECTION_MAP:
+                pred_coords = [pred for pred in track.predictions]

         self.pred_coords = pred_coords
         # color = (128,0,128) if pred_i else (128,
@@ -282,7 +283,7 @@ class PreviewRenderer:
         # TODO: get FPS from frame_emitter
         # self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
         self.fps = 60
-        self.frame_size = (self.config.frame_width,self.config.frame_height)
+        self.frame_size = (self.config.camera.w,self.config.camera.h)
         self.hide_stats = False
         self.out_writer = self.start_writer() if self.config.render_file else None
         self.streaming_process = self.start_streaming() if self.config.render_url else None

trap/process_data.py Normal file

@@ -0,0 +1,214 @@
from pathlib import Path
import sys
import os
import numpy as np
import pandas as pd
import dill
import tqdm
import argparse

#sys.path.append("../../")
from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of

desired_max_time = 100
pred_indices = [2, 3]
state_dim = 6
frame_diff = 10
desired_frame_diff = 1
dt = 0.1 # dt per frame (e.g. 1/FPS)

standardization = {
    'PEDESTRIAN': {
        'position': {
            'x': {'mean': 0, 'std': 1},
            'y': {'mean': 0, 'std': 1}
        },
        'velocity': {
            'x': {'mean': 0, 'std': 2},
            'y': {'mean': 0, 'std': 2}
        },
        'acceleration': {
            'x': {'mean': 0, 'std': 1},
            'y': {'mean': 0, 'std': 1}
        }
    }
}
def augment_scene(scene, angle):
    def rotate_pc(pc, alpha):
        M = np.array([[np.cos(alpha), -np.sin(alpha)],
                      [np.sin(alpha), np.cos(alpha)]])
        return M @ pc

    data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])

    scene_aug = Scene(timesteps=scene.timesteps, dt=scene.dt, name=scene.name)

    alpha = angle * np.pi / 180

    for node in scene.nodes:
        x = node.data.position.x.copy()
        y = node.data.position.y.copy()

        x, y = rotate_pc(np.array([x, y]), alpha)

        vx = derivative_of(x, scene.dt)
        vy = derivative_of(y, scene.dt)
        ax = derivative_of(vx, scene.dt)
        ay = derivative_of(vy, scene.dt)

        data_dict = {('position', 'x'): x,
                     ('position', 'y'): y,
                     ('velocity', 'x'): vx,
                     ('velocity', 'y'): vy,
                     ('acceleration', 'x'): ax,
                     ('acceleration', 'y'): ay}

        node_data = pd.DataFrame(data_dict, columns=data_columns)

        node = Node(node_type=node.type, node_id=node.id, data=node_data, first_timestep=node.first_timestep)

        scene_aug.nodes.append(node)
    return scene_aug


def augment(scene):
    scene_aug = np.random.choice(scene.augmented)
    scene_aug.temporal_scene_graph = scene.temporal_scene_graph
    return scene_aug
# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:

def process_data(src_dir: Path, dst_dir: Path, name: str):
    print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")

    nl = 0
    l = 0
    data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
    skipped_for_error = 0
    created = 0

    for data_class in ['train', 'val', 'test']:
        env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
        attention_radius = dict()
        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0
        env.attention_radius = attention_radius

        scenes = []
        split_id = f"{name}_{data_class}"
        data_dict_path = dst_dir / (split_id + '.pkl')
        print(data_dict_path)

        subpath = src_dir / data_class
        for file in subpath.glob("*.txt"):
            print(file)
            input_data_dict = dict()

            data = pd.read_csv(file, sep='\t', index_col=False, header=None)

            if data.shape[1] == 8:
                data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y']
            elif data.shape[1] == 9:
                data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y', 'state']
            else:
                raise Exception("Unknown data format. Check column count")

            # data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
            data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')

            data['frame_id'] = (data['frame_id'] // frame_diff).astype(int)
            data['frame_id'] -= data['frame_id'].min()

            data['node_type'] = 'PEDESTRIAN'
            data['node_id'] = data['track_id'].astype(str)
            data.sort_values('frame_id', inplace=True)

            # Mean Position
            print("Means: x:", data['pos_x'].mean(), "y:", data['pos_y'].mean())
            data['pos_x'] = data['pos_x'] - data['pos_x'].mean()
            data['pos_y'] = data['pos_y'] - data['pos_y'].mean()

            max_timesteps = data['frame_id'].max()

            scene = Scene(timesteps=max_timesteps+1, dt=dt, name=split_id, aug_func=augment if data_class == 'train' else None)

            for node_id in tqdm.tqdm(pd.unique(data['node_id'])):
                node_df = data[data['node_id'] == node_id]
                if not np.all(np.diff(node_df['frame_id']) == 1):
                    # print(f"Interval in {node_id} not always 1")
                    # print(node_df['frame_id'])
                    # print(np.diff(node_df['frame_id']) != 1)
                    # mask=np.append(False, np.diff(node_df['frame_id']) != 1)
                    # print(node_df[mask]['frame_id'])
                    skipped_for_error += 1
                    continue

                node_values = node_df[['pos_x', 'pos_y']].values

                if node_values.shape[0] < 2:
                    continue

                new_first_idx = node_df['frame_id'].iloc[0]

                x = node_values[:, 0]
                y = node_values[:, 1]
                vx = derivative_of(x, scene.dt)
                vy = derivative_of(y, scene.dt)
                ax = derivative_of(vx, scene.dt)
                ay = derivative_of(vy, scene.dt)

                data_dict = {('position', 'x'): x,
                             ('position', 'y'): y,
                             ('velocity', 'x'): vx,
                             ('velocity', 'y'): vy,
                             ('acceleration', 'x'): ax,
                             ('acceleration', 'y'): ay}

                node_data = pd.DataFrame(data_dict, columns=data_columns)
                node = Node(node_type=env.NodeType.PEDESTRIAN, node_id=node_id, data=node_data)
                node.first_timestep = new_first_idx

                scene.nodes.append(node)
                created += 1

            # if data_class == 'train':
            #     scene.augmented = list()
            #     angles = np.arange(0, 360, 15) if data_class == 'train' else [0]
            #     for angle in angles:
            #         scene.augmented.append(augment_scene(scene, angle))

            # print(scene)
            scenes.append(scene)
        print(f'Processed {len(scenes):.2f} scene for data class {data_class}')

        env.scenes = scenes

        print(env.scenes)

        if len(scenes) > 0:
            with open(data_dict_path, 'wb') as f:
                dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)

    print(f"Linear: {l}")
    print(f"Non-Linear: {nl}")
    print(f"error: {skipped_for_error}, used: {created}")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--src-dir", "-s", type=Path, required=True, help="Directory with tracker output in .txt files")
    parser.add_argument("--dst-dir", "-d", type=Path, required=True, help="Destination directory to store parsed .pkl files (typically 'trajectron-data')")
    parser.add_argument("--name", "-n", type=str, required=True, help="Identifier to prefix the output .pkl files with (result is NAME_train.pkl, NAME_val.pkl, NAME_test.pkl)")

    args = parser.parse_args()
    process_data(**args.__dict__)
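For clarity, a hedged sketch of the tab-separated tracker output that `process_data()` parses: 8 columns (`frame_id track_id l t w h pos_x pos_y`), optionally a 9th `state` column; frame ids advance in steps of `frame_diff` (10) so the downsampled ids become consecutive. All numbers below are made up.

```python
import pandas as pd

sample = "\n".join([
    # frame_id  track_id  l      t      w     h     pos_x  pos_y
    "0\t1\t100.0\t200.0\t40.0\t80.0\t1.20\t3.40",
    "10\t1\t102.0\t201.0\t40.0\t80.0\t1.25\t3.45",
    "20\t1\t104.0\t202.0\t40.0\t80.0\t1.30\t3.50",
])
with open("/tmp/example_track.txt", "w") as f:
    f.write(sample)

data = pd.read_csv("/tmp/example_track.txt", sep='\t', index_col=False, header=None)
data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y']
print(data)
```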


@@ -179,7 +179,7 @@ class Tracker:
                 # embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
                 )
         elif self.config.detector == DETECTOR_YOLOv8:
-            self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0)
+            self.model = YOLO('EXPERIMENTS/yolov8x.pt')
         else:
             raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")
@@ -253,7 +253,7 @@ class Tracker:
         if self.config.detector == DETECTOR_YOLOv8:
-            detections: [Detection] = _yolov8_track(frame, self.model)
+            detections: [Detection] = _yolov8_track(frame, self.model, classes=[0])
         else :
             detections: [Detection] = self._resnet_track(frame.img, scale = 1)
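A hedged reading of the tracker change above: ultralytics' `YOLO` constructor does not accept a `classes` argument, so the person-class filter (COCO class 0) is presumably moved to the per-call API instead. A minimal sketch (the frame is a placeholder; the `_yolov8_track` internals are not shown in this diff):

```python
import numpy as np
from ultralytics import YOLO

model = YOLO('EXPERIMENTS/yolov8x.pt')                # no classes= in the constructor
frame_img = np.zeros((720, 1280, 3), dtype=np.uint8)  # placeholder frame
# filter to persons (class 0) at inference time instead
results = model.track(frame_img, classes=[0], persist=True, verbose=False)
```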