Gstreamer for rtsp

Ruben van de Ven 2024-11-07 15:00:05 +01:00
parent 9284ce8849
commit 0af5030845
9 changed files with 335 additions and 62 deletions

22
README.md Normal file

@ -0,0 +1,22 @@
# Trajectory Prediction Video installation
## Install
* Run `bash build_opencv_with_gstreamer.sh` to build OpenCV with GStreamer support (a quick check of the resulting build is sketched below)
* Use pyenv + poetry to install
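A minimal sanity check (assuming the wheel from the build script is installed in the poetry environment) is to look for GStreamer in OpenCV's build information:

```python
import cv2

# The locally built wheel should list "GStreamer: YES" among the Video I/O backends.
info = cv2.getBuildInformation()
gst_lines = [line.strip() for line in info.splitlines() if "GStreamer" in line]
print(gst_lines)  # expect something like ['GStreamer: YES (1.20.3)']
assert any("YES" in line for line in gst_lines), "OpenCV was built without GStreamer support"
```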
## How to
> See also the sibling repo [traptools](https://git.rubenvandeven.com/security_vision/traptools) for camera calibration and homography tools that are needed for this repo.
These are roughly the steps to go from data gathering to training:
1. Make sure to have some recordings with a fixed camera.
* Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
2. Follow the steps in the auxiliary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) camera matrix, lens distortion, image dimensions, and (2+3) homography
3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME`
5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data `
6. Then run:
* On a video file (you can use a wildcard) `DISPLAY=:1 poetry run trapserv --remote-log-addr 100.69.123.91 --eval_device cuda:0 --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --model_dir EXPERIMENTS/models/models_DATE_NAME/ --smooth-predictions --num-samples 3 --render-window --calibration ../DATASETS/NAME/calibration.json` (the DISPLAY environment variable is used here when running over an SSH connection to display on the local monitor)
* or on the RTSP stream, which uses GStreamer to substantially reduce latency compared to the default ffmpeg bindings in OpenCV (see the sketch below).
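The RTSP mode relies on OpenCV's GStreamer backend. A minimal sketch of the capture it builds (the pipeline string mirrors `trap/frame_emitter.py`; URL and credentials are placeholders):

```python
import cv2

# latency=0 plus a dropping appsink keeps only the newest frame, minimising delay.
gst = (
    "rtspsrc location=rtsp://USER:PASS@IP:554/Streaming/Channels/1 latency=0 buffer-mode=auto "
    "! decodebin ! videoconvert ! appsink max-buffers=1 drop=true"
)
video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
while True:
    ok, frame = video.read()
    if not ok:
        break
    # ... hand the frame to the tracker ...
```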

22
poetry.lock generated

@ -1801,27 +1801,25 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"]
[[package]]
name = "opencv-python"
version = "4.8.1.78"
version = "4.10.0.84"
description = "Wrapper package for OpenCV python bindings."
optional = false
python-versions = ">=3.6"
files = [
{file = "opencv-python-4.8.1.78.tar.gz", hash = "sha256:cc7adbbcd1112877a39274106cb2752e04984bc01a031162952e97450d6117f6"},
{file = "opencv_python-4.8.1.78-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:91d5f6f5209dc2635d496f6b8ca6573ecdad051a09e6b5de4c399b8e673c60da"},
{file = "opencv_python-4.8.1.78-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:bc31f47e05447da8b3089faa0a07ffe80e114c91ce0b171e6424f9badbd1c5cd"},
{file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9814beca408d3a0eca1bae7e3e5be68b07c17ecceb392b94170881216e09b319"},
{file = "opencv_python-4.8.1.78-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c406bdb41eb21ea51b4e90dfbc989c002786c3f601c236a99c59a54670a394"},
{file = "opencv_python-4.8.1.78-cp37-abi3-win32.whl", hash = "sha256:a7aac3900fbacf55b551e7b53626c3dad4c71ce85643645c43e91fcb19045e47"},
{file = "opencv_python-4.8.1.78-cp37-abi3-win_amd64.whl", hash = "sha256:b983197f97cfa6fcb74e1da1802c7497a6f94ed561aba6980f1f33123f904956"},
{file = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl", hash = "sha256:c1f8e6ba7fd82517ba97d352f51d161c5be51495dc7b6c6f929a8546d650f4ea"},
]
[package.dependencies]
numpy = [
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\""},
]
[package.source]
type = "file"
url = "opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"
[[package]]
name = "orjson"
version = "3.9.10"
@ -1939,8 +1937,8 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
{version = ">=1.23.2,<2", markers = "python_version == \"3.11\""},
{version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@ -3324,7 +3322,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,
[[package]]
name = "trajectron-plus-plus"
version = "0.1.1"
description = "Predict trajectories for anomaly detection"
description = "This repository contains the code for Trajectron++: Dynamically-Feasible Trajectory Forecasting With Heterogeneous Data by Tim Salzmann*, Boris Ivanovic*, Punarjay Chakravarty, and Marco Pavone (* denotes equal contribution)."
optional = false
python-versions = "^3.9,<3.12"
files = []
@ -3542,4 +3540,4 @@ watchdog = ["watchdog (>=2.3)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10,<3.12,"
content-hash = "bffa0878a620996b47aa5623b951f09ab010c267880c6dcd5a53741f244e675a"
content-hash = "e92dc4bbdd22d5a5ebe5910f6cef1a45c7796e632fb6cb3debfc16f7b89b4972"

View file

@ -8,6 +8,7 @@ readme = "README.md"
[tool.poetry.scripts]
trapserv = "trap.plumber:start"
tracker = "trap.tools:tracker_preprocess"
process_data = "trap.process_data:main"
[tool.poetry.dependencies]
@ -34,6 +35,7 @@ pandas-helper-calc = {git = "https://github.com/scls19fr/pandas-helper-calc"}
tsmoothie = "^1.0.5"
pyglet = "^2.0.15"
pyglet-cornerpin = "^0.2.0"
opencv-python = {file="./opencv_python-4.10.0.84-cp310-cp310-linux_x86_64.whl"}
[build-system]
requires = ["poetry-core"]

View file

@ -57,7 +57,7 @@ class AnimationRenderer:
# TODO: get FPS from frame_emitter
# self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
self.fps = 60
self.frame_size = (self.config.frame_width,self.config.frame_height)
self.frame_size = (self.config.camera.w,self.config.camera.h)
self.hide_stats = False
self.out_writer = None # self.start_writer() if self.config.render_file else None
self.streaming_process = None # self.start_streaming() if self.config.render_url else None
@ -246,7 +246,7 @@ class AnimationRenderer:
img = pyglet.image.ImageData(self.frame_size[0], self.frame_size[1], 'RGB', img.tobytes())
# don't draw in batch, so that it is the background
self.video_sprite = pyglet.sprite.Sprite(img=img, batch=self.batch_bg)
self.video_sprite.opacity = 30
self.video_sprite.opacity = 100
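# pyglet sprite opacity ranges 0-255, so 100 keeps the camera frame as a dimmed background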
except zmq.ZMQError as e:
# idx = frame.index if frame else "NONE"
# logger.debug(f"reuse video frame {idx}")

View file

@ -8,6 +8,7 @@ from trap.tracker import DETECTORS
from trap.frame_emitter import Camera
from pyparsing import Optional
from trap.frame_emitter import UrlOrPath
class LambdaParser(argparse.ArgumentParser):
"""Execute lambda functions
@ -85,7 +86,7 @@ class CameraAction(argparse.Action):
# 'camera_matrix': np.array(data['camera_matrix']),
# 'dist_coeff': np.array(data['dist_coeff']),
# }
camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), namespace.frame_width, namespace.frame_height)
camera = Camera(np.array(data['camera_matrix']), np.array(data['dist_coeff']), data['dim']['width'], data['dim']['height'], namespace.H)
setattr(namespace, 'camera', camera)
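For reference, a rough sketch of building a Camera from the same file outside argparse (the calibration.json keys follow the lookups above; loading the homography is elided, since it normally arrives via the --homography argument as namespace.H):

```python
import json
import numpy as np
from trap.frame_emitter import Camera

# Assumed calibration.json layout, matching the keys read by CameraAction above.
with open("../DATASETS/NAME/calibration.json") as f:
    data = json.load(f)

H = np.eye(3)  # placeholder: the real homography comes from the --homography file
camera = Camera(
    np.array(data["camera_matrix"]),
    np.array(data["dist_coeff"]),
    data["dim"]["width"],
    data["dim"]["height"],
    H,
)
```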
@ -253,10 +254,10 @@ connection_parser.add_argument('--bypass-prediction',
# Frame emitter
frame_emitter_parser.add_argument("--video-src",
help="source video to track from",
type=Path,
help="source video to track from can be either a relative or absolute path, or a url, like an RTSP resource",
type=UrlOrPath,
nargs='+',
default=lambda: list(Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')))
default=lambda: [UrlOrPath(p) for p in Path('../DATASETS/VIRAT_subset_0102x/').glob('*.mp4')])
frame_emitter_parser.add_argument("--video-offset",
help="Start playback from given frame. Note that when src is an array, this applies to all videos individually.",
default=None,
@ -292,14 +293,15 @@ tracker_parser.add_argument("--detector",
tracker_parser.add_argument("--smooth-tracks",
help="Smooth the tracker tracks before sending them to the predictor",
action='store_true')
tracker_parser.add_argument("--frame-width",
help="width of the frames",
type=int,
default=1280)
tracker_parser.add_argument("--frame-height",
help="height of the frames",
type=int,
default=720)
# now in calibration.json
# tracker_parser.add_argument("--frame-width",
# help="width of the frames",
# type=int,
# default=1280)
# tracker_parser.add_argument("--frame-height",
# help="height of the frames",
# type=int,
# default=720)
# Renderer

View file

@ -12,11 +12,30 @@ from typing import Iterable, Optional
import numpy as np
import cv2
import zmq
import os
from deep_sort_realtime.deep_sort.track import Track as DeepsortTrack
from deep_sort_realtime.deep_sort.track import TrackState as DeepsortTrackState
from urllib.parse import urlparse
logger = logging.getLogger('trap.frame_emitter')
class UrlOrPath():
def __init__(self, str):
self.url = urlparse(str)
def __str__(self) -> str:
return self.url.geturl()
def is_url(self) -> bool:
return len(self.url.netloc) > 0
def path(self) -> Path:
if self.is_url():
return Path(self.url.path)
return Path(self.url.geturl()) # can include scheme, such as C:/
class DetectionState(IntFlag):
Tentative = 1 # state before n_init (see DeepsortTrack)
Confirmed = 2 # after tentative
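A small usage sketch of the new UrlOrPath wrapper (values are placeholders): it puts RTSP URLs and plain file paths behind one interface, which emit_video uses further down to pick a capture backend.

```python
from trap.frame_emitter import UrlOrPath

src = UrlOrPath("rtsp://USER:PASS@IP:554/Streaming/Channels/1")
print(src.is_url())       # True  -> handled by the GStreamer branch
print(src.url.scheme)     # "rtsp"

local = UrlOrPath("../DATASETS/NAME/video.mp4")
print(local.is_url())     # False -> treated as a regular video file
print(local.path().stem)  # "video"
```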
@ -33,12 +52,13 @@ class DetectionState(IntFlag):
raise RuntimeError("Should not run into Deleted entries here")
class Camera:
def __init__(self, mtx, dist, w, h):
def __init__(self, mtx, dist, w, h, H):
self.mtx = mtx
self.dist = dist
self.w = w
self.h = h
self.newcameramtx, self.roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w,h), 1, (w,h))
self.H = H # homography
@dataclass
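With the homography stored on the Camera, projecting a single image point to the ground plane might look like this (a minimal sketch; it assumes H maps undistorted pixel coordinates to ground-plane coordinates, and image_point_to_ground is not part of the repo):

```python
import cv2
import numpy as np

def image_point_to_ground(camera, x, y):
    # Undistort the pixel with the optimal new camera matrix, then apply the homography.
    pt = np.array([[[x, y]]], dtype=np.float32)
    undistorted = cv2.undistortPoints(pt, camera.mtx, camera.dist, P=camera.newcameramtx)
    return cv2.perspectiveTransform(undistorted, np.asarray(camera.H, dtype=np.float64))[0, 0]
```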
@ -135,11 +155,11 @@ class Frame:
} for t in self.tracks.values()
}
def video_src_from_config(config):
def video_src_from_config(config) -> Iterable[UrlOrPath]:
if config.video_loop:
video_srcs: Iterable[Path] = cycle(config.video_src)
video_srcs: Iterable[UrlOrPath] = cycle(config.video_src)
else:
video_srcs: Iterable[Path] = config.video_src
video_srcs: Iterable[UrlOrPath] = config.video_src
return video_srcs
class FrameEmitter:
@ -159,24 +179,33 @@ class FrameEmitter:
logger.info(f"Connection socket {config.zmq_frame_addr}")
self.video_srcs: video_src_from_config(self.config)
self.video_srcs = video_src_from_config(self.config)
def emit_video(self):
i = 0
delay_generation = False
for video_path in self.video_srcs:
logger.info(f"Play from '{str(video_path)}'")
if str(video_path).isdigit():
# numeric input is a CV camera
video = cv2.VideoCapture(int(str(video_path)))
# TODO: make config variables
video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.frame_width))
video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.frame_height))
video.set(cv2.CAP_PROP_FRAME_WIDTH, int(self.config.camera.w))
video.set(cv2.CAP_PROP_FRAME_HEIGHT, int(self.config.camera.h))
print("exposure!", video.get(cv2.CAP_PROP_AUTO_EXPOSURE))
video.set(cv2.CAP_PROP_FPS, 5)
fps=5
elif video_path.url.scheme == 'rtsp':
gst = f"rtspsrc location={video_path} latency=0 buffer-mode=auto ! decodebin ! videoconvert ! appsink max-buffers=1 drop=true"
logger.info(f"Capture gstreamer (gst-launch-1.0): {gst}")
video = cv2.VideoCapture(gst, cv2.CAP_GSTREAMER)
fps=12
else:
# os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "fflags;nobuffer|flags;low_delay|avioflags;direct|rtsp_transport;udp"
video = cv2.VideoCapture(str(video_path))
fps = video.get(cv2.CAP_PROP_FPS)
delay_generation = True
fps = video.get(cv2.CAP_PROP_FPS)
target_frame_duration = 1./fps
logger.info(f"Emit frames at {fps} fps")
@ -186,18 +215,20 @@ class FrameEmitter:
i = self.config.video_offset
if '-' in video_path.stem:
path_stem = video_path.stem[:video_path.stem.rfind('-')]
else:
path_stem = video_path.stem
path_stem += "-homography"
homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
logger.info(f'check homography file {homography_path}')
if homography_path.exists():
logger.info(f'Found custom homography file! Using {homography_path}')
video_H = np.loadtxt(homography_path, delimiter=',')
else:
video_H = None
# if '-' in video_path.path().stem:
# path_stem = video_path.stem[:video_path.stem.rfind('-')]
# else:
# path_stem = video_path.stem
# path_stem += "-homography"
# homography_path = video_path.with_stem(path_stem).with_suffix('.txt')
# logger.info(f'check homography file {homography_path}')
# if homography_path.exists():
# logger.info(f'Found custom homography file! Using {homography_path}')
# video_H = np.loadtxt(homography_path, delimiter=',')
# else:
# video_H = None
video_H = self.config.camera.H
prev_time = time.time()
@ -222,14 +253,17 @@ class FrameEmitter:
# perhaps multiprocessing Array?
self.frame_sock.send(pickle.dumps(frame))
# defer next loop
now = time.time()
time_diff = (now - prev_time)
if time_diff < target_frame_duration:
time.sleep(target_frame_duration - time_diff)
now += target_frame_duration - time_diff
prev_time = now
# only delay consuming the next frame when using a file.
# Otherwise, go ASAP
if delay_generation:
# defer next loop
now = time.time()
time_diff = (now - prev_time)
if time_diff < target_frame_duration:
time.sleep(target_frame_duration - time_diff)
now += target_frame_duration - time_diff
prev_time = now
i += 1

View file

@ -88,11 +88,12 @@ class DrawnTrack:
self.inv_H = np.linalg.pinv(self.H)
pred_coords = []
if self.draw_projection == PROJECTION_IMG:
for pred_i, pred in enumerate(track.predictions):
pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist())
elif self.draw_projection == PROJECTION_MAP:
pred_coords = [pred for pred in track.predictions]
if track.predictions:
if self.draw_projection == PROJECTION_IMG:
for pred_i, pred in enumerate(track.predictions):
pred_coords.append(cv2.perspectiveTransform(np.array([pred]), self.inv_H)[0].tolist())
elif self.draw_projection == PROJECTION_MAP:
pred_coords = [pred for pred in track.predictions]
self.pred_coords = pred_coords
# color = (128,0,128) if pred_i else (128,
@ -282,7 +283,7 @@ class PreviewRenderer:
# TODO: get FPS from frame_emitter
# self.out = cv2.VideoWriter(str(filename), fourcc, 23.97, (1280,720))
self.fps = 60
self.frame_size = (self.config.frame_width,self.config.frame_height)
self.frame_size = (self.config.camera.w,self.config.camera.h)
self.hide_stats = False
self.out_writer = self.start_writer() if self.config.render_file else None
self.streaming_process = self.start_streaming() if self.config.render_url else None

214
trap/process_data.py Normal file

@ -0,0 +1,214 @@
from pathlib import Path
import sys
import os
import numpy as np
import pandas as pd
import dill
import tqdm
import argparse
#sys.path.append("../../")
from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of
desired_max_time = 100
pred_indices = [2, 3]
state_dim = 6
frame_diff = 10
desired_frame_diff = 1
dt = 0.1 # dt per frame (e.g. 1/FPS)
standardization = {
'PEDESTRIAN': {
'position': {
'x': {'mean': 0, 'std': 1},
'y': {'mean': 0, 'std': 1}
},
'velocity': {
'x': {'mean': 0, 'std': 2},
'y': {'mean': 0, 'std': 2}
},
'acceleration': {
'x': {'mean': 0, 'std': 1},
'y': {'mean': 0, 'std': 1}
}
}
}
def augment_scene(scene, angle):
def rotate_pc(pc, alpha):
M = np.array([[np.cos(alpha), -np.sin(alpha)],
[np.sin(alpha), np.cos(alpha)]])
return M @ pc
data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
scene_aug = Scene(timesteps=scene.timesteps, dt=scene.dt, name=scene.name)
alpha = angle * np.pi / 180
for node in scene.nodes:
x = node.data.position.x.copy()
y = node.data.position.y.copy()
x, y = rotate_pc(np.array([x, y]), alpha)
vx = derivative_of(x, scene.dt)
vy = derivative_of(y, scene.dt)
ax = derivative_of(vx, scene.dt)
ay = derivative_of(vy, scene.dt)
data_dict = {('position', 'x'): x,
('position', 'y'): y,
('velocity', 'x'): vx,
('velocity', 'y'): vy,
('acceleration', 'x'): ax,
('acceleration', 'y'): ay}
node_data = pd.DataFrame(data_dict, columns=data_columns)
node = Node(node_type=node.type, node_id=node.id, data=node_data, first_timestep=node.first_timestep)
scene_aug.nodes.append(node)
return scene_aug
def augment(scene):
scene_aug = np.random.choice(scene.augmented)
scene_aug.temporal_scene_graph = scene.temporal_scene_graph
return scene_aug
# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:
def process_data(src_dir: Path, dst_dir: Path, name: str):
print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")
nl = 0
l = 0
data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
skipped_for_error = 0
created = 0
for data_class in ['train', 'val', 'test']:
env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
attention_radius = dict()
attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0
env.attention_radius = attention_radius
scenes = []
split_id = f"{name}_{data_class}"
data_dict_path = dst_dir / (split_id + '.pkl')
print(data_dict_path)
subpath = src_dir / data_class
for file in subpath.glob("*.txt"):
print(file)
input_data_dict = dict()
data = pd.read_csv(file, sep='\t', index_col=False, header=None)
if data.shape[1] == 8:
data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y']
elif data.shape[1] == 9:
data.columns = ['frame_id', 'track_id', 'l','t', 'w','h', 'pos_x', 'pos_y', 'state']
else:
raise Exception("Unknown data format. Check column count")
# data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')
data['frame_id'] = (data['frame_id'] // frame_diff).astype(int)
data['frame_id'] -= data['frame_id'].min()
data['node_type'] = 'PEDESTRIAN'
data['node_id'] = data['track_id'].astype(str)
data.sort_values('frame_id', inplace=True)
# Mean Position
print("Means: x:", data['pos_x'].mean(), "y:", data['pos_y'].mean())
data['pos_x'] = data['pos_x'] - data['pos_x'].mean()
data['pos_y'] = data['pos_y'] - data['pos_y'].mean()
max_timesteps = data['frame_id'].max()
scene = Scene(timesteps=max_timesteps+1, dt=dt, name=split_id, aug_func=augment if data_class == 'train' else None)
for node_id in tqdm.tqdm(pd.unique(data['node_id'])):
node_df = data[data['node_id'] == node_id]
if not np.all(np.diff(node_df['frame_id']) == 1):
# print(f"Interval in {node_id} not always 1")
# print(node_df['frame_id'])
# print(np.diff(node_df['frame_id']) != 1)
# mask=np.append(False, np.diff(node_df['frame_id']) != 1)
# print(node_df[mask]['frame_id'])
skipped_for_error += 1
continue
node_values = node_df[['pos_x', 'pos_y']].values
if node_values.shape[0] < 2:
continue
new_first_idx = node_df['frame_id'].iloc[0]
x = node_values[:, 0]
y = node_values[:, 1]
vx = derivative_of(x, scene.dt)
vy = derivative_of(y, scene.dt)
ax = derivative_of(vx, scene.dt)
ay = derivative_of(vy, scene.dt)
data_dict = {('position', 'x'): x,
('position', 'y'): y,
('velocity', 'x'): vx,
('velocity', 'y'): vy,
('acceleration', 'x'): ax,
('acceleration', 'y'): ay}
node_data = pd.DataFrame(data_dict, columns=data_columns)
node = Node(node_type=env.NodeType.PEDESTRIAN, node_id=node_id, data=node_data)
node.first_timestep = new_first_idx
scene.nodes.append(node)
created+=1
# if data_class == 'train':
# scene.augmented = list()
# angles = np.arange(0, 360, 15) if data_class == 'train' else [0]
# for angle in angles:
# scene.augmented.append(augment_scene(scene, angle))
# print(scene)
scenes.append(scene)
print(f'Processed {len(scenes)} scenes for data class {data_class}')
env.scenes = scenes
print(env.scenes)
if len(scenes) > 0:
with open(data_dict_path, 'wb') as f:
dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)
print(f"Linear: {l}")
print(f"Non-Linear: {nl}")
print(f"error: {skipped_for_error}, used: {created}")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--src-dir", "-s", type=Path, required=True, help="Directory with tracker output in .txt files")
parser.add_argument("--dst-dir", "-d", type=Path, required=True, help="Destination directory to store parsed .pkl files (typically 'trajectron-data')")
parser.add_argument("--name", "-n", type=str, required=True, help="Identifier to prefix the output .pkl files with (result is NAME-train.pkl, NAME-test.pkl)")
args = parser.parse_args()
process_data(**args.__dict__)

View file

@ -179,7 +179,7 @@ class Tracker:
# embedder='torchreid', embedder_wts="../MODELS/osnet_x1_0_imagenet.pth"
)
elif self.config.detector == DETECTOR_YOLOv8:
self.model = YOLO('EXPERIMENTS/yolov8x.pt', classes=0)
self.model = YOLO('EXPERIMENTS/yolov8x.pt')
else:
raise RuntimeError(f"{self.config.detector} is not implemented yet. See --help")
@ -253,7 +253,7 @@ class Tracker:
if self.config.detector == DETECTOR_YOLOv8:
detections: [Detection] = _yolov8_track(frame, self.model)
detections: [Detection] = _yolov8_track(frame, self.model, classes=[0])
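# classes=[0]: only detect/track class 0, i.e. 'person' in the COCO classes used by YOLOv8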
else :
detections: [Detection] = self._resnet_track(frame.img, scale = 1)