from pathlib import Path
import sys
import os
import numpy as np
import pandas as pd
import dill
import tqdm
import argparse

#sys.path.append("../../")
from trajectron.environment import Environment, Scene, Node
from trajectron.utils import maybe_makedirs
from trajectron.environment import derivative_of

desired_max_time = 100
pred_indices = [2, 3]
state_dim = 6
frame_diff = 10
desired_frame_diff = 1
dt = 0.1  # dt per frame (e.g. 1/FPS)

standardization = {
    'PEDESTRIAN': {
        'position': {
            'x': {'mean': 0, 'std': 1},
            'y': {'mean': 0, 'std': 1}
        },
        'velocity': {
            'x': {'mean': 0, 'std': 2},
            'y': {'mean': 0, 'std': 2}
        },
        'acceleration': {
            'x': {'mean': 0, 'std': 1},
            'y': {'mean': 0, 'std': 1}
        }
    }
}


def augment_scene(scene, angle):
    """Return a copy of `scene` with all node trajectories rotated by `angle` degrees."""
    def rotate_pc(pc, alpha):
        M = np.array([[np.cos(alpha), -np.sin(alpha)],
                      [np.sin(alpha), np.cos(alpha)]])
        return M @ pc

    data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])

    scene_aug = Scene(timesteps=scene.timesteps, dt=scene.dt, name=scene.name)

    alpha = angle * np.pi / 180

    for node in scene.nodes:
        x = node.data.position.x.copy()
        y = node.data.position.y.copy()

        x, y = rotate_pc(np.array([x, y]), alpha)

        vx = derivative_of(x, scene.dt)
        vy = derivative_of(y, scene.dt)
        ax = derivative_of(vx, scene.dt)
        ay = derivative_of(vy, scene.dt)

        data_dict = {('position', 'x'): x,
                     ('position', 'y'): y,
                     ('velocity', 'x'): vx,
                     ('velocity', 'y'): vy,
                     ('acceleration', 'x'): ax,
                     ('acceleration', 'y'): ay}

        node_data = pd.DataFrame(data_dict, columns=data_columns)

        node = Node(node_type=node.type, node_id=node.id, data=node_data, first_timestep=node.first_timestep)

        scene_aug.nodes.append(node)
    return scene_aug


def augment(scene):
    """Pick one of the pre-computed augmented variants of `scene` at random."""
    scene_aug = np.random.choice(scene.augmented)
    scene_aug.temporal_scene_graph = scene.temporal_scene_graph
    return scene_aug


# maybe_makedirs('trajectron-data')
# for desired_source in [ 'hof2', ]:# ,'hof-maskrcnn', 'hof-yolov8', 'VIRAT-0102-parsed', 'virat-resnet-keypoints-full']:

def process_data(src_dir: Path, dst_dir: Path, name: str):
    """Convert tracker .txt files in src_dir/{train,val,test} into pickled Trajectron environments."""
    print(f"Process data in {src_dir}, to {dst_dir}, identified by {name}")

    nl = 0
    l = 0
    data_columns = pd.MultiIndex.from_product([['position', 'velocity', 'acceleration'], ['x', 'y']])
    skipped_for_error = 0
    created = 0

    for data_class in ['train', 'val', 'test']:
        env = Environment(node_type_list=['PEDESTRIAN'], standardization=standardization)
        attention_radius = dict()
        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 2.0
        env.attention_radius = attention_radius

        scenes = []
        split_id = f"{name}_{data_class}"
        data_dict_path = dst_dir / (split_id + '.pkl')
        print(data_dict_path)

        subpath = src_dir / data_class
        for file in subpath.glob("*.txt"):
            print(file)
            input_data_dict = dict()

            data = pd.read_csv(file, sep='\t', index_col=False, header=None)
            if data.shape[1] == 8:
                data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y']
            elif data.shape[1] == 9:
                data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'pos_x', 'pos_y', 'state']
            else:
                raise Exception("Unknown data format. Check column count")

            # data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
            data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')

            # Rebase frame ids onto consecutive timesteps starting at 0
            data['frame_id'] = (data['frame_id'] // frame_diff).astype(int)
            data['frame_id'] -= data['frame_id'].min()

            data['node_type'] = 'PEDESTRIAN'
            data['node_id'] = data['track_id'].astype(str)
            data.sort_values('frame_id', inplace=True)

            # Center positions around the mean position
            print("Means: x:", data['pos_x'].mean(), "y:", data['pos_y'].mean())
            data['pos_x'] = data['pos_x'] - data['pos_x'].mean()
            data['pos_y'] = data['pos_y'] - data['pos_y'].mean()

            max_timesteps = data['frame_id'].max()

            scene = Scene(timesteps=max_timesteps + 1, dt=dt, name=split_id,
                          aug_func=augment if data_class == 'train' else None)

            for node_id in tqdm.tqdm(pd.unique(data['node_id'])):
                node_df = data[data['node_id'] == node_id]
                if not np.all(np.diff(node_df['frame_id']) == 1):
                    # Track has gaps in its frame sequence; skip it.
                    # print(f"Interval in {node_id} not always 1")
                    # print(node_df['frame_id'])
                    # print(np.diff(node_df['frame_id']) != 1)
                    # mask = np.append(False, np.diff(node_df['frame_id']) != 1)
                    # print(node_df[mask]['frame_id'])
                    skipped_for_error += 1
                    continue

                node_values = node_df[['pos_x', 'pos_y']].values

                if node_values.shape[0] < 2:
                    continue

                new_first_idx = node_df['frame_id'].iloc[0]

                x = node_values[:, 0]
                y = node_values[:, 1]
                vx = derivative_of(x, scene.dt)
                vy = derivative_of(y, scene.dt)
                ax = derivative_of(vx, scene.dt)
                ay = derivative_of(vy, scene.dt)

                data_dict = {('position', 'x'): x,
                             ('position', 'y'): y,
                             ('velocity', 'x'): vx,
                             ('velocity', 'y'): vy,
                             ('acceleration', 'x'): ax,
                             ('acceleration', 'y'): ay}

                node_data = pd.DataFrame(data_dict, columns=data_columns)
                node = Node(node_type=env.NodeType.PEDESTRIAN, node_id=node_id, data=node_data)
                node.first_timestep = new_first_idx

                scene.nodes.append(node)
                created += 1

            # if data_class == 'train':
            #     scene.augmented = list()
            #     angles = np.arange(0, 360, 15) if data_class == 'train' else [0]
            #     for angle in angles:
            #         scene.augmented.append(augment_scene(scene, angle))

            # print(scene)
            scenes.append(scene)

        print(f'Processed {len(scenes)} scenes for data class {data_class}')

        env.scenes = scenes
        print(env.scenes)

        if len(scenes) > 0:
            with open(data_dict_path, 'wb') as f:
                dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)

    print(f"Linear: {l}")
    print(f"Non-Linear: {nl}")
    print(f"error: {skipped_for_error}, used: {created}")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--src-dir", "-s", type=Path, required=True,
                        help="Directory with tracker output in .txt files")
    parser.add_argument("--dst-dir", "-d", type=Path, required=True,
                        help="Destination directory to store parsed .pkl files (typically 'trajectron-data')")
    parser.add_argument("--name", "-n", type=str, required=True,
                        help="Identifier to prefix the output .pkl files with (result is NAME_train.pkl, NAME_val.pkl, NAME_test.pkl)")
    args = parser.parse_args()
    process_data(**args.__dict__)