trap/test_custom_rnn.ipynb

Goal of this notebook: implement some basic RNN/LSTM/GRU models to forecast trajectories, based on VIRAT and/or the custom hof dataset.

In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt # Visualization 
import torch.nn as nn
import pandas_helper_calc  # noqa # provides df.calc.derivative()
import pandas as pd
import cv2
import pathlib
from tqdm.autonotebook import tqdm
/home/ruben/suspicion/trap/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
In [2]:
FPS = 12
# SRC_CSV = "EXPERIMENTS/hofext-maskrcnn/all.txt"
# SRC_CSV = "EXPERIMENTS/raw/generated/train/tracks.txt"
SRC_CSV = "EXPERIMENTS/raw/hof-meter-maskrcnn2/train/tracks.txt"
SRC_CSV = "EXPERIMENTS/20240426-hof-yolo/train/tracked.txt"
SRC_CSV = "EXPERIMENTS/raw/hof2/train/tracked.txt"
# SRC_H = "../DATASETS/hof/webcam20231103-2-homography.txt"
SRC_H = None
CACHE_DIR = "EXPERIMENTS/cache/hof2/"
SMOOTHING = True # hof-yolo is already smoothed, hof2 isn't
SMOOTHING_WINDOW=3 #2
In [3]:
in_fields = ['proj_x', 'proj_y', 'vx', 'vy', 'ax', 'ay']
# out_fields = ['v', 'heading']
# velocity cannot be negative, and heading is circular (modulo 360), which makes it
# harder to optimise than a linear space, so use the vx/vy components instead;
# then we can use a simple MSE loss (I guess?) -- see the quick illustration below
out_fields = ['vx', 'vy']
window = int(FPS*1.5)
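
A quick illustration of that circularity problem (made-up angles): two nearly identical directions are far apart in raw degrees, but close together as vx/vy-style components.

In [ ]:
# headings of 359° and 1° are only 2° apart as directions, but naive MSE sees 358
h_a, h_b = 359.0, 1.0
print(abs(h_a - h_b))  # 358.0 in raw angle space
# as unit-vector components the same two directions are nearly identical
print(np.hypot(np.cos(np.radians(h_a)) - np.cos(np.radians(h_b)),
               np.sin(np.radians(h_a)) - np.sin(np.radians(h_b))))  # ≈ 0.035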
In [4]:
# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Hyperparameters
input_size = len(in_fields)
hidden_size = 256
num_layers = 3
output_size = len(out_fields)
learning_rate = 0.005 #0.01 #0.005
batch_size = 256
num_epochs = 1000
cuda
In [5]:
cache_path = pathlib.Path(CACHE_DIR)
cache_path.mkdir(parents=True, exist_ok=True)
In [6]:
from pathlib import Path
from trap.tools import load_tracks_from_csv

data = load_tracks_from_csv(Path(SRC_CSV), FPS, 2, 5)
Sampling 1/5 of 412098 items
Done sampling, kept 83726 items
In [ ]:

Out[ ]:
frame_id track_id l t w h x y state diff ... dx dy vx vy ax ay v a heading d_heading
194 606.0 4 1593.885864 782.814819 145.704346 195.380432 12.897830 10.750061 2.0 NaN ... 0.201965 -0.291350 0.484716 -0.699240 -1.622919 -1.732144 0.850815 1.399195 304.729842 -101.772559
199 611.0 4 1563.890015 700.710510 137.461304 190.194855 13.099794 10.458712 1.0 5.0 ... 0.201965 -0.291350 0.484716 -0.699240 -1.622919 -1.732144 0.850815 1.399195 304.729842 -101.772559
204 616.0 4 1529.469727 635.622498 129.342651 194.191528 13.020002 9.866642 2.0 5.0 ... -0.079792 -0.592069 -0.191501 -1.420966 -1.622919 -1.732144 1.433812 1.399195 262.324609 -101.772559
209 621.0 4 1474.449341 569.387634 128.099854 199.766357 12.965776 9.301442 2.0 5.0 ... -0.054226 -0.565200 -0.130143 -1.356479 0.147259 0.154769 1.362708 -0.170650 264.519715 5.268254
214 626.0 4 1443.123535 518.907043 120.022461 202.566772 12.642992 8.976624 2.0 5.0 ... -0.322784 -0.324818 -0.774681 -0.779564 -1.546892 1.384597 1.099023 -0.632844 225.179993 -94.415332
219 631.0 4 1398.944946 461.813049 106.391357 193.476410 12.465588 8.557788 2.0 5.0 ... -0.177404 -0.418836 -0.425771 -1.005205 0.837386 -0.541539 1.091659 -0.017675 247.044148 52.473972
224 636.0 4 1353.237793 438.118896 91.444336 170.930664 12.128433 8.052323 2.0 5.0 ... -0.337155 -0.505465 -0.809172 -1.213117 -0.920163 -0.498987 1.458222 0.879752 236.295957 -25.795658
229 641.0 4 1272.791992 408.827759 104.274536 180.414551 11.689648 7.684636 2.0 5.0 ... -0.438785 -0.367687 -1.053084 -0.882448 -0.585388 0.793604 1.373936 -0.202286 219.961870 -39.201809
234 646.0 4 1198.965820 407.952759 103.282104 167.306580 11.207276 7.476216 2.0 5.0 ... -0.482372 -0.208420 -1.157693 -0.500209 -0.251064 0.917374 1.261136 -0.270721 203.367915 -39.825493
239 651.0 4 1156.309570 415.743408 97.628784 158.774811 10.884154 7.514692 2.0 5.0 ... -0.323122 0.038476 -0.775493 0.092343 0.917282 1.422125 0.780971 -1.152395 173.209381 -72.380481
244 656.0 4 1094.440430 443.849915 107.938110 177.703979 10.544492 7.870090 2.0 5.0 ... -0.339661 0.355398 -0.815187 0.852955 -0.095267 1.825468 1.179857 0.957326 133.703018 -94.815270
249 661.0 4 1072.595093 481.461945 118.452148 205.365173 10.486504 8.287758 2.0 5.0 ... -0.057989 0.417668 -0.139173 1.002404 1.622435 0.358678 1.012019 -0.402811 97.904355 -85.916792
254 666.0 4 1086.627930 526.733154 105.444458 189.750610 10.498393 8.684043 2.0 5.0 ... 0.011889 0.396285 0.028534 0.951083 0.402496 -0.123170 0.951511 -0.145220 88.281546 -23.094741
259 671.0 4 1099.592285 584.216675 114.395874 218.003479 10.492767 9.267106 2.0 5.0 ... -0.005626 0.583063 -0.013502 1.399352 -0.100887 1.075845 1.399417 1.074975 90.552815 5.451045
264 676.0 4 1144.484782 642.779582 96.750326 180.744690 10.484691 9.582745 1.0 5.0 ... -0.008077 0.315639 -0.019384 0.757534 -0.014116 -1.540364 0.757782 -1.539925 91.465753 2.191052
269 681.0 4 1179.532959 682.365540 107.764282 200.651733 10.698373 9.950516 2.0 5.0 ... 0.213682 0.367771 0.512837 0.882650 1.277331 0.300278 1.020820 0.631291 59.842534 -75.895726

16 rows × 24 columns

In [6]:
data = pd.read_csv(SRC_CSV, delimiter="\t", index_col=False, header=None)
# data.columns = ['frame_id', 'track_id', 'pos_x', 'pos_y', 'width', 'height']#, '_x', '_y,']
data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state']#, '_x', '_y,']
data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
data['frame_id'] = data['frame_id'] // 10 # compatibility with Trajectron++

data.sort_values(by=['track_id', 'frame_id'],inplace=True)

data.set_index(['track_id', 'frame_id'])
Out[6]:
l t w h x y state
track_id frame_id
1 342 1393.736572 0.000000 67.613647 121.391151 1363.3164 232.92647 2
343 1391.775879 0.852371 78.562622 141.050934 1359.1885 266.06586 2
346 1392.164551 7.758987 85.757324 154.357971 1355.7444 297.67404 2
347 1393.844849 12.691238 86.482910 156.264786 1355.2312 308.20670 2
348 1394.839111 15.621338 84.763428 154.584396 1354.9246 310.09225 2
... ... ... ... ... ... ... ... ...
5030 32691 1708.213379 749.260376 133.839966 182.405396 1402.5426 1075.20870 2
32692 1707.651855 748.997437 134.013672 182.391296 1402.2948 1074.97230 2
32720 1700.379639 750.314697 128.792603 181.589783 1395.7992 1074.27320 2
32721 1701.722412 751.000488 125.286865 180.867615 1395.5424 1074.20560 2
32722 1702.384766 750.754517 123.435425 180.945618 1395.4082 1074.06500 2

326960 rows × 7 columns

In [7]:
# cm to meter
data['x'] = data['x']/100
data['y'] = data['y']/100
In [8]:
data['diff'] = data.groupby(['track_id'])['frame_id'].diff() #.fillna(0)
data['diff'] = pd.to_numeric(data['diff'], downcast='integer')
In [9]:
missing=0
old_size=len(data)
# slow way to append missing steps to the dataset
for ind, row in tqdm(data.iterrows()):
    if row['diff'] > 1:
        for s in range(1, int(row['diff'])):
            # add as many entries as missing
            missing += 1
            data.loc[len(data)] = [row['frame_id']-s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 1, 1]
            # new_frame =  [data.loc[ind-1]['frame_id']+s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan]
            # data.loc[len(data)] = new_frame

print('was:', old_size, 'added:', missing, 'new length:', len(data))
# now sort, so that the added data is in the right place
data.sort_values(by=['track_id', 'frame_id'], inplace=True)
326960it [06:37, 821.55it/s] 
was: 326960 added: 85138 new length: 412098
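
The row-by-row loop above takes over six minutes on ~330k rows. A vectorized alternative could reindex each track onto its full frame range instead (a sketch, untested on this exact layout; the state and diff values of the inserted rows would still need filling):

In [ ]:
def fill_gaps(group: pd.DataFrame) -> pd.DataFrame:
    # reindex the track onto a complete frame range so missing frames
    # become NaN rows, ready for the interpolation step below
    full = np.arange(int(group['frame_id'].min()), int(group['frame_id'].max()) + 1)
    out = group.set_index('frame_id').reindex(full)
    out['track_id'] = group['track_id'].iloc[0]
    return out.rename_axis('frame_id').reset_index()

# data = data.groupby('track_id', group_keys=False).apply(fill_gaps)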

In [10]:
# interpolate missing data
df=data.copy()
df = df.groupby('track_id').apply(lambda group: group.interpolate(method='linear'))
df.reset_index(drop=True, inplace=True)
data = df
In [ ]:
from trap.tracker import Smoother

if SMOOTHING:
    df=data.copy()
    if 'x_raw' not in df:
        df['x_raw'] = df['x']
    if 'y_raw' not in df:
        df['y_raw'] = df['y']

    print("Running smoother")
    # print(df)
    # from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother
    smoother = Smoother(convolution=False)
    def smoothing(data):
        # smoother = ConvolutionSmoother(window_len=SMOOTHING_WINDOW, window_type='ones', copy=None)
        return smoother.smooth(data).tolist()
        # df=df.assign(smooth_data=smoother.smooth_data[0])
        # return smoother.smooth_data[0].tolist()

    # operate smoothing per axis
    df['x'] = df.groupby('track_id')['x_raw'].transform(smoothing)
    df['y'] = df.groupby('track_id')['y_raw'].transform(smoothing)
    

    data = df
Running smoother
In [ ]:
# del data['diff']
# recalculate diff
data['diff'] = data.groupby(['track_id'])['frame_id'].diff()
data
Out[ ]:
frame_id track_id l t w h x y state diff x_raw y_raw
0 9.0 1.0 0.000000 565.566162 88.795326 173.917542 0.855100 7.136193 2.0 NaN 0.881595 7.341152
1 10.0 1.0 0.000000 565.116699 88.801704 171.334290 0.873132 7.235233 2.0 1.0 0.870703 7.309168
2 11.0 1.0 0.000000 564.874573 90.596596 177.199951 0.890957 7.328989 2.0 1.0 0.901374 7.370044
3 12.0 1.0 0.000000 564.874268 90.928131 183.125732 0.907784 7.418187 2.0 1.0 0.924360 7.432365
4 13.0 1.0 0.000000 569.931213 86.213280 180.774292 0.923439 7.505012 2.0 1.0 0.906583 7.456334
... ... ... ... ... ... ... ... ... ... ... ... ...
320183 60159.0 3632.0 1830.709717 651.257446 150.202515 157.239746 14.840476 9.786501 2.0 NaN 15.214551 10.027093
320184 60160.0 3632.0 1834.013672 649.612122 153.686646 160.874023 15.033432 9.870472 2.0 1.0 15.244872 10.047117
320185 60161.0 3632.0 1845.373047 651.249756 147.178589 153.729248 15.211560 9.943236 2.0 1.0 15.318496 10.015218
320186 60162.0 3632.0 1857.388916 650.908203 136.407349 142.354614 15.377673 10.008965 2.0 1.0 15.400203 9.935355
320187 60163.0 3632.0 1862.792725 658.719971 141.984253 149.052307 15.538255 10.075935 2.0 1.0 15.416893 10.051785

320188 rows × 12 columns

In [ ]:
# data['node_type'] = 'PEDESTRIAN' # compatibility with Trajectron++
# data['node_id'] = data['track_id'].astype(str)
data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')


data['dt'] = data['diff'] * (1/FPS)
data
Out[ ]:
frame_id track_id l t w h x y state diff x_raw y_raw dt
0 9.0 1 0.000000 565.566162 88.795326 173.917542 0.855100 7.136193 2.0 NaN 0.881595 7.341152 NaN
1 10.0 1 0.000000 565.116699 88.801704 171.334290 0.873132 7.235233 2.0 1.0 0.870703 7.309168 0.083333
2 11.0 1 0.000000 564.874573 90.596596 177.199951 0.890957 7.328989 2.0 1.0 0.901374 7.370044 0.083333
3 12.0 1 0.000000 564.874268 90.928131 183.125732 0.907784 7.418187 2.0 1.0 0.924360 7.432365 0.083333
4 13.0 1 0.000000 569.931213 86.213280 180.774292 0.923439 7.505012 2.0 1.0 0.906583 7.456334 0.083333
... ... ... ... ... ... ... ... ... ... ... ... ... ...
320183 60159.0 3632 1830.709717 651.257446 150.202515 157.239746 14.840476 9.786501 2.0 NaN 15.214551 10.027093 NaN
320184 60160.0 3632 1834.013672 649.612122 153.686646 160.874023 15.033432 9.870472 2.0 1.0 15.244872 10.047117 0.083333
320185 60161.0 3632 1845.373047 651.249756 147.178589 153.729248 15.211560 9.943236 2.0 1.0 15.318496 10.015218 0.083333
320186 60162.0 3632 1857.388916 650.908203 136.407349 142.354614 15.377673 10.008965 2.0 1.0 15.400203 9.935355 0.083333
320187 60163.0 3632 1862.792725 658.719971 141.984253 149.052307 15.538255 10.075935 2.0 1.0 15.416893 10.051785 0.083333

320188 rows × 13 columns

In [ ]:
# shift positions into an average coordinate system (DO THESE NEED TO BE STORED?)
# Don't do this: it messes things up
# data['pos_x'] = data['pos_x'] - data['pos_x'].mean()
# data['pos_y'] = data['pos_y'] - data['pos_y'].mean()
    
# data
In [ ]:
data['diff'].hist()
Out[ ]:
<AxesSubplot:>

The dataset is a bit crappy because the frame step varies: predominantly 1 or 2, but sometimes 3 or 4 as well. This inevitably leads to differences in the speed calculations.
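
A quick way to back this up numerically (a sketch; counts depend on the run):

In [ ]:
data['diff'].value_counts().sort_index()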

In [ ]:
if SRC_H is not None:
    H = np.loadtxt(SRC_H, delimiter=',')
else:
    H = None
In [ ]:
if H is not None:
    print("Projecting data")
    # note: the 'pos_x'/'width' columns belong to an older CSV format; the current file uses l/t/w/h
    data['foot_x'] = data['pos_x'] + 0.5 * data['width']
    data['foot_y'] = data['pos_y'] + 0.5 * data['height']
    
    transformed = cv2.perspectiveTransform(np.array([data[['foot_x','foot_y']].to_numpy()]),H)[0]
    data['proj_x'], data['proj_y'] = transformed[:,0], transformed[:,1]
    data['proj_x'] = data['proj_x'].div(100) # cm to m
    data['proj_y'] = data['proj_y'].div(100) # cm to m
    # and shift to mean (THESE NEED TO BE STORED AND REUSED IN A LIVE SETTING)
    mean_x = data['proj_x'].mean()
    mean_y = data['proj_y'].mean()
    data['proj_x'] = data['proj_x'] - data['proj_x'].mean()
    data['proj_y'] = data['proj_y'] - data['proj_y'].mean()
else:
    print("No H given, probably already projected data?")
    mean_x = 0
    mean_y = 0
    data['proj_x']  = data['x']
    data['proj_y']  = data['y']
data
No H given, probably already projected data?
Out[ ]:
frame_id track_id l t w h x y state diff x_raw y_raw dt proj_x proj_y
0 9.0 1 0.000000 565.566162 88.795326 173.917542 0.855100 7.136193 2.0 NaN 0.881595 7.341152 NaN 0.855100 7.136193
1 10.0 1 0.000000 565.116699 88.801704 171.334290 0.873132 7.235233 2.0 1.0 0.870703 7.309168 0.083333 0.873132 7.235233
2 11.0 1 0.000000 564.874573 90.596596 177.199951 0.890957 7.328989 2.0 1.0 0.901374 7.370044 0.083333 0.890957 7.328989
3 12.0 1 0.000000 564.874268 90.928131 183.125732 0.907784 7.418187 2.0 1.0 0.924360 7.432365 0.083333 0.907784 7.418187
4 13.0 1 0.000000 569.931213 86.213280 180.774292 0.923439 7.505012 2.0 1.0 0.906583 7.456334 0.083333 0.923439 7.505012
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
320183 60159.0 3632 1830.709717 651.257446 150.202515 157.239746 14.840476 9.786501 2.0 NaN 15.214551 10.027093 NaN 14.840476 9.786501
320184 60160.0 3632 1834.013672 649.612122 153.686646 160.874023 15.033432 9.870472 2.0 1.0 15.244872 10.047117 0.083333 15.033432 9.870472
320185 60161.0 3632 1845.373047 651.249756 147.178589 153.729248 15.211560 9.943236 2.0 1.0 15.318496 10.015218 0.083333 15.211560 9.943236
320186 60162.0 3632 1857.388916 650.908203 136.407349 142.354614 15.377673 10.008965 2.0 1.0 15.400203 9.935355 0.083333 15.377673 10.008965
320187 60163.0 3632 1862.792725 658.719971 141.984253 149.052307 15.538255 10.075935 2.0 1.0 15.416893 10.051785 0.083333 15.538255 10.075935

320188 rows × 15 columns

In [ ]:
print("Deriving displacement, velocity and accelation from x and y")
data['dx'] = data.groupby(['track_id'])['proj_x'].diff()
data['dy'] = data.groupby(['track_id'])['proj_y'].diff()
data['vx'] = data['dx'].div(data['dt'], axis=0)
data['vy'] = data['dy'].div(data['dt'], axis=0)

data['ax'] = data.groupby(['track_id'])['vx'].diff().div(data['dt'], axis=0)
data['ay'] = data.groupby(['track_id'])['vy'].diff().div(data['dt'], axis=0)

data
Deriving displacement, velocity and acceleration from x and y
Out[ ]:
frame_id track_id l t w h x y state diff ... y_raw dt proj_x proj_y dx dy vx vy ax ay
0 9.0 1 0.000000 565.566162 88.795326 173.917542 0.855100 7.136193 2.0 NaN ... 7.341152 NaN 0.855100 7.136193 NaN NaN NaN NaN NaN NaN
1 10.0 1 0.000000 565.116699 88.801704 171.334290 0.873132 7.235233 2.0 1.0 ... 7.309168 0.083333 0.873132 7.235233 0.018032 0.099039 0.216383 1.188473 NaN NaN
2 11.0 1 0.000000 564.874573 90.596596 177.199951 0.890957 7.328989 2.0 1.0 ... 7.370044 0.083333 0.890957 7.328989 0.017825 0.093756 0.213899 1.125077 -0.029812 -0.760753
3 12.0 1 0.000000 564.874268 90.928131 183.125732 0.907784 7.418187 2.0 1.0 ... 7.432365 0.083333 0.907784 7.418187 0.016827 0.089198 0.201924 1.070371 -0.143699 -0.656466
4 13.0 1 0.000000 569.931213 86.213280 180.774292 0.923439 7.505012 2.0 1.0 ... 7.456334 0.083333 0.923439 7.505012 0.015655 0.086825 0.187865 1.041902 -0.168701 -0.341637
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
320183 60159.0 3632 1830.709717 651.257446 150.202515 157.239746 14.840476 9.786501 2.0 NaN ... 10.027093 NaN 14.840476 9.786501 NaN NaN NaN NaN NaN NaN
320184 60160.0 3632 1834.013672 649.612122 153.686646 160.874023 15.033432 9.870472 2.0 1.0 ... 10.047117 0.083333 15.033432 9.870472 0.192955 0.083971 2.315463 1.007656 NaN NaN
320185 60161.0 3632 1845.373047 651.249756 147.178589 153.729248 15.211560 9.943236 2.0 1.0 ... 10.015218 0.083333 15.211560 9.943236 0.178128 0.072764 2.137542 0.873173 -2.135059 -1.613797
320186 60162.0 3632 1857.388916 650.908203 136.407349 142.354614 15.377673 10.008965 2.0 1.0 ... 9.935355 0.083333 15.377673 10.008965 0.166113 0.065728 1.993352 0.788742 -1.730279 -1.013172
320187 60163.0 3632 1862.792725 658.719971 141.984253 149.052307 15.538255 10.075935 2.0 1.0 ... 10.051785 0.083333 15.538255 10.075935 0.160582 0.066971 1.926987 0.803649 -0.796376 0.178886

320188 rows × 21 columns

In [ ]:
# then we need the velocity itself
data['v'] = np.sqrt(data['vx'].pow(2) + data['vy'].pow(2))
# and derive acceleration
data['a'] = data.groupby(['track_id'])['v'].diff().div(data['dt'], axis=0)

# we can calculate heading based on the velocity components
data['heading'] = (np.arctan2(data['vy'], data['vx'])  * 180 / np.pi) % 360

# and derive it to get the rate of change of the heading
# note: this diff is not wrap-aware; a step across the 0°/360° boundary produces a spurious ±360°/dt spike
data['d_heading'] = data.groupby(['track_id'])['heading'].diff().div(data['dt'], axis=0)

data
Out[ ]:
frame_id track_id l t w h x y state diff ... dx dy vx vy ax ay v a heading d_heading
0 9.0 1 0.000000 565.566162 88.795326 173.917542 0.855100 7.136193 2.0 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 10.0 1 0.000000 565.116699 88.801704 171.334290 0.873132 7.235233 2.0 1.0 ... 0.018032 0.099039 0.216383 1.188473 NaN NaN 1.208011 NaN 79.681298 NaN
2 11.0 1 0.000000 564.874573 90.596596 177.199951 0.890957 7.328989 2.0 1.0 ... 0.017825 0.093756 0.213899 1.125077 -0.029812 -0.760753 1.145230 -0.753373 79.235449 -5.350188
3 12.0 1 0.000000 564.874268 90.928131 183.125732 0.907784 7.418187 2.0 1.0 ... 0.016827 0.089198 0.201924 1.070371 -0.143699 -0.656466 1.089251 -0.671740 79.316807 0.976297
4 13.0 1 0.000000 569.931213 86.213280 180.774292 0.923439 7.505012 2.0 1.0 ... 0.015655 0.086825 0.187865 1.041902 -0.168701 -0.341637 1.058703 -0.366576 79.778828 5.544252
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
320183 60159.0 3632 1830.709717 651.257446 150.202515 157.239746 14.840476 9.786501 2.0 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
320184 60160.0 3632 1834.013672 649.612122 153.686646 160.874023 15.033432 9.870472 2.0 1.0 ... 0.192955 0.083971 2.315463 1.007656 NaN NaN 2.525221 NaN 23.517970 NaN
320185 60161.0 3632 1845.373047 651.249756 147.178589 153.729248 15.211560 9.943236 2.0 1.0 ... 0.178128 0.072764 2.137542 0.873173 -2.135059 -1.613797 2.309007 -2.594562 22.219713 -15.579091
320186 60162.0 3632 1857.388916 650.908203 136.407349 142.354614 15.377673 10.008965 2.0 1.0 ... 0.166113 0.065728 1.993352 0.788742 -1.730279 -1.013172 2.143727 -1.983366 21.588019 -7.580324
320187 60163.0 3632 1862.792725 658.719971 141.984253 149.052307 15.538255 10.075935 2.0 1.0 ... 0.160582 0.066971 1.926987 0.803649 -0.796376 0.178886 2.087853 -0.670484 22.638547 12.606340

320188 rows × 25 columns
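
If heading or d_heading were ever used as training targets, a wrap-aware difference would avoid those boundary spikes (a sketch):

In [ ]:
def angle_diff(series: pd.Series) -> pd.Series:
    # map raw degree differences into [-180, 180) so a step from
    # 359° to 1° counts as +2° instead of -358°
    d = series.diff()
    return (d + 180) % 360 - 180

# data['d_heading'] = data.groupby('track_id')['heading'].transform(angle_diff).div(data['dt'], axis=0)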

In [ ]:
# we can backfill the v and a, so that our model can make estimations
# based on these assumed values
data['v'] = data.groupby(['track_id'])['v'].bfill()
data['a'] = data.groupby(['track_id'])['a'].bfill()

data['heading'] = data.groupby(['track_id'])['heading'].bfill()
data['d_heading'] = data.groupby(['track_id'])['d_heading'].bfill()
data
Out[ ]:
frame_id track_id l t w h x y state diff ... dx dy vx vy ax ay v a heading d_heading
0 9.0 1 0.000000 565.566162 88.795326 173.917542 0.855100 7.136193 2.0 NaN ... NaN NaN NaN NaN NaN NaN 1.208011 -0.753373 79.681298 -5.350188
1 10.0 1 0.000000 565.116699 88.801704 171.334290 0.873132 7.235233 2.0 1.0 ... 0.018032 0.099039 0.216383 1.188473 NaN NaN 1.208011 -0.753373 79.681298 -5.350188
2 11.0 1 0.000000 564.874573 90.596596 177.199951 0.890957 7.328989 2.0 1.0 ... 0.017825 0.093756 0.213899 1.125077 -0.029812 -0.760753 1.145230 -0.753373 79.235449 -5.350188
3 12.0 1 0.000000 564.874268 90.928131 183.125732 0.907784 7.418187 2.0 1.0 ... 0.016827 0.089198 0.201924 1.070371 -0.143699 -0.656466 1.089251 -0.671740 79.316807 0.976297
4 13.0 1 0.000000 569.931213 86.213280 180.774292 0.923439 7.505012 2.0 1.0 ... 0.015655 0.086825 0.187865 1.041902 -0.168701 -0.341637 1.058703 -0.366576 79.778828 5.544252
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
320183 60159.0 3632 1830.709717 651.257446 150.202515 157.239746 14.840476 9.786501 2.0 NaN ... NaN NaN NaN NaN NaN NaN 2.525221 -2.594562 23.517970 -15.579091
320184 60160.0 3632 1834.013672 649.612122 153.686646 160.874023 15.033432 9.870472 2.0 1.0 ... 0.192955 0.083971 2.315463 1.007656 NaN NaN 2.525221 -2.594562 23.517970 -15.579091
320185 60161.0 3632 1845.373047 651.249756 147.178589 153.729248 15.211560 9.943236 2.0 1.0 ... 0.178128 0.072764 2.137542 0.873173 -2.135059 -1.613797 2.309007 -2.594562 22.219713 -15.579091
320186 60162.0 3632 1857.388916 650.908203 136.407349 142.354614 15.377673 10.008965 2.0 1.0 ... 0.166113 0.065728 1.993352 0.788742 -1.730279 -1.013172 2.143727 -1.983366 21.588019 -7.580324
320187 60163.0 3632 1862.792725 658.719971 141.984253 149.052307 15.538255 10.075935 2.0 1.0 ... 0.160582 0.066971 1.926987 0.803649 -0.796376 0.178886 2.087853 -0.670484 22.638547 12.606340

320188 rows × 25 columns

In [ ]:
filtered_data = data.groupby(['track_id']).filter(lambda group: len(group) >= window+1) # tracks need at least window+1 steps to yield a sample (and ≥3 steps for all position derivatives)
filtered_data = filtered_data.set_index(['track_id', 'frame_id']) # use for quick access
print(filtered_data.shape[0], "items in filtered set, out of", data.shape[0], "in total set")
312423 items in filtered set, out of 320188 in total set
In [ ]:
track_ids = filtered_data.index.unique('track_id').to_numpy()
np.random.shuffle(track_ids)
test_offset_idx = int(len(track_ids) * .8)
training_ids, test_ids = track_ids[:test_offset_idx], track_ids[test_offset_idx:]
print(f"{len(training_ids)} training tracks, {len(test_ids)} test tracks")
1263 training tracks, 316 test tracks

Here, draw out a sample track to see if it looks alright. Unfortunately the image isn't mapped properly.

In [ ]:
import random
if H is not None: # `if H:` would raise on a numpy array (ambiguous truth value)
    img_src = "../DATASETS/hof/webcam20231103-2.png"
    # dst = cv2.warpPerspective(img_src,H,(2500,1920))
    src_img = cv2.imread(img_src)
    print(src_img.shape)
    h1,w1 = src_img.shape[:2]
    corners = np.float32([[0,0], [w1, 0], [0, h1], [w1, h1]])

    print(corners)
    corners_projected = cv2.perspectiveTransform(corners.reshape((-1,4,2)), H)[0]
    print(corners_projected)
    [xmin, ymin] = np.int32(corners_projected.min(axis=0).ravel() - 0.5)
    [xmax, ymax] = np.int32(corners_projected.max(axis=0).ravel() + 0.5)
    print(xmin, xmax, ymin, ymax)

    dst = cv2.warpPerspective(src_img,H, (xmax, ymax))
    def plot_track(track_id: int):
        plt.gca().invert_yaxis()

        plt.imshow(dst, origin='lower', extent=[xmin/100-mean_x, xmax/100-mean_x, ymin/100-mean_y, ymax/100-mean_y])
        # plot scatter plot with x and y data 
        
        ax = plt.scatter(
            filtered_data.loc[track_id,:]['proj_x'],
            filtered_data.loc[track_id,:]['proj_y'],
            marker="*") 
        ax.axes.invert_yaxis()
        plt.plot(
            filtered_data.loc[track_id,:]['proj_x'],
            filtered_data.loc[track_id,:]['proj_y']
        )
else:
    def plot_track(track_id: int):
        # in this branch proj_x/proj_y are identical to x/y, so use them consistently
        ax = plt.scatter(
            filtered_data.loc[track_id,:]['proj_x'],
            filtered_data.loc[track_id,:]['proj_y'],
            marker="*")
        plt.plot(
            filtered_data.loc[track_id,:]['proj_x'],
            filtered_data.loc[track_id,:]['proj_y']
        )

# print(filtered_data.loc[track_id,:]['proj_x'])
# _track_id = 2188
_track_id = random.choice(track_ids)
print(_track_id)
plot_track(_track_id)

for track_id in random.choices(track_ids, k=100):
    plot_track(track_id)
    
print(mean_x, mean_y)
1058
0 0

Now make the dataset:

In [ ]:
# a=filtered_data.loc[1]
# min(a.index.tolist())
In [ ]:
def create_dataset(data, track_ids, window):
    X, y = [], []
    for track_id in tqdm(track_ids):
        df = data.loc[track_id]
        start_frame = min(df.index.tolist())
        for step in range(len(df)-window-1):
            i = int(start_frame) + step
            # .loc slicing is inclusive, so each feature block spans window+1 timesteps
            feature = df.loc[i:i+window][in_fields]
            # target: the velocity components of the step right after the window
            target = df.loc[i+window+1][out_fields]
            X.append(feature.values)
            y.append(target.values)

    return torch.tensor(np.array(X), dtype=torch.float), torch.tensor(np.array(y), dtype=torch.float)

X_train, y_train = create_dataset(filtered_data, training_ids, window)
X_test, y_test = create_dataset(filtered_data, test_ids, window)
100%|██████████| 1263/1263 [01:42<00:00, 12.36it/s]
100%|██████████| 316/316 [00:26<00:00, 11.75it/s]
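
A quick sanity check of the resulting shapes (N depends on the data; each feature block spans window+1 steps because .loc slicing is inclusive):

In [ ]:
print(X_train.shape, y_train.shape)  # (N, window+1, len(in_fields)) and (N, len(out_fields))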
In [ ]:
X_train, y_train = X_train.to(device=device), y_train.to(device=device)
X_test, y_test = X_test.to(device=device), y_test.to(device=device)
In [ ]:
from torch.utils.data import TensorDataset, DataLoader
dataset_train = TensorDataset(X_train, y_train)
loader_train = DataLoader(dataset_train, shuffle=True, batch_size=8) # note: hardcoded 8, not the batch_size hyperparameter defined above

The model gives output for all timesteps, which should improve training. But we use only the last timestep for the prediction.

In [ ]:
class LSTMModel(nn.Module):
    # input_size : number of features in input at each time step
    # hidden_size : Number of LSTM units 
    # num_layers : number of LSTM layers 
    def __init__(self, input_size, hidden_size, num_layers): 
        super(LSTMModel, self).__init__() #initializes the parent class nn.Module
        self.lin1 = nn.Linear(input_size, hidden_size//2)
        self.lstm = nn.LSTM(hidden_size//2, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        # self.activation_v = nn.LeakyReLU(.01)
        # self.activation_heading = torch.remainder()

    def forward(self, x): # defines forward pass of the neural network
        out = self.lin1(x)
        out, _ = self.lstm(out) # lstm returns (output, (h_n, c_n)); we only need the output
        # extract only the last time step, see https://machinelearningmastery.com/lstm-for-time-series-prediction-in-pytorch/
        out = out[:, -1, :]
        out = self.linear(out)
        return out

model = LSTMModel(input_size, hidden_size, num_layers).to(device)
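
A shape walk-through with a dummy batch (batch size 8 is arbitrary): only the last timestep feeds the linear head, so each sequence yields a single (vx, vy) pair.

In [ ]:
dummy = torch.randn(8, window + 1, input_size).to(device)
print(model(dummy).shape)  # torch.Size([8, 2])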
In [ ]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()
In [ ]:
def evaluate():
    # toggle evaluation mode
    model.eval()
    # note: this pushes the full train/test tensors through the model in one batch,
    # which can exhaust GPU memory on large datasets (as happens below)
    with torch.no_grad():
        y_pred = model(X_train.to(device=device))
        train_rmse = torch.sqrt(loss_fn(y_pred, y_train))
        y_pred = model(X_test.to(device=device))
        test_rmse = torch.sqrt(loss_fn(y_pred, y_test))
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))

def load_most_recent():
    paths = list(cache_path.glob("checkpoint_*.pt"))
    if len(paths) < 1:
        print('Nothing found to load')
        return None, None
    paths.sort()

    print(f"Loading {paths[-1]}")
    return load_cache(path=paths[-1])

def load_cache(epoch=None, path=None):
    if path is None:
        if epoch is None:
            raise RuntimeError("Either path or epoch must be given")
        path = cache_path / f"checkpoint_{epoch:05d}.pt"
    else:
        print(path.stem)
        epoch = int(path.stem[-5:])

    cached = torch.load(path)
    
    optimizer.load_state_dict(cached['optimizer_state_dict'])
    model.load_state_dict(cached['model_state_dict'])
    return epoch, cached['loss']
    

def cache(epoch, loss):
    path = cache_path / f"checkpoint_{epoch:05d}.pt"
    print(f"Cache to {path}")
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, path)
In [ ]:
start_epoch, loss = load_most_recent()
if start_epoch is None:
    start_epoch = 0
else:
    print(f"starting from epoch {start_epoch} (loss: {loss})")

# Train Network
for epoch in tqdm(range(start_epoch+1,num_epochs+1)):
    # toggle train mode
    model.train()
    for batch_idx, (inputs, targets) in enumerate(loader_train):
        # get batch to cuda if possible (named `inputs` to avoid shadowing the dataframe `data`)
        inputs = inputs.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(inputs)
        loss = loss_fn(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent update step/adam step
        optimizer.step()

    if epoch % 5 != 0:
        continue

    cache(epoch, loss)
    evaluate()

evaluate()
Loading EXPERIMENTS/cache/hof2/checkpoint_00005.pt
checkpoint_00005
starting from epoch 5 (loss: nan)
  4%|▍         | 4/95 [07:37<2:53:27, 114.37s/it]
Cache to EXPERIMENTS/cache/hof2/checkpoint_00010.pt

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[31], line 31
     28         continue
     30     cache(epoch, loss)
---> 31     evaluate()
     33 evaluate()

Cell In[30], line 5, in evaluate()
      3 model.eval()
      4 with torch.no_grad():
----> 5     y_pred = model(X_train.to(device=device))
      6     train_rmse = torch.sqrt(loss_fn(y_pred, y_train))
      7     y_pred = model(X_test.to(device=device))

File ~/suspicion/trap/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[28], line 15, in LSTMModel.forward(self, x)
     13 def forward(self, x): # defines forward pass of the neural network
     14     out = self.lin1(x)
---> 15     out, h0 = self.lstm(out)
     16     # extract only the last time step, see https://machinelearningmastery.com/lstm-for-time-series-prediction-in-pytorch/
     17     # print(out.shape)
     18     out = out[:, -1,:]

File ~/suspicion/trap/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~/suspicion/trap/.venv/lib/python3.10/site-packages/torch/nn/modules/rnn.py:769, in LSTM.forward(self, input, hx)
    767 self.check_forward_args(input, hx, batch_sizes)
    768 if batch_sizes is None:
--> 769     result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
    770                       self.dropout, self.training, self.bidirectional, self.batch_first)
    771 else:
    772     result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,
    773                       self.num_layers, self.dropout, self.training, self.bidirectional)

RuntimeError: CUDA out of memory. Tried to allocate 28.40 GiB (GPU 0; 23.59 GiB total capacity; 15.01 GiB already allocated; 7.20 GiB free; 15.03 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
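
The OOM above comes from evaluate() pushing all of X_train through the LSTM in a single batch. A chunked evaluation avoids materialising activations for ~300k sequences at once (a sketch reusing the model, tensors and loss defined above):

In [ ]:
def evaluate_batched(X, y, batch_size=2048):
    model.eval()
    se, n = 0.0, 0
    with torch.no_grad():
        for i in range(0, len(X), batch_size):
            pred = model(X[i:i+batch_size])
            # accumulate summed squared error so the final RMSE matches
            # what a single full-batch pass would produce
            se += torch.nn.functional.mse_loss(pred, y[i:i+batch_size], reduction='sum').item()
            n += y[i:i+batch_size].numel()
    return (se / n) ** 0.5

# print("train RMSE %.4f, test RMSE %.4f" % (evaluate_batched(X_train, y_train), evaluate_batched(X_test, y_test)))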
In [1]:
model.eval()
with torch.no_grad():
    y_pred = model(X_train.to(device=device))
    print(y_pred.shape, y_train.shape)
y_train, y_pred
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[1], line 1
----> 1 model.eval()
      2 with torch.no_grad():
      3     y_pred = model(X_train.to(device=device))

NameError: name 'model' is not defined
In [ ]:
mean_x, mean_y
Out[ ]:
(0, 0)
In [ ]:
def predict_and_plot(feature, steps=50):
    length = feature.shape[0]
    with torch.no_grad():
        for i in range(steps):
            # batch size of one, so wrap feature as a single-item array
            X = torch.tensor([feature], dtype=torch.float).to(device)
            s = model(X)[0].cpu()

            # the model predicts (vx, vy); rebuild the remaining in_fields
            # ['proj_x', 'proj_y', 'vx', 'vy', 'ax', 'ay'] so the rollout
            # input stays consistent with the training features
            dt = 1 / FPS
            ax = (s[0] - feature[-1][2]) / dt
            ay = (s[1] - feature[-1][3]) / dt
            feature = np.append(feature, [[feature[-1][0] + s[0]*dt,
                                           feature[-1][1] + s[1]*dt,
                                           s[0], s[1], ax, ay]], axis=0)

    plt.plot(feature[:length, 0], feature[:length, 1], c='orange')
    plt.plot(feature[length-1:, 0], feature[length-1:, 1], c='red')
    plt.scatter(feature[length:, 0], feature[length:, 1], c='red')
In [ ]:
# print(filtered_data.loc[track_id,:]['proj_x'])
_track_id =  8701 # random.choice(track_ids)
_track_id =  3880 # random.choice(track_ids)

# _track_id = 2780

for i in range(100):
    _track_id = random.choice(track_ids)
    plt.plot(
        filtered_data.loc[_track_id,:]['proj_x'],
        filtered_data.loc[_track_id,:]['proj_y'],
        c='grey', alpha=.2
    )

_track_id = random.choice(track_ids)
# _track_id = 801
print(_track_id)
ax = plt.scatter(
    filtered_data.loc[_track_id,:]['proj_x'],
    filtered_data.loc[_track_id,:]['proj_y'],
    marker="*") 
plt.plot(
    filtered_data.loc[_track_id,:]['proj_x'],
    filtered_data.loc[_track_id,:]['proj_y']
)

predict_and_plot(filtered_data.loc[_track_id,:].iloc[:5][in_fields].values)
predict_and_plot(filtered_data.loc[_track_id,:].iloc[:10][in_fields].values)
predict_and_plot(filtered_data.loc[_track_id,:].iloc[:50][in_fields].values)
# predict_and_plot(filtered_data.loc[_track_id,:].iloc[:70][in_fields].values)
# predict_and_plot(filtered_data.loc[_track_id,:].iloc[:115][in_fields].values)
2515
In [ ]: