294 KiB
294 KiB
Goal of this notebook: implement some basic RNN/LSTM/GRU models to forecast trajectories, based on the VIRAT and/or the custom "hof" dataset.
In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt # Visualization
import torch.nn as nn
import pandas_helper_calc # noqa # provides df.calc.derivative()
import pandas as pd
import cv2
import pathlib
from tqdm.autonotebook import tqdm
In [2]:
# --- Dataset configuration ---
FPS = 12  # capture rate of the source video; also the time base for velocities

# Earlier dataset variants kept for reference; the LAST assignment wins.
# SRC_CSV = "EXPERIMENTS/hofext-maskrcnn/all.txt"
# SRC_CSV = "EXPERIMENTS/raw/generated/train/tracks.txt"
SRC_CSV = "EXPERIMENTS/raw/hof-meter-maskrcnn2/train/tracks.txt"
SRC_CSV = "EXPERIMENTS/20240426-hof-yolo/train/tracked.txt"
SRC_CSV = "EXPERIMENTS/raw/hof2/train/tracked.txt"  # <- the path actually used

# Homography mapping image coordinates to world coordinates.
# None means the tracks in SRC_CSV are assumed to be projected already.
# SRC_H = "../DATASETS/hof/webcam20231103-2-homography.txt"
SRC_H = None

CACHE_DIR = "EXPERIMENTS/cache/hof2/"  # model checkpoints are written here

SMOOTHING = True # hof-yolo is already smoothed, hof2 isn't
SMOOTHING_WINDOW=3 #2  # NOTE(review): only relevant for the convolution smoother, which is disabled below — confirm
In [3]:
# Features fed to the model at every time step.
in_fields = ['proj_x', 'proj_y', 'vx', 'vy', 'ax', 'ay']
# out_fields = ['v', 'heading']
# velocity cannot be negative, and heading is circular (modulo), this makes it harder to optimise than a linear space, so try to use components
# and we can use simple MSE loss (I guess?)
out_fields = ['vx', 'vy']

# Observation window length in frames (1.5 s of history).
window = int(FPS*1.5)
In [4]:
# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Hyperparameters
input_size = len(in_fields)    # features per time step (6)
hidden_size = 256              # LSTM units per layer
num_layers = 3                 # stacked LSTM layers
output_size = len(out_fields)  # regression targets (vx, vy)
learning_rate = 0.005 #0.01 #0.005
batch_size = 256               # NOTE(review): the DataLoader below hard-codes 8 instead of using this
num_epochs = 1000
In [5]:
# Checkpoint directory; created up-front so torch.save can write into it.
cache_path = pathlib.Path(CACHE_DIR)
cache_path.mkdir(parents=True, exist_ok=True)
In [6]:
# Alternative loader from the project's own tooling.
# NOTE(review): `data` is overwritten by the raw CSV load in the next cell,
# so this cell appears to be exploratory / unused downstream — confirm.
from pathlib import Path
from trap.tools import load_tracks_from_csv
data = load_tracks_from_csv(Path(SRC_CSV), FPS, 2, 5 )
In [ ]:
Out[ ]:
In [6]:
# Load raw tracker output: one detection per row, tab-separated, headerless.
data = pd.read_csv(SRC_CSV, delimiter="\t", index_col=False, header=None)
# data.columns = ['frame_id', 'track_id', 'pos_x', 'pos_y', 'width', 'height']#, '_x', '_y,']
# l/t/w/h: bounding box; x/y: foot position (cm); state: tracker state flag
data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state']#, '_x', '_y,']
data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
data['frame_id'] = data['frame_id'] // 10 # compatibility with Trajectron++
data.sort_values(by=['track_id', 'frame_id'],inplace=True)
# NOTE(review): the set_index result is NOT assigned — it only serves as the
# cell's display output. The gap-filling loop below relies on `data` keeping
# its default RangeIndex (it uses data.loc[len(data)]), so do not assign this.
data.set_index(['track_id', 'frame_id'])
Out[6]:
In [7]:
# cm to meter
# Convert foot positions from centimetres to metres.
data['x'] = data['x'].div(100)
data['y'] = data['y'].div(100)
In [8]:
# Frame gap to the previous detection of the same track
# (NaN on the first row of each track).
data['diff'] = data.groupby(['track_id'])['frame_id'].diff() #.fillna(0)
data['diff'] = pd.to_numeric(data['diff'], downcast='integer')
In [9]:
# Insert placeholder rows for skipped frames so that, after the sort below
# and the interpolation in the next cell, every track is a contiguous
# frame sequence.
missing=0
old_size=len(data)
# slow way to append missing steps to the dataset
for ind, row in tqdm(data.iterrows()):
    if row['diff'] > 1:
        # a gap of `diff` frames means diff-1 frames are missing just before
        # this row; add one NaN placeholder per missing frame
        for s in range(1, int(row['diff'])):
            # add as many entries as missing
            missing += 1
            # Column order: frame_id, track_id, l, t, w, h, x, y, state, diff.
            # NOTE(review): data.loc[len(data)] relies on the DataFrame keeping
            # a gap-free RangeIndex — this is why the earlier set_index result
            # was deliberately not assigned.
            data.loc[len(data)] = [row['frame_id']-s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 1, 1]
            # new_frame = [data.loc[ind-1]['frame_id']+s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan]
            # data.loc[len(data)] = new_frame
print('was:', old_size, 'added:', missing, 'new length:', len(data))
# now sort, so that the added data is in the right place
data.sort_values(by=['track_id', 'frame_id'], inplace=True)
In [10]:
# interpolate missing data
# Fill the NaN placeholder rows per track by linear interpolation.
df=data.copy()
df = df.groupby('track_id').apply(lambda group: group.interpolate(method='linear'))
df.reset_index(drop=True, inplace=True)
data = df
In [ ]:
from trap.tracker import Smoother

if SMOOTHING:
    # Keep the raw positions around so smoothing can be inspected / re-run.
    df=data.copy()
    if 'x_raw' not in df:
        df['x_raw'] = df['x']
    if 'y_raw' not in df:
        df['y_raw'] = df['y']
    print("Running smoother")
    # print(df)
    # from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother
    smoother = Smoother(convolution=False)
    def smoothing(data):
        # Smooth one track's coordinate series; return a plain list so that
        # pandas `transform` can align it back onto the group.
        # (shadows the notebook-level `data` only inside this function)
        # smoother = ConvolutionSmoother(window_len=SMOOTHING_WINDOW, window_type='ones', copy=None)
        return smoother.smooth(data).tolist()
        # df=df.assign(smooth_data=smoother.smooth_data[0])
        # return smoother.smooth_data[0].tolist()
    # operate smoothing per axis
    df['x'] = df.groupby('track_id')['x_raw'].transform(smoothing)
    df['y'] = df.groupby('track_id')['y_raw'].transform(smoothing)
    data = df
In [ ]:
# del data['diff']
# recalculate diff: after gap-filling, every consecutive step should now be 1
data['diff'] = data.groupby(['track_id'])['frame_id'].diff()
data
Out[ ]:
In [ ]:
# data['node_type'] = 'PEDESTRIAN' # compatibility with Trajectron++
# data['node_id'] = data['track_id'].astype(str)
data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')
# Time delta in seconds between consecutive rows of a track.
data['dt'] = data['diff'] * (1/FPS)
data
Out[ ]:
In [ ]:
# position into an average coordinate system. (DO THESE NEED TO BE STORED?)
# Don't do this, messes up
# data['pos_x'] = data['pos_x'] - data['pos_x'].mean()
# data['pos_y'] = data['pos_y'] - data['pos_y'].mean()
# data
In [ ]:
# Distribution of frame-step sizes between consecutive detections per track.
data['diff'].hist()
Out[ ]:
The dataset is a bit messy because the frame step varies: it is predominantly 1 or 2, but sometimes 3 or 4 as well. This inevitably leads to differences in the speed calculations.
In [ ]:
# Load the homography matrix when a source file is configured; otherwise the
# data is treated as already projected (H stays None).
H = np.loadtxt(SRC_H, delimiter=',') if SRC_H is not None else None
In [ ]:
if H is not None:
    print("Projecting data")
    # NOTE(review): this branch references 'pos_x'/'pos_y'/'width'/'height',
    # columns the current load cell does not create (it uses l/t/w/h/x/y);
    # it likely predates the current CSV format — verify before enabling.
    data['foot_x'] = data['pos_x'] + 0.5 * data['width']
    data['foot_y'] = data['pos_y'] + 0.5 * data['height']
    # perspectiveTransform wants shape (1, N, 2); take the projected points back out
    transformed = cv2.perspectiveTransform(np.array([data[['foot_x','foot_y']].to_numpy()]),H)[0]
    data['proj_x'], data['proj_y'] = transformed[:,0], transformed[:,1]
    data['proj_x'] = data['proj_x'].div(100) # cm to m
    data['proj_y'] = data['proj_y'].div(100) # cm to m
    # and shift to mean (THESE NEED TO BE STORED AND REUSED IN LIVE SETTING)
    mean_x = data['proj_x'].mean()
    mean_y = data['proj_y'].mean()
    data['proj_x'] = data['proj_x'] - data['proj_x'].mean()
    data['proj_y'] = data['proj_y'] - data['proj_y'].mean()
else:
    print("No H given, probably already projected data?")
    # no shift applied, so the stored offsets are zero
    mean_x = 0
    mean_y = 0
    data['proj_x'] = data['x']
    data['proj_y'] = data['y']
data
Out[ ]:
In [ ]:
print("Deriving displacement, velocity and accelation from x and y")
# Per-track displacement between consecutive frames (NaN on first row of a track).
data['dx'] = data.groupby(['track_id'])['proj_x'].diff()
data['dy'] = data.groupby(['track_id'])['proj_y'].diff()
# Velocity in m/s; dividing by dt accounts for variable frame steps.
data['vx'] = data['dx'].div(data['dt'], axis=0)
data['vy'] = data['dy'].div(data['dt'], axis=0)
# Acceleration as the per-track derivative of velocity.
data['ax'] = data.groupby(['track_id'])['vx'].diff().div(data['dt'], axis=0)
data['ay'] = data.groupby(['track_id'])['vy'].diff().div(data['dt'], axis=0)
data
Out[ ]:
In [ ]:
# then we need the velocity itself (speed magnitude, m/s)
data['v'] = np.sqrt(data['vx'].pow(2) + data['vy'].pow(2))
# and derive acceleration
data['a'] = data.groupby(['track_id'])['v'].diff().div(data['dt'], axis=0)
# we can calculate heading based on the velocity components (degrees in [0, 360))
data['heading'] = (np.arctan2(data['vy'], data['vx']) * 180 / np.pi) % 360
# and derive it to get the rate of change of the heading
# NOTE(review): the plain diff does not handle the 360° wrap-around, so a
# 359°→1° turn computes as -358°/dt instead of +2°/dt — confirm acceptable.
data['d_heading'] = data.groupby(['track_id'])['heading'].diff().div(data['dt'], axis=0)
data
Out[ ]:
In [ ]:
# we can backfill the v and a, so that our model can make estimations
# based on these assumed values
# (bfill copies each track's first valid value onto the leading NaN rows
# that the diff-based derivatives left undefined)
# NOTE(review): the vx/vy/ax/ay columns used by in_fields are NOT backfilled
# here, so the first row(s) of each track keep NaNs — confirm intended.
data['v'] = data.groupby(['track_id'])['v'].bfill()
data['a'] = data.groupby(['track_id'])['a'].bfill()
data['heading'] = data.groupby(['track_id'])['heading'].bfill()
data['d_heading'] = data.groupby(['track_id'])['d_heading'].bfill()
data
Out[ ]:
In [ ]:
# Keep only tracks long enough to yield at least one full input window plus
# its prediction target.
filtered_data = data.groupby(['track_id']).filter(lambda group: len(group) >= window+1) # window+1 rows are necessary for one (window, target) sample
filtered_data = filtered_data.set_index(['track_id', 'frame_id']) # use for quick access
print(filtered_data.shape[0], "items in filtered set, out of", data.shape[0], "in total set")
In [ ]:
# 80/20 split of WHOLE tracks (not individual windows) into train and test,
# so windows from the same track never leak across the split.
track_ids = filtered_data.index.unique('track_id').to_numpy()
# NOTE(review): no random seed is set, so the split differs between runs.
np.random.shuffle(track_ids)
test_offset_idx = int(len(track_ids) * .8)
training_ids, test_ids = track_ids[:test_offset_idx], track_ids[test_offset_idx:]
print(f"{len(training_ids)} training tracks, {len(test_ids)} test tracks")
Here, draw a sample track to see if it looks alright. Unfortunately the image isn't mapped properly.
In [ ]:
import random

# Visual sanity check: draw a sample of tracks, over the warped camera image
# when a homography is available.
# FIX: the original guard was `if H:`, which raises
# "The truth value of an array with more than one element is ambiguous"
# as soon as H is an actual 3x3 homography matrix; test against None instead.
if H is not None:
    img_src = "../DATASETS/hof/webcam20231103-2.png"
    # dst = cv2.warpPerspective(img_src,H,(2500,1920))
    src_img = cv2.imread(img_src)
    print(src_img.shape)
    h1,w1 = src_img.shape[:2]
    # project the image corners to find the bounds of the warped image
    corners = np.float32([[0,0], [w1, 0], [0, h1], [w1, h1]])
    print(corners)
    corners_projected = cv2.perspectiveTransform(corners.reshape((-1,4,2)), H)[0]
    print(corners_projected)
    [xmin, ymin] = np.int32(corners_projected.min(axis=0).ravel() - 0.5)
    [xmax, ymax] = np.int32(corners_projected.max(axis=0).ravel() + 0.5)
    print(xmin, xmax, ymin, ymax)
    dst = cv2.warpPerspective(src_img,H, (xmax, ymax))

    def plot_track(track_id: int):
        # Draw one track (scatter + line) on top of the projected image;
        # the extent maps image pixels into the mean-shifted metre space.
        plt.gca().invert_yaxis()
        plt.imshow(dst, origin='lower', extent=[xmin/100-mean_x, xmax/100-mean_x, ymin/100-mean_y, ymax/100-mean_y])
        # plot scatter plot with x and y data
        ax = plt.scatter(
            filtered_data.loc[track_id,:]['proj_x'],
            filtered_data.loc[track_id,:]['proj_y'],
            marker="*")
        ax.axes.invert_yaxis()
        plt.plot(
            filtered_data.loc[track_id,:]['proj_x'],
            filtered_data.loc[track_id,:]['proj_y']
        )
else:
    def plot_track(track_id: int):
        # No homography: plot raw coordinates (proj_x/proj_y equal x/y here).
        ax = plt.scatter(
            filtered_data.loc[track_id,:]['x'],
            filtered_data.loc[track_id,:]['y'],
            marker="*")
        plt.plot(
            filtered_data.loc[track_id,:]['proj_x'],
            filtered_data.loc[track_id,:]['proj_y']
        )
# print(filtered_data.loc[track_id,:]['proj_x'])

# _track_id = 2188
_track_id = random.choice(track_ids)
print(_track_id)
plot_track(_track_id)

# overlay a larger random sample to get a feel for the dataset
for track_id in random.choices(track_ids, k=100):
    plot_track(track_id)
print(mean_x, mean_y)
Now make the dataset:
In [ ]:
# a=filtered_data.loc[1]
# min(a.index.tolist())
In [ ]:
def create_dataset(data, track_ids, window):
    """Build sliding-window training samples from per-track time series.

    For every track, each sample is `window + 1` consecutive rows of
    `in_fields` as the feature sequence, with the `out_fields` of the row
    immediately after the window as the target.
    Returns (X, y) as float32 torch tensors.
    """
    features, targets = [], []
    for track_id in tqdm(track_ids):
        track = data.loc[track_id]
        # frames are contiguous after gap-filling, so label-based slicing works
        first_frame = int(min(track.index.tolist()))
        for offset in range(len(track) - window - 1):
            start = first_frame + offset
            # .loc slicing is inclusive: window + 1 rows of input features
            features.append(track.loc[start:start + window][in_fields].values)
            # single-row target right after the window
            targets.append(track.loc[start + window + 1][out_fields].values)
    return (
        torch.tensor(np.array(features), dtype=torch.float),
        torch.tensor(np.array(targets), dtype=torch.float),
    )

X_train, y_train = create_dataset(filtered_data, training_ids, window)
X_test, y_test = create_dataset(filtered_data, test_ids, window)
In [ ]:
# Move the full datasets to the training device once, up front.
X_train, y_train = X_train.to(device=device), y_train.to(device=device)
X_test, y_test = X_test.to(device=device), y_test.to(device=device)
In [ ]:
from torch.utils.data import TensorDataset, DataLoader

# Wrap the training tensors for shuffled mini-batch iteration.
# FIX: use the batch size from the hyperparameter cell — it was hard-coded
# to 8 here, silently ignoring the configured batch_size = 256.
dataset_train = TensorDataset(X_train, y_train)
loader_train = DataLoader(dataset_train, shuffle=True, batch_size=batch_size)
The model gives output for all timesteps, which should improve training. However, we use only the last timestep in the prediction process.
In [ ]:
class LSTMModel(nn.Module):
    """LSTM regressor: a linear input projection, a stacked LSTM, and a
    linear readout of the LAST time step only.

    input_size  : number of features in the input at each time step
    hidden_size : number of LSTM units per layer
    num_layers  : number of stacked LSTM layers
    output_size : number of regression targets; defaults to the notebook-level
                  `output_size` constant for backward compatibility (the
                  original read that global implicitly).
    """
    def __init__(self, input_size, hidden_size, num_layers, output_size=None):
        super().__init__()  # initializes the parent class nn.Module
        if output_size is None:
            # fall back to the notebook-level hyperparameter, as before
            output_size = globals()['output_size']
        self.lin1 = nn.Linear(input_size, hidden_size//2)
        self.lstm = nn.LSTM(hidden_size//2, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        # self.activation_v = nn.LeakyReLU(.01)
        # self.activation_heading = torch.remainder()

    def forward(self, x):
        """Map (batch, seq, input_size) -> (batch, output_size) predictions."""
        out = self.lin1(x)
        out, h0 = self.lstm(out)
        # extract only the last time step, see https://machinelearningmastery.com/lstm-for-time-series-prediction-in-pytorch/
        out = out[:, -1,:]
        out = self.linear(out)
        # torch.remainder(out[1], 360)
        return out
# Instantiate with the notebook hyperparameters and move to GPU/CPU.
model = LSTMModel(input_size, hidden_size, num_layers).to(device)
In [ ]:
# Adam with the configured learning rate; plain MSE on the (vx, vy) targets.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()
In [ ]:
def evaluate():
    # Report train/test RMSE for the current model state.
    # NOTE(review): relies on notebook globals — model, X_train/y_train,
    # X_test/y_test, loss_fn, device and the training-loop variable `epoch`;
    # it also pushes the FULL train set through the model in one batch,
    # which may be memory-heavy on GPU.
    # toggle evaluation mode
    model.eval()
    with torch.no_grad():
        y_pred = model(X_train.to(device=device))
        train_rmse = torch.sqrt(loss_fn(y_pred, y_train))
        y_pred = model(X_test.to(device=device))
        test_rmse = torch.sqrt(loss_fn(y_pred, y_test))
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))
def load_most_recent():
    """Restore model/optimizer from the newest checkpoint in cache_path.

    Returns (epoch, loss) of the restored checkpoint, or (None, None)
    when no checkpoint file exists yet.
    """
    # zero-padded filenames sort lexicographically == numerically
    checkpoints = sorted(cache_path.glob("checkpoint_*.pt"))
    if not checkpoints:
        print('Nothing found to load')
        return None, None
    newest = checkpoints[-1]
    print(f"Loading {newest}")
    return load_cache(path=newest)
def load_cache(epoch=None, path=None):
    """Load a checkpoint either by epoch number or by explicit path.

    Restores optimizer and model state in place and returns (epoch, loss).
    Raises RuntimeError when neither `epoch` nor `path` is given.
    """
    if path is None:
        if epoch is None:
            raise RuntimeError("Either path or epoch must be given")
        path = cache_path / f"checkpoint_{epoch:05d}.pt"
    else:
        print(path.stem)
        # filenames are checkpoint_NNNNN.pt, so the last 5 stem chars are the epoch
        epoch = int(path.stem[-5:])
    # FIX: map_location lets a checkpoint saved on GPU be restored on a
    # CPU-only host (torch.load would otherwise fail deserializing CUDA tensors).
    cached = torch.load(path, map_location=device)
    optimizer.load_state_dict(cached['optimizer_state_dict'])
    model.load_state_dict(cached['model_state_dict'])
    return epoch, cached['loss']
def cache(epoch, loss):
    """Persist model/optimizer/loss state for `epoch` into the cache directory."""
    checkpoint_path = cache_path / f"checkpoint_{epoch:05d}.pt"
    print(f"Cache to {checkpoint_path}")
    state = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }
    torch.save(state, checkpoint_path)
In [ ]:
# Resume from the newest checkpoint if one exists.
start_epoch, loss = load_most_recent()
if start_epoch is None:
    start_epoch = 0
else:
    print(f"starting from epoch {start_epoch} (loss: {loss})")

# Train Network
for epoch in tqdm(range(start_epoch+1, num_epochs+1)):
    # toggle train mode
    model.train()
    # FIX: the loop variable was named `data`, clobbering the notebook-level
    # DataFrame `data` on the first training run (hidden-state bug).
    for batch_idx, (batch_X, targets) in enumerate(loader_train):
        # Get data to cuda if possible
        # NOTE(review): squeeze(1) is a no-op unless dim 1 is singleton — confirm intent
        batch_X = batch_X.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(batch_X)
        loss = loss_fn(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent update step/adam step
        optimizer.step()

    # checkpoint and report every 5 epochs (loss is that of the last batch)
    if epoch % 5 != 0:
        continue
    cache(epoch, loss)
    evaluate()
evaluate()
In [1]:
# Qualitative check: predictions vs. targets on the training set.
model.eval()
with torch.no_grad():
    y_pred = model(X_train.to(device=device))
print(y_pred.shape, y_train.shape)
# display both tensors side by side as the cell output
y_train, y_pred
In [ ]:
# The projection offsets needed to map predictions back into source space.
mean_x, mean_y
Out[ ]:
In [ ]:
import scipy
def predict_and_plot(feature, steps=50):
    """Autoregressively roll the model out for `steps` frames and plot.

    feature : (n, 6) array laid out like in_fields
              ['proj_x', 'proj_y', 'vx', 'vy', 'ax', 'ay'].
    Each predicted (vx, vy) is integrated into a new feature row that is
    appended and fed back to the model. The observed prefix is drawn in
    orange, the rollout in red.
    """
    length = feature.shape[0]
    dt = 1 / FPS
    with torch.no_grad():
        for _ in range(steps):
            # batch size of one, so feature is a single item in the array
            X = torch.tensor([feature], dtype=torch.float).to(device)
            s = model(X)[0].cpu()
            vx, vy = float(s[0]), float(s[1])
            # FIX: the original appended rows as [x, y, v, heading, a, d_heading],
            # which does not match the in_fields layout the model was trained
            # on (columns 2..5 are vx, vy, ax, ay) — feed back the components
            # the model actually expects.
            ax = (vx - feature[-1][2]) / dt
            ay = (vy - feature[-1][3]) / dt
            feature = np.append(
                feature,
                [[feature[-1][0] + vx * dt, feature[-1][1] + vy * dt, vx, vy, ax, ay]],
                axis=0)
    # observed prefix in orange, predicted rollout in red
    plt.plot(feature[:length, 0], feature[:length, 1], c='orange')
    plt.plot(feature[length-1:, 0], feature[length-1:, 1], c='red')
    plt.scatter(feature[length:, 0], feature[length:, 1], c='red')
In [ ]:
# print(filtered_data.loc[track_id,:]['proj_x'])
# Draw 100 random tracks as grey context, then one highlighted track with
# model rollouts starting from 5, 10 and 50 observed steps.
_track_id = 8701 # random.choice(track_ids)
_track_id = 3880 # random.choice(track_ids)
# _track_id = 2780
for i in range(100):
    _track_id = random.choice(track_ids)
    plt.plot(
        filtered_data.loc[_track_id,:]['proj_x'],
        filtered_data.loc[_track_id,:]['proj_y'],
        c='grey', alpha=.2
    )

# pick the track to highlight and predict on
_track_id = random.choice(track_ids)
# _track_id = 801
print(_track_id)
ax = plt.scatter(
    filtered_data.loc[_track_id,:]['proj_x'],
    filtered_data.loc[_track_id,:]['proj_y'],
    marker="*")
plt.plot(
    filtered_data.loc[_track_id,:]['proj_x'],
    filtered_data.loc[_track_id,:]['proj_y']
)
# rollouts from increasingly long observed prefixes
predict_and_plot(filtered_data.loc[_track_id,:].iloc[:5][in_fields].values)
predict_and_plot(filtered_data.loc[_track_id,:].iloc[:10][in_fields].values)
predict_and_plot(filtered_data.loc[_track_id,:].iloc[:50][in_fields].values)
# predict_and_plot(filtered_data.loc[_track_id,:].iloc[:70][in_fields].values)
# predict_and_plot(filtered_data.loc[_track_id,:].iloc[:115][in_fields].values)
In [ ]: