# trap/trap/tools.py
from __future__ import annotations
from argparse import Namespace
from dataclasses import dataclass
import json
import math
from pathlib import Path
import pickle
from tempfile import mktemp
import jsonlines
import numpy as np
import pandas as pd
import shapely
from shapely.ops import split
import trap.tracker
from trap.config import parser
from trap.frame_emitter import Camera, Detection, DetectionState, video_src_from_config, Frame
from trap.tracker import DETECTOR_YOLOv8, FinalDisplacementFilter, Smoother, TrackReader, _yolov8_track, Track, TrainingDataWriter, Tracker, read_tracks_json
from collections import defaultdict
import logging
import cv2
from typing import Callable, List, Iterable, Optional
from ultralytics import YOLO
from ultralytics.engine.results import Results as YOLOResult
import tqdm
from trap.utils import inv_lerp, lerp
logger = logging.getLogger('tools')

class FrameGenerator():
    def __init__(self, config):
        self.video_srcs = video_src_from_config(config)
        self.config = config
        if not hasattr(config, "H"):
            raise RuntimeError("Set homography file with --homography param")

        # store current position
        self.video_path = None
        self.video_nr = None
        self.frame_count = None
        self.frame_idx = None
        self.n = 0

    def __iter__(self):
        for video_nr, video_path in enumerate(self.video_srcs):
            self.video_path = video_path
            self.video_nr = video_nr
            logger.info(f"Play from '{str(video_path)}'")
            video = cv2.VideoCapture(str(video_path))
            fps = video.get(cv2.CAP_PROP_FPS)
            self.frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
            if self.frame_count < 0:
                self.frame_count = math.inf
            self.frame_idx = 0
            if self.config.video_offset:
                logger.info(f"Start at frame {self.config.video_offset}")
                video.set(cv2.CAP_PROP_POS_FRAMES, self.config.video_offset)
                self.frame_idx = self.config.video_offset

            while True:
                ret, img = video.read()
                self.frame_idx += 1
                self.n += 1

                if not ret:
                    # video has finished; continue with the next file
                    break

                frame = Frame(index=self.n, img=img, H=self.config.H, camera=self.config.camera)
                yield frame

def marquee_string(string: str, window: int, i: int):
    """Return a `window`-sized slice of `string` that slides back and forth as `i` increases."""
    if window > len(string):
        return string

    too_much = len(string) - window
    offset = i % (too_much * 2)
    if offset > too_much:
        # bounce back instead of wrapping around
        offset = too_much - (offset - too_much)
    return string[offset:offset+window]
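
# Example of the bounce (hypothetical values): with window=5 on an 8-character
# string the visible slice slides right, then reverses instead of wrapping:
#
#   marquee_string("abcdefgh", 5, 0)  # -> "abcde"
#   marquee_string("abcdefgh", 5, 3)  # -> "defgh"
#   marquee_string("abcdefgh", 5, 4)  # -> "cdefg"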

def tracker_preprocess():
    config = parser.parse_args()
    tracker = Tracker(config)
    # model = YOLO('EXPERIMENTS/yolov8x.pt')

    with TrainingDataWriter(config.save_for_training) as writer:
        bar = tqdm.tqdm()
        tracks = defaultdict(lambda: Track())
        total = 0
        frames = FrameGenerator(config)
        total_tracks = set()

        for frame in frames:
            bar.update()

            detections = tracker.track_frame(frame)
            total += len(detections)
            # detections = _yolov8_track(frame, model, imgsz=1440, classes=[0])

            for detection in detections:
                track = tracks[detection.track_id]
                track.track_id = detection.track_id # for new tracks
                track.history.append(detection) # add to history

            active_track_ids = [d.track_id for d in detections]
            active_tracks = {t.track_id: t for t in tracks.values() if t.track_id in active_track_ids}
            total_tracks.update(active_track_ids)

            bar.set_description(f"{frames.video_nr}/{len(frames.video_srcs)} [{frames.frame_idx}/{frames.frame_count}] {marquee_string(str(frames.video_path), 10, frames.n//2)} | dets {len(detections)}: {[d.track_id for d in detections]} (∑{total} dets, {len(total_tracks)} tracks)")

            writer.add(frame, active_tracks.values())

    logger.info("Done!")

bgr_colors = [
    (255, 0, 0),
    (0, 255, 0),
    # (0, 0, 255), # red is reserved for missing waypoints
    (0, 255, 255),
]

def detection_color(detection: Detection, i, prev_detection: Optional[Detection] = None):
    vague = detection.state == DetectionState.Lost or (prev_detection and detection.frame_nr - prev_detection.frame_nr > 1)
    return bgr_colors[i % len(bgr_colors)] if not vague else (0, 0, 255)

def to_point(coord):
    return (int(coord[0]), int(coord[1]))

def tracker_compare():
    config = parser.parse_args()

    trackers: List[Tracker] = []
    # TODO: support all tracker.DETECTORS
    for tracker_id in [
        trap.tracker.DETECTOR_YOLOv8,
        # trap.tracker.DETECTOR_MASKRCNN,
        # trap.tracker.DETECTOR_RETINANET,
        trap.tracker.DETECTOR_FASTERRCNN,
    ]:
        tracker_config = Namespace(**vars(config))
        tracker_config.detector = tracker_id
        trackers.append(Tracker(tracker_config))

    frames = FrameGenerator(config)
    bar = tqdm.tqdm(frames)

    cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    for frame in bar:
        # frame.img = cv2.undistort(frame.img, config.camera.mtx, config.camera.dist, None, config.camera.newcameramtx) # try to undistort for better detections, seems not to matter at all
        trackers_detections = [(t, t.track_frame(frame)) for t in trackers]

        for i, tracker in enumerate(trackers):
            cv2.putText(frame.img, tracker.config.detector, (10, 30*(i+1)), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[i % len(bgr_colors)])

        for i, (tracker, detections) in enumerate(trackers_detections):
            for track_id in tracker.tracks:
                draw_track(frame.img, tracker.tracks[track_id], i)
            for detection in detections:
                color = detection_color(detection, i)
                l, t, r, b = detection.to_ltrb()
                cv2.rectangle(frame.img, (l, t), (r, b), color)
                cv2.putText(frame.img, f"{detection.track_id}", (l, b+10), cv2.FONT_HERSHEY_DUPLEX, 1, color=color)
                conf = f"{detection.conf:.3f}" if detection.conf is not None else "None"
                cv2.putText(frame.img, f"{detection.det_class} - {conf}", (l, t), cv2.FONT_HERSHEY_DUPLEX, .7, color=color)
        cv2.imshow('frame', cv2.resize(frame.img, (1920, 1080)))
        cv2.waitKey(1)
        bar.set_description(f"[{frames.video_nr}/{len(frames.video_srcs)}] [{frames.frame_idx}/{frames.frame_count}] {str(frames.video_path)}")

def transition_path_points(path: np.array, t: float):
    """Truncate `path` to the part drawn at animation position `t` (0-1), interpolating
    the final point so the path grows at a fixed speed: a point at distance `d` along
    the path appears when t >= d / DRAW_SPEED."""

    if t >= 1:
        return path
    if t <= 0:
        return np.array([path[0]])

    lengths = np.sqrt(np.sum(np.diff(path, axis=0)**2, axis=1))
    cum_lengths = np.cumsum(lengths)
    # ts = np.concatenate((np.array([0.]), cum_lengths / cum_lengths[-1]))
    DRAW_SPEED = 35 # fixed speed (independent of length) TODO)) make variable
    ts = np.concatenate((np.array([0.]), cum_lengths / DRAW_SPEED))
    new_path = [path[0]]

    for a, b, t_a, t_b in zip(path[:-1], path[1:], ts[:-1], ts[1:]):
        if t_b < t:
            new_path.append(b)
            continue
        # interpolate the final point between the endpoints of this segment
        relative_t = inv_lerp(t_a, t_b, t)
        x = lerp(a[0], b[0], relative_t)
        y = lerp(a[1], b[1], relative_t)
        new_path.append([x, y])
        break
    return np.array(new_path)
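
# Worked example (hypothetical coordinates): a straight 35-unit path is fully
# drawn at t=1, since DRAW_SPEED is 35 units per unit of t:
#
#   transition_path_points(np.array([[0., 0.], [0., 35.]]), 0.5)
#   # -> array([[ 0. ,  0. ],
#   #           [ 0. , 17.5]])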
from shapely.geometry import LineString
from shapely.geometry import Point
from sklearn.cluster import AgglomerativeClustering

@dataclass
class PointCluster:
    point: np.ndarray
    start: np.ndarray
    source_points: List[np.ndarray]
    probability: float
    next_point_clusters: List[PointCluster]

def cluster_predictions_by_radius(start_point, lines: Iterable[np.ndarray | LineString], radius=.5, p_factor=1.) -> List[PointCluster]:
    """Recursively cluster prediction lines by where they cross a circle of `radius` around `start_point`."""
    p0 = Point(*start_point)
    circle = p0.buffer(radius).boundary

    intersections = []
    remaining_lines = []
    for line in lines:
        linestring = line if type(line) is LineString else LineString(line.tolist())
        intersection = circle.intersection(linestring)
        if type(intersection) is LineString and intersection.is_empty:
            # no intersection with the circle: a dangling endpoint that we can skip
            continue
        if type(intersection) is not Point:
            # with multiple intersections: use only the first one
            intersection = intersection.geoms[0]

        # set a buffer around the intersection to assure a match is found on the line
        split_line = split(linestring, intersection.buffer(.01))
        remaining_line = split_line.geoms[2] if len(split_line.geoms) > 2 else None

        intersections.append(intersection)
        remaining_lines.append(remaining_line)

    if len(intersections) < 1:
        return []

    # as all intersections are on the same circle we can estimate the angle between
    # them from their distance: the circumference is 2*pi*r, so distance is
    # proportional to the radius
    if len(intersections) > 1:
        clustering = AgglomerativeClustering(None, linkage="ward", distance_threshold=2*math.pi * radius / 6)
        coords = np.asarray([i.coords for i in intersections]).reshape((-1, 2))
        assigned_clusters = clustering.fit_predict(coords)
    else:
        assigned_clusters = [0] # only one item

    clusters = defaultdict(lambda: [])
    cluster_remainders = defaultdict(lambda: [])
    for point, line, c in zip(intersections, remaining_lines, assigned_clusters):
        clusters[c].append(point)
        cluster_remainders[c].append(line)

    line_clusters = []
    for c, points in clusters.items():
        mean = np.mean(points, axis=0)
        prob = p_factor * len(points) / len(assigned_clusters)

        remaining_lines = cluster_remainders[c]
        remaining_lines = list(filter(None, remaining_lines))

        next_points = cluster_predictions_by_radius(mean, remaining_lines, radius, prob)

        line_clusters.append(PointCluster(mean, start_point, points, prob, next_points))

    return line_clusters
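
# A minimal usage sketch (hypothetical coordinates): feed predicted paths as
# point arrays and walk the resulting tree; the probabilities of a cluster's
# children sum to at most its own probability (lines may terminate early).
#
#   lines = [np.array([[0., 0.], [2., 0.], [4., 2.]]),
#            np.array([[0., 0.], [2., 0.], [4., -2.]])]
#
#   def walk(cluster: PointCluster, depth=0):
#       print("  " * depth, cluster.point, f"p={cluster.probability:.2f}")
#       for sub in cluster.next_point_clusters:
#           walk(sub, depth + 1)
#
#   for root in cluster_predictions_by_radius((0., 0.), lines, radius=1.5):
#       walk(root)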

# def cosine_similarity(point1, point2):
#     dot_product = np.dot(point1, point2)
#     norm1 = np.linalg.norm(point1)
#     norm2 = np.linalg.norm(point2)
#     return dot_product / (norm1 * norm2)

def draw_track_predictions(img: cv2.Mat, track: Track, color_index: int, camera: Camera, convert_points: Optional[Callable], anim_position=.8, as_clusters=False):
    """
    anim_position: 0-1; the lines slide out during 0-0.8 and fade out during 0.8-1
    """
    if not track.predictions:
        return

    current_point = track.get_projected_history(camera=camera)[-1]

    opacity = 1 - min(1, max(0, inv_lerp(0.8, 1, anim_position))) # fade out
    slide_t = min(1, max(0, inv_lerp(0, 0.8, anim_position))) # slide position

    color = bgr_colors[color_index % len(bgr_colors)]
    color = tuple([int(c*opacity) for c in color])

    lines = []
    for pred_i, pred in enumerate(track.predictions):
        pred_coords = pred # cv2.perspectiveTransform(np.array([pred]), inv_H)[0].tolist()
        line_points = np.concatenate(([current_point], pred_coords)) # 'current point' is a moving target
        line_points = transition_path_points(line_points, slide_t)
        lines.append(line_points)

    if as_clusters:
        clusters = cluster_predictions_by_radius(current_point, lines, 1.5)

        def draw_cluster(img, cluster: PointCluster):
            points = convert_points([cluster.start, cluster.point])
            # cv2 only draws to integer coordinates
            points = np.rint(points).astype(int)
            thickness = max(1, int(cluster.probability * 6))
            if len(cluster.next_point_clusters) == 1:
                # not a final point, nor a split:
                cv2.line(img, points[0], points[1], color, thickness, lineType=cv2.LINE_AA)
            else:
                cv2.arrowedLine(img, points[0], points[1], color, thickness, cv2.LINE_AA)
            for sub in cluster.next_point_clusters:
                draw_cluster(img, sub)

        for cluster in clusters:
            draw_cluster(img, cluster)
    else:
        # convert function (e.g. to project points to img space)
        if convert_points:
            lines = [convert_points(points) for points in lines]

        # cv2 only draws to integer coordinates
        lines = [np.rint(points).astype(int) for points in lines]

        # draw all lines in a single call
        cv2.polylines(img, lines, False, color, 2, cv2.LINE_AA)
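
# A sketch of animating the predictions over successive frames. `frames`,
# `tracks_in_frame`, `camera` and `convert_points` are assumed from the calling
# context (hypothetical names, not part of this module); anim_position restarts
# the slide+fade cycle every 25 frames:
#
#   for i, frame in enumerate(frames):
#       for j, track in enumerate(tracks_in_frame):
#           draw_track_predictions(frame.img, track, j+1, camera, convert_points,
#                                  anim_position=(i % 25) / 25)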

def draw_trackjectron_history(img: cv2.Mat, track: Track, color_index: int, convert_points: Optional[Callable]):
    if not track.predictor_history:
        return

    coords = track.predictor_history # cv2.perspectiveTransform(np.array([track.predictor_history]), inv_H)[0].tolist()
    if convert_points:
        coords = convert_points(coords)

    color = tuple(b/2 for b in bgr_colors[color_index % len(bgr_colors)])

    # draw a line with a circle for every point, starting from the second one
    for ci in range(1, len(coords)):
        start = [int(p) for p in coords[ci-1]]
        end = [int(p) for p in coords[ci]]
        cv2.line(img, start, end, color, 1, lineType=cv2.LINE_AA)
        cv2.circle(img, end, 4, color, 1, lineType=cv2.LINE_AA)

def draw_track_projected(img: cv2.Mat, track: Track, color_index: int, camera: Camera, convert_points: Optional[Callable]):
    history = track.get_projected_history(camera=camera)

    if convert_points:
        history = convert_points(history)

    cv2.putText(img, f"{track.track_id} ({len(history)})", to_point(history[0]), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[color_index % len(bgr_colors)])

    point_color = bgr_colors[color_index % len(bgr_colors)]
    cv2.circle(img, to_point(history[0]), 3, point_color, 2)

    points = np.rint(history.reshape((-1, 1, 2))).astype(np.int32)
    cv2.polylines(img, [points], False, point_color, 1)

    for j in range(len(history)-1):
        b = history[j+1]
        cv2.circle(img, to_point(b), 3, point_color, 2)

def draw_track(img: cv2.Mat, track: Track, color_index: int):
    history = track.history

    cv2.putText(img, f"{track.track_id} ({len(history)})", to_point(history[0].get_foot_coords()), cv2.FONT_HERSHEY_DUPLEX, 1, color=bgr_colors[color_index % len(bgr_colors)])

    point_color = detection_color(history[0], color_index)
    cv2.circle(img, to_point(history[0].get_foot_coords()), 3, point_color, 2)

    for j in range(len(history)-1):
        a = history[j]
        b = history[j+1]
        # TODO)) fill gaps between frames with Track.get_with_interpolated_history()

        color = detection_color(b, color_index, a)
        cv2.line(img, to_point(a.get_foot_coords()), to_point(b.get_foot_coords()), color, 1)
        point_color = detection_color(b, color_index)
        cv2.circle(img, to_point(b.get_foot_coords()), 3, point_color, 2)

def blacklist_tracks():
    config = parser.parse_args()

    cv2.namedWindow("frame", cv2.WND_PROP_FULLSCREEN)
    cv2.setWindowProperty("frame", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    backdrop = cv2.imread('../DATASETS/hof3/output.png')
    path: Path = config.save_for_training

    reader = TrackReader(path, config.camera.fps, exclude_whitelisted=True)
    tracks = [t for t in reader]
    track_filter = FinalDisplacementFilter(2.0)
    tracks = track_filter.apply(tracks, config.camera)

    smoother = Smoother()
    try:
        for track in tqdm.tqdm(tracks):
            if len(track.history) < 5:
                continue

            img = backdrop.copy()
            draw_track(img, track.get_with_interpolated_history(), 0)
            draw_track(img, smoother.smooth_track(track.get_with_interpolated_history()).get_sampled(5), 1)

            imgS = cv2.resize(img, (1920, 1080))
            cv2.imshow('frame', imgS)
            while True:
                k = cv2.waitKey(0)
                if k == 27: # Esc key to stop
                    raise StopIteration
                elif k == ord('s'):
                    break # skip for now
                elif k == ord('y'):
                    print('whitelist', track.track_id)
                    with jsonlines.open(reader.whitelist_file, mode='a') as writer:
                        # skip next time around
                        writer.write(track.track_id)
                    break
                elif k == ord('n'):
                    print('blacklist', track.track_id)
                    with jsonlines.open(reader.blacklist_file, mode='a') as writer:
                        writer.write(track.track_id)
                    break
                else:
                    # ignore all other keypresses, but print their value
                    print(k)
                    continue
    except StopIteration:
        pass

def rewrite_raw_track_files():
    logging.basicConfig(level=logging.DEBUG)
    config = parser.parse_args()
    trap.tracker.rewrite_raw_track_files(config.save_for_training)

def interpolate_missing_frames(data: pd.DataFrame):
    missing = 0
    old_size = len(data)

    # slow way to append missing steps to the dataset
    for ind, row in tqdm.tqdm(data.iterrows()):
        if row['diff'] > 1:
            for s in range(1, int(row['diff'])):
                # add as many entries as are missing
                missing += 1
                data.loc[len(data)] = [row['frame_id']-s, row['track_id'], np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 1, 1]

    logger.info(f'was: {old_size} added: {missing}, new length: {len(data)}')

    # now sort, so that the added data is in the right place
    data.sort_values(by=['track_id', 'frame_id'], inplace=True)

    df = data.copy()
    df = df.groupby('track_id').apply(lambda group: group.interpolate(method='linear'))
    df.reset_index(drop=True, inplace=True)

    # update diff, should now be 1 | NaN
    df['diff'] = df.groupby(['track_id'])['frame_id'].diff()

    return df
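
# A small illustration (hypothetical values): a track that jumps from frame 3 to
# frame 6 (diff == 3) gets two NaN rows appended for frames 4 and 5, which the
# per-track linear interpolation then fills:
#
#   cols = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state', 'diff']
#   demo = pd.DataFrame([[3, 1, 0, 0, 10, 20, 1.0, 1.0, 1, np.nan],
#                        [6, 1, 0, 0, 10, 20, 4.0, 4.0, 1, 3.0]], columns=cols)
#   interpolate_missing_frames(demo)[['frame_id', 'x', 'y']]
#   # frames 3, 4, 5, 6 with x (and y) = 1.0, 2.0, 3.0, 4.0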

def smooth(data: pd.DataFrame):
    df = data.copy()
    if 'x_raw' not in df:
        df['x_raw'] = df['x']
    if 'y_raw' not in df:
        df['y_raw'] = df['y']

    print("Running smoother")
    smoother = Smoother(convolution=False)
    def smoothing(data):
        return smoother.smooth(data).tolist()

    # operate smoothing per axis
    print("smooth x")
    df['x'] = df.groupby('track_id')['x_raw'].transform(smoothing)
    print("smooth y")
    df['y'] = df.groupby('track_id')['y_raw'].transform(smoothing)

    return df
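
# A small sketch of inspecting the result (column names as produced above): the
# raw positions remain in x_raw/y_raw, so the smoothing displacement is e.g.
#
#   df = smooth(data)
#   rmse = np.sqrt(((df['x'] - df['x_raw'])**2 + (df['y'] - df['y_raw'])**2).mean())
#   print(f"smoothing moved points by {rmse:.3f} m RMSE")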

def load_tracks_from_csv(file: Path, fps: float, grid_size: Optional[int] = None, sample: Optional[int] = None):
    cache_file = Path('/tmp/load_tracks-smooth-' + file.name)
    if cache_file.exists():
        data = pd.read_pickle(cache_file)
    else:
        # grid_size is in points per meter
        # sample: keep only every n-th point. Thus sample=5 converts 12fps to 2.4fps, and sample=4 to 3fps
        data = pd.read_csv(file, delimiter="\t", index_col=False, header=None)
        # l,t,w,h: image space (pixels)
        # x,y: world space (meters or cm depending on homography)
        data.columns = ['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state']
        data['frame_id'] = pd.to_numeric(data['frame_id'], downcast='integer')
        data['frame_id'] = data['frame_id'] // 10 # compatibility with Trajectron++

        data.sort_values(by=['track_id', 'frame_id'], inplace=True)

        # cm to meter
        data['x'] = data['x']/100
        data['y'] = data['y']/100

        if grid_size is not None:
            # snap positions to a grid of `grid_size` points per meter
            data['x'] = (data['x']*grid_size).round() / grid_size
            data['y'] = (data['y']*grid_size).round() / grid_size

        data['diff'] = data.groupby(['track_id'])['frame_id'].diff() #.fillna(0)
        data['diff'] = pd.to_numeric(data['diff'], downcast='integer')

        data = interpolate_missing_frames(data)
        data = smooth(data)
        data.to_pickle(cache_file)

    if sample is not None:
        print(f"Sampling 1/{sample} of {data.shape[0]} items")
        data["idx_in_track"] = data.groupby(['track_id']).cumcount() # create index in group
        selection = data["idx_in_track"].apply(lambda x: x % sample == 0)
        data = data.loc[selection].copy() # copy to avoid SettingWithCopy warnings
        print(f"Done sampling, kept {data.shape[0]} items")

    # track_id: string to int
    data['track_id'] = pd.to_numeric(data['track_id'], downcast='integer')

    # redo diff after possible sampling:
    data['diff'] = data.groupby(['track_id'])['frame_id'].diff()
    # timestep to seconds
    data['dt'] = data['diff'] * (1/fps)

    # derive displacement, velocity and acceleration from x and y
    data['dx'] = data.groupby(['track_id'])['x'].diff()
    data['dy'] = data.groupby(['track_id'])['y'].diff()
    data['vx'] = data['dx'].div(data['dt'], axis=0)
    data['vy'] = data['dy'].div(data['dt'], axis=0)
    data['ax'] = data.groupby(['track_id'])['vx'].diff().div(data['dt'], axis=0)
    data['ay'] = data.groupby(['track_id'])['vy'].diff().div(data['dt'], axis=0)

    # then we need the velocity itself
    data['v'] = np.sqrt(data['vx'].pow(2) + data['vy'].pow(2))
    # and derive acceleration from it
    data['a'] = data.groupby(['track_id'])['v'].diff().div(data['dt'], axis=0)

    # we can calculate heading based on the velocity components
    data['heading'] = (np.arctan2(data['vy'], data['vx']) * 180 / np.pi) % 360
    # and derive it to get the rate of change of the heading
    data['d_heading'] = data.groupby(['track_id'])['heading'].diff().div(data['dt'], axis=0)

    # backfill the derived parameters, assuming they were constant when entering
    # the frame, so that the model can make estimations based on these values
    group = data.groupby(['track_id'])
    for field in ['dx', 'dy', 'vx', 'vy', 'ax', 'ay', 'v', 'a', 'heading', 'd_heading']:
        data[field] = group[field].bfill()

    data.set_index(['track_id', 'frame_id'], inplace=True) # use for quick access

    return data
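
# A minimal usage sketch (the file path and fps are hypothetical; the CSV must
# be tab-separated with the nine columns listed above):
#
#   data = load_tracks_from_csv(Path('EXPERIMENTS/raw/tracks.txt'), fps=12, sample=5)
#   data.loc[1]                        # all rows for track_id 1, indexed by frame_id
#   data[['v', 'heading']].describe()  # summary of the derived dynamics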

def filter_short_tracks(data: pd.DataFrame, n):
    # a length of at least 3 is necessary to have all relevant derivatives of position
    return data.groupby(['track_id']).filter(lambda group: len(group) >= n)

def normalise_position(data: pd.DataFrame):
    mu = data[['x','y']].mean(axis=0)
    std = data[['x','y']].std(axis=0)
    data[['x_norm','y_norm']] = (data[['x','y']] - mu) / std
    return data, mu, std
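
# The normalisation inverts with the returned statistics, e.g. to map model
# output back to world coordinates:
#
#   data, mu, std = normalise_position(data)
#   restored = data[['x_norm', 'y_norm']].values * std.values + mu.values
#   assert np.allclose(restored, data[['x', 'y']].values)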