From dd10ce13af215614ac0598acdc5266842e14181e Mon Sep 17 00:00:00 2001
From: Ruben van de Ven
Date: Fri, 8 Nov 2024 14:59:21 +0100
Subject: [PATCH] Track directly from rtsp

---
 README.md                  |  3 +-
 trap/animation_renderer.py |  8 ++---
 trap/prediction_server.py  | 21 ++++++++++++-
 trap/tracker.py            | 61 ++++++++++++++++++++++++++++----------
 4 files changed, 71 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 3499b35..6a91926 100644
--- a/README.md
+++ b/README.md
@@ -11,10 +11,11 @@ These are roughly the steps to go from datagathering to training

-1. Make sure to have some recordings with a fixed camera.
+1. Make sure to have some recordings with a fixed camera. [UPDATE: not needed anymore, except for calibration & homography footage]
    * Recording can be done with `ffmpeg -rtsp_transport udp -i rtsp://USER:PASS@IP:554/Streaming/Channels/1.mp4 hof2-cam-$(date "+%Y%m%d-%H%M").mp4`
 2. Follow the steps in the auxilary [traptools](https://git.rubenvandeven.com/security_vision/traptools) repository to obtain (1) camera matrix, lens distortion, image dimensions, and (2+3) homography
 3. Run the tracker, e.g. `poetry run tracker --detector ultralytics --homography ../DATASETS/NAME/homography.json --video-src ../DATASETS/NAME/*.mp4 --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`
+   * Note: You can run this right off the camera stream: `poetry run tracker --eval_device cuda:0 --detector ultralytics --video-src rtsp://USER:PW@ADDRESS/STREAM --homography ../DATASETS/NAME/homography.json --calibration ../DATASETS/NAME/calibration.json --save-for-training EXPERIMENTS/raw/NAME/`, each recording adding a new file to the `raw` folder.
 4. Parse tracker data to Trajectron format: `poetry run process_data --src-dir EXPERIMENTS/raw/NAME --dst-dir EXPERIMENTS/trajectron-data/ --name NAME`
 5. Train Trajectron model `poetry run trajectron_train --eval_every 10 --vis_every 1 --train_data_dict NAME_train.pkl --eval_data_dict NAME_val.pkl --offline_scene_graph no --preprocess_workers 8 --log_dir EXPERIMENTS/models --log_tag _NAME --train_epochs 100 --conf EXPERIMENTS/config.json --batch_size 256 --data_dir EXPERIMENTS/trajectron-data `
 6. The run!
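The README addition above runs the tracker directly on a live RTSP URL instead of on recorded .mp4 files. As a rough illustration of what reading such a stream involves (a hypothetical sketch using OpenCV, not the project's actual frame_emitter code):

    # Hypothetical sketch, not part of this patch: pull frames from an RTSP
    # stream (or a file) with OpenCV. The URL placeholder mirrors the README.
    import cv2

    def iter_frames(src: str):
        cap = cv2.VideoCapture(src)   # accepts rtsp:// URLs as well as file paths
        try:
            while True:
                ok, frame = cap.read()
                if not ok:            # stream dropped or file ended
                    break
                yield frame
        finally:
            cap.release()

    for i, frame in enumerate(iter_frames("rtsp://USER:PW@ADDRESS/STREAM")):
        print(i, frame.shape)         # height, width, channels

Each such run of the tracker then writes its own timestamped training file under EXPERIMENTS/raw/NAME/, as implemented in trap/tracker.py below.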
diff --git a/trap/animation_renderer.py b/trap/animation_renderer.py
index 161c13c..5951037 100644
--- a/trap/animation_renderer.py
+++ b/trap/animation_renderer.py
@@ -109,10 +109,10 @@ class AnimationRenderer:
         self.batch_anim = pyglet.graphics.Batch()

         self.debug_lines = [
-            pyglet.shapes.Line(1380, self.config.camera.h, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
-            pyglet.shapes.Line(0, 660, 1380, 670, 2, (255,255,255,255), batch=self.batch_overlay),
-            pyglet.shapes.Line(1140, 760, 1140, 675, 2, (255,255,255,255), batch=self.batch_overlay),
-            pyglet.shapes.Line(0, 750, 1380, 760, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(1370, self.config.camera.h-360, 1380, 670-360, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(0, 660-360, 1380, 670-360, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(1140, 760-360, 1140, 675-360, 2, (255,255,255,255), batch=self.batch_overlay),
+            pyglet.shapes.Line(0, 770-360, 1380, 770-360, 2, (255,255,255,255), batch=self.batch_overlay),
         ]

diff --git a/trap/prediction_server.py b/trap/prediction_server.py
index f541678..6ee641c 100644
--- a/trap/prediction_server.py
+++ b/trap/prediction_server.py
@@ -113,6 +113,20 @@ def get_maps_for_input(input_dict, scene, hyperparams):

     return maps_dict

+# If homography is in cm, predictions can be terrible. Correct that here
+# TODO)) This should actually not be here, but we should use alternative homography
+# and then scale up in rendering
+def history_cm_to_m(history):
+    return [(h[0]/100, h[1]/100) for h in history]
+
+def prediction_m_to_cm(source):
+    # histories_dict[t][node]
+    for t in source:
+        for node in source[t]:
+            source[t][node] *= 100
+            # print(t,node, source[t][node])
+    return source
+
 class PredictionServer:
     def __init__(self, config: Namespace, is_running: Event):
         self.config = config
@@ -270,6 +284,7 @@ class PredictionServer:
                     # TODO: modify this into a mapping function between JS data an the expected Node format
                     # node = FakeNode(online_env.NodeType.PEDESTRIAN)
                     history = [[h['x'], h['y']] for h in track.get_projected_history_as_dict(frame.H, self.config.camera)]
+                    history = history_cm_to_m(history)
                     history = np.array(history)
                     x = history[:, 0]
                     y = history[:, 1]
@@ -359,7 +374,11 @@ class PredictionServer:
                         hyperparams['maximum_history_length'],
                         hyperparams['prediction_horizon']
                     )
-
+
+
+            prediction_dict, histories_dict, futures_dict = prediction_m_to_cm(prediction_dict), prediction_m_to_cm(histories_dict), prediction_m_to_cm(futures_dict)
+
+
             assert(len(prediction_dict.keys()) <= 1)
             if len(prediction_dict.keys()) == 0:
                 return

diff --git a/trap/tracker.py b/trap/tracker.py
index 300d4e1..cd7055e 100644
--- a/trap/tracker.py
+++ b/trap/tracker.py
@@ -27,6 +27,7 @@ from trap.frame_emitter import DetectionState, Frame, Detection, Track
 from tsmoothie.smoother import KalmanSmoother, ConvolutionSmoother
 import tsmoothie.smoother

+from datetime import datetime

 # Detection = [int, int, int, int, float, int]
 # Detections = [Detection]
@@ -56,6 +57,24 @@ def _yolov8_track(frame: Frame, model: YOLO, **kwargs) -> List[Detection]:

     return [Detection(track_id, bbox[0]-.5*bbox[2], bbox[1]-.5*bbox[3], bbox[2], bbox[3], 1, DetectionState.Confirmed, frame.index) for bbox, track_id in zip(results[0].boxes.xywh.cpu(), results[0].boxes.id.int().cpu().tolist())]

+class Multifile():
+    def __init__(self, srcs: List[Path]):
+        self.srcs = srcs
+        self.g = self.__iter__()
+
+    @property
+    def name(self):
+        return ", ".join([s.name for s in self.srcs])
+
+    def __iter__(self):
+        for path in self.srcs:
+            with path.open('r') as fp:
+                for l in fp:
+                    yield l
+
+    def readline(self):
+        return self.g.__next__()
+

 class TrainingDataWriter:
     def __init__(self, training_path = Optional[Path]):
@@ -77,7 +96,8 @@ class TrainingDataWriter:

     def __enter__(self):
         if self.path:
-            self.training_fp = open(self.path / 'all.txt', 'w')
+            d = datetime.now().isoformat(timespec="minutes")
+            self.training_fp = open(self.path / f'all-{d}.txt', 'w')
             # following https://github.com/StanfordASL/Trajectron-plus-plus/blob/master/experiments/pedestrians/process_data.py
             self.csv = csv.DictWriter(self.training_fp, fieldnames=['frame_id', 'track_id', 'l', 't', 'w', 'h', 'x', 'y', 'state'], delimiter='\t', quoting=csv.QUOTE_NONE)
             self.count = 0
@@ -110,23 +130,32 @@ class TrainingDataWriter:
             return

         self.training_fp.close()
+
+        source_files = list(self.path.glob("*.txt")) # we loop twice, so need a list instead of generator
+        total = 0
+        sources = Multifile(source_files)
+        for line in sources:
+            if len(line) > 3: # make sure not to count empty lines
+                total += 1
+
         lines = {
-            'train': int(self.count * .8),
-            'val': int(self.count * .12),
-            'test': int(self.count * .08),
+            'train': int(total * .8),
+            'val': int(total * .12),
+            'test': int(total * .08),
         }
-        logger.info(f"Splitting gathered data from {self.training_fp.name}")
-        with open(self.training_fp.name, 'r') as source_fp:
-            for name, line_nrs in lines.items():
-                dir_path = self.path / name
-                dir_path.mkdir(exist_ok=True)
-                file = dir_path / 'tracked.txt'
-                logger.debug(f"- Write {line_nrs} lines to {file}")
-                with file.open('w') as target_fp:
-                    for i in range(line_nrs):
-                        target_fp.write(source_fp.readline())
-
-
+
+        logger.info(f"Splitting gathered data from {sources.name}")
+        # for source_file in source_files:
+        for name, line_nrs in lines.items():
+            dir_path = self.path / name
+            dir_path.mkdir(exist_ok=True)
+            file = dir_path / 'tracked.txt'
+            logger.debug(f"- Write {line_nrs} lines to {file}")
+            with file.open('w') as target_fp:
+                for i in range(line_nrs):
+                    target_fp.write(sources.readline())
+
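The new history_cm_to_m / prediction_m_to_cm helpers in trap/prediction_server.py exist because the homography used here yields coordinates in centimetres, while the Trajectron++ pedestrian models work on metre-scale trajectories. A simplified, hypothetical round trip of that unit fix (the patched prediction_m_to_cm actually scales numpy arrays inside nested dicts; plain tuples are used here for brevity):

    # Simplified sketch of the unit correction; not the exact code from the patch.
    history_cm = [(120.0, 450.0), (130.0, 460.0)]   # hypothetical track positions in cm

    def cm_to_m(points):
        return [(x / 100, y / 100) for x, y in points]

    def m_to_cm(points):
        return [(x * 100, y * 100) for x, y in points]

    history_m = cm_to_m(history_cm)   # [(1.2, 4.5), (1.3, 4.6)] goes into the model
    rendered = m_to_cm(history_m)     # [(120.0, 450.0), (130.0, 460.0)] back for rendering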
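With the TrainingDataWriter changes, every tracking session appends a fresh all-<datetime>.txt, and on exit the concatenation of all such files is re-split into train/val/test. A rough usage sketch of the new Multifile helper, assuming the hypothetical EXPERIMENTS/raw/NAME layout from the README:

    # Rough sketch of the splitting flow; Multifile is the class added above.
    from pathlib import Path
    from trap.tracker import Multifile

    path = Path("EXPERIMENTS/raw/NAME")
    source_files = list(path.glob("*.txt"))               # one all-<datetime>.txt per session

    sources = Multifile(source_files)
    total = sum(1 for line in sources if len(line) > 3)   # ignore (near-)empty lines

    lines = {'train': int(total * .8), 'val': int(total * .12), 'test': int(total * .08)}

    for name, line_nrs in lines.items():                  # readline() walks the files in order
        out = path / name / 'tracked.txt'
        out.parent.mkdir(exist_ok=True)
        with out.open('w') as target_fp:
            for _ in range(line_nrs):
                target_fp.write(sources.readline())

Counting with a fresh iterator leaves the generator behind readline() untouched, so the split still starts from the first line of the first file.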