Towards-Realtime-MOT/utils/datasets.py

import glob
import math
import os
import os.path as osp
import random
import time
from collections import OrderedDict

import cv2
import numpy as np
import torch

from torch.utils.data import Dataset
from utils.utils import xyxy2xywh

class LoadImages:  # for inference
    """Serve image files as letterboxed, normalized arrays for inference.

    ``path`` may be a directory (every ``.jpg``/``.jpeg``/``.png``/``.tif``
    file directly inside it is used, in sorted order) or a single file.
    Instances can be iterated or indexed; either way each item is a tuple
    ``(img_path, img, img0)`` where ``img0`` is the image exactly as returned
    by ``cv2.imread`` (BGR) and ``img`` is its letterboxed copy converted to
    RGB, laid out channel-first as ``float32`` and divided by 255.

    Args:
        path: Directory containing images, or path of a single image file.
        img_size: ``(width, height)`` of the letterboxed output.

    Raises:
        AssertionError: If no usable image is found at ``path``.
    """

    def __init__(self, path, img_size=(1088, 608)):
        # Start from an empty list so that a path which is neither a directory
        # nor a file reaches the "No images found" check below instead of
        # failing with an AttributeError on a missing attribute.
        self.files = []
        if os.path.isdir(path):
            image_format = ('.jpg', '.jpeg', '.png', '.tif')
            candidates = sorted(glob.glob('%s/*.*' % path))
            self.files = [f for f in candidates
                          if os.path.splitext(f)[1].lower() in image_format]
        elif os.path.isfile(path):
            self.files = [path]

        self.nF = len(self.files)  # number of image files
        self.width = img_size[0]
        self.height = img_size[1]
        self.count = 0

        assert self.nF > 0, 'No images found in {}'.format(path)

    def __iter__(self):
        # __next__ pre-increments, so -1 makes the first item index 0.
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        # ">=" (not "==") keeps raising StopIteration on every call made after
        # exhaustion instead of running past the end of self.files.
        if self.count >= self.nF:
            raise StopIteration
        return self._load(self.files[self.count])

    def __getitem__(self, idx):
        # Indices wrap around modulo the number of files.
        return self._load(self.files[idx % self.nF])

    def _load(self, img_path):
        """Read one image and return ``(img_path, img, img0)``."""
        # Read image
        img0 = cv2.imread(img_path)  # BGR
        assert img0 is not None, 'Failed to load ' + img_path

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height, width=self.width)

        # BGR -> RGB, HWC -> CHW, then scale to float32
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        return img_path, img, img0

    def __len__(self):
        return self.nF  # number of files


class LoadVideo:  # for inference
    """Iterate over the frames of a video as letterboxed, normalized arrays.

    Each iteration step yields ``(count, img, img0)`` where ``count`` is the
    zero-based frame counter, ``img0`` is the frame resized to
    ``(self.w, self.h)`` (BGR) and ``img`` is its letterboxed copy converted
    to RGB, laid out channel-first as ``float32`` and divided by 255.

    Args:
        path: Source handed to ``cv2.VideoCapture``.
        img_size: ``(width, height)`` of the letterboxed output.

    Raises:
        IOError: If the capture cannot be opened.
    """

    def __init__(self, path, img_size=(1088, 608)):
        self.cap = cv2.VideoCapture(path)
        # Fail with a clear message here; otherwise an unopened capture
        # reports a zero frame size and get_size() divides by zero.
        if not self.cap.isOpened():
            raise IOError('Failed to open video {}'.format(path))
        self.frame_rate = int(round(self.cap.get(cv2.CAP_PROP_FPS)))
        self.vw = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.vh = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.vn = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

        self.width = img_size[0]
        self.height = img_size[1]
        self.count = 0

        self.w, self.h = self.get_size(self.vw, self.vh, self.width, self.height)
        print('Lenth of the video: {:d} frames'.format(self.vn))

    def get_size(self, vw, vh, dw, dh):
        """Return the largest ``(w, h)`` with the aspect ratio of ``vw`` x ``vh`` that fits in ``dw`` x ``dh``."""
        wa, ha = float(dw) / vw, float(dh) / vh
        a = min(wa, ha)
        return int(vw * a), int(vh * a)

    def __iter__(self):
        # __next__ pre-increments, so -1 makes the first frame number 0.
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        # ">=" (not "==") keeps raising StopIteration on every call made after
        # exhaustion instead of attempting another read.
        if self.count >= len(self):
            raise StopIteration
        # Read image
        res, img0 = self.cap.read()  # BGR
        assert res and img0 is not None, 'Failed to load frame {:d}'.format(self.count)
        img0 = cv2.resize(img0, (self.w, self.h))

        # Padded resize
        img, _, _, _ = letterbox(img0, height=self.height, width=self.width)

        # BGR -> RGB, HWC -> CHW, then scale to float32
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0

        # cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image
        return self.count, img, img0

    def __len__(self):
        return self.vn  # number of frames reported by the capture


class LoadImagesAndLabels:  # for training
    """Dataset of images with per-box labels, read from a text file of image paths.

    Each non-blank line of ``path`` is an image path. The matching label path
    is derived by replacing ``'images'`` with ``'labels_with_ids'`` and the
    ``.png``/``.jpg`` extension with ``.txt``. Label files are read as rows of
    six numbers; columns 2..5 are a box as normalized center-x, center-y,
    width, height. Columns 0 and 1 are passed through untouched by this class.

    Args:
        path: Text file listing one image path per line.
        img_size: ``(width, height)`` of the letterboxed output.
        augment: Enable HSV jitter, random affine warp and random horizontal flip.
        transforms: Optional callable applied to the final RGB image.
    """

    def __init__(self, path, img_size=(1088, 608), augment=False, transforms=None):
        with open(path, 'r') as file:
            # strip() rather than removing only '\n': whitespace-only lines
            # are then dropped as blank and trailing blanks do not end up in
            # the image / label paths.
            stripped = [line.strip() for line in file]
        self.img_files = [line for line in stripped if line]

        self.label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
                            for x in self.img_files]

        self.nF = len(self.img_files)  # number of image files
        self.width = img_size[0]
        self.height = img_size[1]
        self.augment = augment
        self.transforms = transforms

    def __getitem__(self, files_index):
        img_path = self.img_files[files_index]
        label_path = self.label_files[files_index]
        return self.get_data(img_path, label_path)

    def get_data(self, img_path, label_path):
        """Load, augment and letterbox one sample.

        Returns:
            ``(img, labels, img_path, (h, w))`` where ``img`` is the RGB
            letterboxed image (after ``self.transforms`` if given), ``labels``
            is an ``(N, 6)`` float32 array whose columns 2..5 are the box as
            center-x, center-y, width, height normalized by the letterboxed
            size, and ``(h, w)`` is the size of the image as read from disk.

        Raises:
            ValueError: If the image cannot be read.
        """
        height = self.height
        width = self.width
        img = cv2.imread(img_path)  # BGR
        if img is None:
            raise ValueError('File corrupt {}'.format(img_path))

        augment_hsv = True
        if self.augment and augment_hsv:
            # Scale saturation, then value, each by its own random gain in
            # [1 - fraction, 1 + fraction).
            fraction = 0.50
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            for channel in (1, 2):  # 1 = S, 2 = V
                gain = (random.random() * 2 - 1) * fraction + 1
                values = img_hsv[:, :, channel].astype(np.float32)
                values *= gain
                if gain > 1:
                    # Only a gain above 1 can push values past 255.
                    np.clip(values, a_min=0, a_max=255, out=values)
                img_hsv[:, :, channel] = values.astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=height, width=width)

        # Load labels
        if os.path.isfile(label_path):
            labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)

            # Normalized xywh to pixel xyxy format (in letterboxed coordinates)
            labels = labels0.copy()
            labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw
            labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh
            labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw
            labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh
        else:
            # Same shape and dtype as the label-file branch with zero rows, so
            # callers see one consistent layout.
            labels = np.zeros((0, 6), dtype=np.float32)

        # Augment image and labels
        if self.augment:
            img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))

        plotFlag = False  # debugging aid: set True to dump the augmented sample
        if plotFlag:
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            plt.figure(figsize=(50, 50))
            plt.imshow(img[:, :, ::-1])
            # Columns 2..5 hold x1, y1, x2, y2 at this point.
            plt.plot(labels[:, [2, 4, 4, 2, 2]].T, labels[:, [3, 3, 5, 5, 3]].T, '.-')
            plt.axis('off')
            plt.savefig('test.jpg')
            time.sleep(10)

        nL = len(labels)
        if nL > 0:
            # convert xyxy to xywh, then normalize by the letterboxed size
            labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy())
            labels[:, 2] /= width
            labels[:, 3] /= height
            labels[:, 4] /= width
            labels[:, 5] /= height
        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL > 0:
                    # Mirror the normalized center-x; width is unaffected.
                    labels[:, 2] = 1 - labels[:, 2]

        img = np.ascontiguousarray(img[:, :, ::-1])  # BGR to RGB
        if self.transforms is not None:
            img = self.transforms(img)

        return img, labels, img_path, (h, w)

    def __len__(self):
        return self.nF  # number of image files


def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)):
    """Resize ``img`` to fit inside ``width`` x ``height`` and pad it to exactly that size.

    The image is scaled by a single factor so that its aspect ratio is kept,
    then a constant-colour border is added around it.

    Returns:
        ``(padded_img, ratio, dw, dh)`` where ``ratio`` is the scale factor
        applied and ``dw`` / ``dh`` are the (possibly fractional) half
        paddings along the width / height.
    """
    src_h, src_w = img.shape[:2]
    scale = min(float(height) / src_h, float(width) / src_w)

    fitted_w = round(src_w * scale)
    fitted_h = round(src_h * scale)
    pad_x = (width - fitted_w) / 2  # width padding
    pad_y = (height - fitted_h) / 2  # height padding

    # The -0.1 / +0.1 nudge splits an odd total padding into floor on one side
    # and ceil on the other; an even total is split evenly.
    pad_top = round(pad_y - 0.1)
    pad_bottom = round(pad_y + 0.1)
    pad_left = round(pad_x - 0.1)
    pad_right = round(pad_x + 0.1)

    resized = cv2.resize(img, (fitted_w, fitted_h), interpolation=cv2.INTER_AREA)  # resized, no border
    padded = cv2.copyMakeBorder(resized, pad_top, pad_bottom, pad_left, pad_right,
                                cv2.BORDER_CONSTANT, value=color)  # padded rectangular
    return padded, scale, pad_x, pad_y


def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    """Apply a random rotation, scale, translation and shear to an image and its boxes.

    Args:
        img: Image array; the output has the same height and width.
        targets: Optional array whose columns 2..5 are boxes as pixel
            ``x1, y1, x2, y2``. Other columns are carried along unchanged.
        degrees: ``(min, max)`` rotation angle in degrees.
        translate: ``(x, y)`` maximum shift as a fraction of image width / height.
        scale: ``(min, max)`` isotropic scale factor.
        shear: ``(min, max)`` shear angle in degrees, drawn separately for x and y.
        borderValue: Fill colour for areas uncovered by the warp (BGR order).

    Returns:
        ``(warped_img, targets, M)`` when ``targets`` is not ``None``, where
        ``M`` is the 3x3 transform and ``targets`` holds only the boxes that
        survive the warp; just ``warped_img`` when ``targets`` is ``None``.
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    border = 0  # width of added border (optional)
    height = img.shape[0]
    width = img.shape[1]

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(width / 2, height / 2), scale=s)

    # Translation: x is a fraction of the width, y a fraction of the height.
    # (These axes used to be swapped, scaling x by the height and y by the width.)
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * width + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * height + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Return warped points also
    if targets is not None:
        if len(targets) > 0:
            n = targets.shape[0]
            points = targets[:, 2:6].copy()
            area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

            # warp the four corners of every box
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # create new axis-aligned boxes enclosing the warped corners
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction: shrink each box about its center to
            # offset the growth of the enclosing box under rotation
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # clip boxes to the image, then reject those that became too
            # small, lost too much area, or are too elongated
            np.clip(xy[:, 0], 0, width, out=xy[:, 0])
            np.clip(xy[:, 2], 0, width, out=xy[:, 2])
            np.clip(xy[:, 1], 0, height, out=xy[:, 1])
            np.clip(xy[:, 3], 0, height, out=xy[:, 3])
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

            # Boolean indexing copies, so the caller's array is not modified.
            targets = targets[i]
            targets[:, 2:6] = xy[i]

        return imw, targets, M
    else:
        return imw

def collate_fn(batch):
    """Merge ``(img, labels, path, size)`` samples into one padded batch.

    Label arrays have differing row counts, so they are copied into a
    zero-filled ``(batch, max_rows, 6)`` tensor.

    Returns:
        ``(imgs, filled_labels, paths, sizes, labels_len)`` where ``imgs`` is
        the stacked image tensor, ``paths`` and ``sizes`` are tuples taken
        from the samples, and ``labels_len`` is a ``(batch, 1)`` tensor with
        the number of label rows of each sample.
    """
    imgs, labels, paths, sizes = zip(*batch)
    n_samples = len(labels)
    stacked_imgs = torch.stack(imgs, 0)

    row_counts = [lab.shape[0] for lab in labels]
    label_tensors = [torch.from_numpy(lab) for lab in labels]

    padded = torch.zeros(n_samples, max(row_counts), 6)
    counts = torch.zeros(n_samples)
    for row, (tensor, n_rows) in enumerate(zip(label_tensors, row_counts)):
        if n_rows > 0:
            padded[row, :n_rows, :] = tensor
        counts[row] = n_rows

    return stacked_imgs, padded, paths, sizes, counts.unsqueeze(1)


class JointDataset(LoadImagesAndLabels):  # for training
    """Concatenation of several image-list datasets with one shared identity space.

    Column 1 of every label row is treated as an identity index local to its
    dataset. Each dataset is assigned a start offset equal to the total
    identity count of the datasets before it, and ``__getitem__`` adds that
    offset to every identity greater than -1.

    Args:
        root: Directory prepended to every image path read from the lists.
        paths: Mapping of dataset name to a text file listing image paths,
            one per line.
        img_size: ``(width, height)`` of the letterboxed output.
        augment: Passed through to the inherited ``get_data`` logic.
        transforms: Optional callable applied to the final image.
    """

    def __init__(self, root, paths, img_size=(1088, 608), augment=False, transforms=None):
        self.img_files = OrderedDict()
        self.label_files = OrderedDict()
        self.tid_num = OrderedDict()
        self.tid_start_index = OrderedDict()
        for ds, path in paths.items():
            with open(path, 'r') as file:
                names = [line.strip() for line in file]
            # Drop blank lines BEFORE joining with root: once joined, a blank
            # entry turns into root itself and can no longer be recognised.
            self.img_files[ds] = [osp.join(root, name) for name in names if name]

            self.label_files[ds] = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')
                                    for x in self.img_files[ds]]

        # Number of identities per dataset = largest identity index seen + 1.
        for ds, label_paths in self.label_files.items():
            max_index = -1
            for lp in label_paths:
                lb = np.loadtxt(lp)
                if len(lb) < 1:
                    continue  # empty label file
                # loadtxt returns a 1-D array for a single-row file.
                if len(lb.shape) < 2:
                    img_max = lb[1]
                else:
                    img_max = np.max(lb[:, 1])
                if img_max > max_index:
                    max_index = img_max
            # Stored as int: these are counts and are used as index offsets.
            self.tid_num[ds] = int(max_index) + 1

        last_index = 0
        for k, v in self.tid_num.items():
            self.tid_start_index[k] = last_index
            last_index += v

        self.nID = int(last_index + 1)
        self.nds = [len(x) for x in self.img_files.values()]  # images per dataset
        self.cds = [sum(self.nds[:i]) for i in range(len(self.nds))]  # global index of each dataset's first image
        self.nF = sum(self.nds)
        self.width = img_size[0]
        self.height = img_size[1]
        self.augment = augment
        self.transforms = transforms

        print('='*80)
        print('dataset summary')
        print(self.tid_num)
        print('total # identities:', self.nID)
        print('start index')
        print(self.tid_start_index)
        print('='*80)

    def __getitem__(self, files_index):
        """Return ``(img, labels, img_path, (h, w))`` for a global sample index.

        Raises:
            IndexError: If ``files_index`` is out of range.
        """
        # Support negative indices and fail cleanly when out of range, rather
        # than hitting an unbound local in the lookup below.
        if files_index < 0:
            files_index += self.nF
        if not 0 <= files_index < self.nF:
            raise IndexError('index {} out of range for {} samples'.format(files_index, self.nF))

        # The owning dataset is the last one whose first global index is not
        # past files_index.
        ds_names = list(self.label_files.keys())
        for i, c in enumerate(self.cds):
            if files_index >= c:
                ds = ds_names[i]
                start_index = c
        img_path = self.img_files[ds][files_index - start_index]
        label_path = self.label_files[ds][files_index - start_index]

        imgs, labels, img_path, (h, w) = self.get_data(img_path, label_path)
        if len(labels) > 0:
            # Shift dataset-local identities into the joint identity space.
            # Rows whose identity is not greater than -1 are left untouched
            # (presumably -1 marks a box without an identity; verify against
            # the label format).
            has_id = labels[:, 1] > -1
            labels[has_id, 1] += self.tid_start_index[ds]

        return imgs, labels, img_path, (h, w)
replace maskrcnn-benchmark nms with torchvision nms 2020-01-09 15:48:17 +01:00			`import glob`
			`import math`
			`import os`
			`import os.path as osp`
			`import random`
			`import time`
			`from collections import OrderedDict`

			`import cv2`
			`import numpy as np`
			`import torch`

			`from torch.utils.data import Dataset`
			`from utils.utils import xyxy2xywh`

			`class LoadImages: # for inference`
			`def __init__(self, path, img_size=(1088, 608)):`
			`if os.path.isdir(path):`
			`image_format = ['.jpg', '.jpeg', '.png', '.tif']`
			`self.files = sorted(glob.glob('%s/*.*' % path))`
			`self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))`
			`elif os.path.isfile(path):`
			`self.files = [path]`

			`self.nF = len(self.files) # number of image files`
			`self.width = img_size[0]`
			`self.height = img_size[1]`
			`self.count = 0`

			`assert self.nF > 0, 'No images found in ' + path`

			`def __iter__(self):`
			`self.count = -1`
			`return self`

			`def __next__(self):`
			`self.count += 1`
			`if self.count == self.nF:`
			`raise StopIteration`
			`img_path = self.files[self.count]`

			`# Read image`
			`img0 = cv2.imread(img_path) # BGR`
			`assert img0 is not None, 'Failed to load ' + img_path`

			`# Padded resize`
			`img, _, _, _ = letterbox(img0, height=self.height, width=self.width)`

			`# Normalize RGB`
			`img = img[:, :, ::-1].transpose(2, 0, 1)`
			`img = np.ascontiguousarray(img, dtype=np.float32)`
			`img /= 255.0`

			`# cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image`
			`return img_path, img, img0`

			`def __getitem__(self, idx):`
			`idx = idx % self.nF`
			`img_path = self.files[idx]`

			`# Read image`
			`img0 = cv2.imread(img_path) # BGR`
			`assert img0 is not None, 'Failed to load ' + img_path`

			`# Padded resize`
			`img, _, _, _ = letterbox(img0, height=self.height, width=self.width)`

			`# Normalize RGB`
			`img = img[:, :, ::-1].transpose(2, 0, 1)`
			`img = np.ascontiguousarray(img, dtype=np.float32)`
			`img /= 255.0`

			`return img_path, img, img0`

			`def __len__(self):`
			`return self.nF # number of files`


			`class LoadVideo: # for inference`
			`def __init__(self, path, img_size=(1088, 608)):`
			`self.cap = cv2.VideoCapture(path)`
			`self.frame_rate = int(round(self.cap.get(cv2.CAP_PROP_FPS)))`
			`self.vw = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))`
			`self.vh = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))`
			`self.vn = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))`

			`self.width = img_size[0]`
			`self.height = img_size[1]`
			`self.count = 0`

			`self.w, self.h = self.get_size(self.vw, self.vh, self.width, self.height)`
			`print('Lenth of the video: {:d} frames'.format(self.vn))`

			`def get_size(self, vw, vh, dw, dh):`
			`wa, ha = float(dw) / vw, float(dh) / vh`
			`a = min(wa, ha)`
			`return int(vw *a), int(vh*a)`

			`def __iter__(self):`
			`self.count = -1`
			`return self`

			`def __next__(self):`
			`self.count += 1`
			`if self.count == len(self):`
			`raise StopIteration`
			`# Read image`
			`res, img0 = self.cap.read() # BGR`
			`assert img0 is not None, 'Failed to load frame {:d}'.format(self.count)`
			`img0 = cv2.resize(img0, (self.w, self.h))`

			`# Padded resize`
			`img, _, _, _ = letterbox(img0, height=self.height, width=self.width)`

			`# Normalize RGB`
			`img = img[:, :, ::-1].transpose(2, 0, 1)`
			`img = np.ascontiguousarray(img, dtype=np.float32)`
			`img /= 255.0`

			`# cv2.imwrite(img_path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image`
			`return self.count, img, img0`

			`def __len__(self):`
			`return self.vn # number of files`


			`class LoadImagesAndLabels: # for training`
			`def __init__(self, path, img_size=(1088,608), augment=False, transforms=None):`
			`with open(path, 'r') as file:`
			`self.img_files = file.readlines()`
			`self.img_files = [x.replace('\n', '') for x in self.img_files]`
			`self.img_files = list(filter(lambda x: len(x) > 0, self.img_files))`

			`self.label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')`
			`for x in self.img_files]`

			`self.nF = len(self.img_files) # number of image files`
			`self.width = img_size[0]`
			`self.height = img_size[1]`
			`self.augment = augment`
			`self.transforms = transforms`


			`def __getitem__(self, files_index):`
			`img_path = self.img_files[files_index]`
			`label_path = self.label_files[files_index]`
			`return self.get_data(img_path, label_path)`

			`def get_data(self, img_path, label_path):`
			`height = self.height`
			`width = self.width`
			`img = cv2.imread(img_path) # BGR`
			`if img is None:`
			`raise ValueError('File corrupt {}'.format(img_path))`
			`augment_hsv = True`
			`if self.augment and augment_hsv:`
			`# SV augmentation by 50%`
			`fraction = 0.50`
			`img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)`
			`S = img_hsv[:, :, 1].astype(np.float32)`
			`V = img_hsv[:, :, 2].astype(np.float32)`

			`a = (random.random() * 2 - 1) * fraction + 1`
			`S *= a`
			`if a > 1:`
			`np.clip(S, a_min=0, a_max=255, out=S)`

			`a = (random.random() * 2 - 1) * fraction + 1`
			`V *= a`
			`if a > 1:`
			`np.clip(V, a_min=0, a_max=255, out=V)`

			`img_hsv[:, :, 1] = S.astype(np.uint8)`
			`img_hsv[:, :, 2] = V.astype(np.uint8)`
			`cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)`

			`h, w, _ = img.shape`
			`img, ratio, padw, padh = letterbox(img, height=height, width=width)`

			`# Load labels`
			`if os.path.isfile(label_path):`
			`labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6)`

			`# Normalized xywh to pixel xyxy format`
			`labels = labels0.copy()`
			`labels[:, 2] = ratio * w * (labels0[:, 2] - labels0[:, 4] / 2) + padw`
			`labels[:, 3] = ratio * h * (labels0[:, 3] - labels0[:, 5] / 2) + padh`
			`labels[:, 4] = ratio * w * (labels0[:, 2] + labels0[:, 4] / 2) + padw`
			`labels[:, 5] = ratio * h * (labels0[:, 3] + labels0[:, 5] / 2) + padh`
			`else:`
			`labels = np.array([])`

			`# Augment image and labels`
			`if self.augment:`
			`img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.50, 1.20))`


			`plotFlag = False`
			`if plotFlag:`
			`import matplotlib`
			`matplotlib.use('Agg')`
			`import matplotlib.pyplot as plt`
			`plt.figure(figsize=(50, 50))`
			`plt.imshow(img[:, :, ::-1])`
			`plt.plot(labels[:, [1, 3, 3, 1, 1]].T, labels[:, [2, 2, 4, 4, 2]].T, '.-')`
			`plt.axis('off')`
			`plt.savefig('test.jpg')`
			`time.sleep(10)`

			`nL = len(labels)`
			`if nL > 0:`
			`# convert xyxy to xywh`
			`labels[:, 2:6] = xyxy2xywh(labels[:, 2:6].copy()) #/ height`
			`labels[:, 2] /= width`
			`labels[:, 3] /= height`
			`labels[:, 4] /= width`
			`labels[:, 5] /= height`
			`if self.augment:`
			`# random left-right flip`
			`lr_flip = True`
			`if lr_flip & (random.random() > 0.5):`
			`img = np.fliplr(img)`
			`if nL > 0:`
			`labels[:, 2] = 1 - labels[:, 2]`

			`img = np.ascontiguousarray(img[ :, :, ::-1]) # BGR to RGB`
			`if self.transforms is not None:`
			`img = self.transforms(img)`

			`return img, labels, img_path, (h, w)`

			`def __len__(self):`
			`return self.nF # number of batches`


			`def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)): # resize a rectangular image to a padded rectangular`
			`shape = img.shape[:2] # shape = [height, width]`
			`ratio = min(float(height)/shape[0], float(width)/shape[1])`
			`new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height]`
			`dw = (width - new_shape[0]) / 2 # width padding`
			`dh = (height - new_shape[1]) / 2 # height padding`
			`top, bottom = round(dh - 0.1), round(dh + 0.1)`
			`left, right = round(dw - 0.1), round(dw + 0.1)`
			`img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border`
			`img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular`
			`return img, ratio, dw, dh`


			`def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),`
			`borderValue=(127.5, 127.5, 127.5)):`
			`# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))`
			`# https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4`

			`border = 0 # width of added border (optional)`
			`height = img.shape[0]`
			`width = img.shape[1]`

			`# Rotation and Scale`
			`R = np.eye(3)`
			`a = random.random() * (degrees[1] - degrees[0]) + degrees[0]`
			`# a += random.choice([-180, -90, 0, 90]) # 90deg rotations added to small rotations`
			`s = random.random() * (scale[1] - scale[0]) + scale[0]`
			`R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)`

			`# Translation`
			`T = np.eye(3)`
			`T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border # x translation (pixels)`
			`T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border # y translation (pixels)`

			`# Shear`
			`S = np.eye(3)`
			`S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # x shear (deg)`
			`S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180) # y shear (deg)`

			`M = S @ T @ R # Combined rotation matrix. ORDER IS IMPORTANT HERE!!`
			`imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,`
			`borderValue=borderValue) # BGR order borderValue`

			`# Return warped points also`
			`if targets is not None:`
			`if len(targets) > 0:`
			`n = targets.shape[0]`
			`points = targets[:, 2:6].copy()`
			`area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])`

			`# warp points`
			`xy = np.ones((n * 4, 3))`
			`xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1`
			`xy = (xy @ M.T)[:, :2].reshape(n, 8)`

			`# create new boxes`
			`x = xy[:, [0, 2, 4, 6]]`
			`y = xy[:, [1, 3, 5, 7]]`
			`xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T`

			`# apply angle-based reduction`
			`radians = a * math.pi / 180`
			`reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5`
			`x = (xy[:, 2] + xy[:, 0]) / 2`
			`y = (xy[:, 3] + xy[:, 1]) / 2`
			`w = (xy[:, 2] - xy[:, 0]) * reduction`
			`h = (xy[:, 3] - xy[:, 1]) * reduction`
			`xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T`

			`# reject warped points outside of image`
			`np.clip(xy[:, 0], 0, width, out=xy[:, 0])`
			`np.clip(xy[:, 2], 0, width, out=xy[:, 2])`
			`np.clip(xy[:, 1], 0, height, out=xy[:, 1])`
			`np.clip(xy[:, 3], 0, height, out=xy[:, 3])`
			`w = xy[:, 2] - xy[:, 0]`
			`h = xy[:, 3] - xy[:, 1]`
			`area = w * h`
			`ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))`
			`i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)`

			`targets = targets[i]`
			`targets[:, 2:6] = xy[i]`

			`return imw, targets, M`
			`else:`
			`return imw`

			`def collate_fn(batch):`
			`imgs, labels, paths, sizes = zip(*batch)`
			`batch_size = len(labels)`
			`imgs = torch.stack(imgs, 0)`
			`max_box_len = max([l.shape[0] for l in labels])`
			`labels = [torch.from_numpy(l) for l in labels]`
			`filled_labels = torch.zeros(batch_size, max_box_len, 6)`
			`labels_len = torch.zeros(batch_size)`

			`for i in range(batch_size):`
			`isize = labels[i].shape[0]`
			`if len(labels[i])>0:`
			`filled_labels[i, :isize, :] = labels[i]`
			`labels_len[i] = isize`

			`return imgs, filled_labels, paths, sizes, labels_len.unsqueeze(1)`


			`class JointDataset(LoadImagesAndLabels): # for training`
			`def __init__(self, root, paths, img_size=(1088,608), augment=False, transforms=None):`

			`dataset_names = paths.keys()`
			`self.img_files = OrderedDict()`
			`self.label_files = OrderedDict()`
			`self.tid_num = OrderedDict()`
			`self.tid_start_index = OrderedDict()`
			`for ds, path in paths.items():`
			`with open(path, 'r') as file:`
			`self.img_files[ds] = file.readlines()`
			`self.img_files[ds] = [osp.join(root, x.strip()) for x in self.img_files[ds]]`
			`self.img_files[ds] = list(filter(lambda x: len(x) > 0, self.img_files[ds]))`

			`self.label_files[ds] = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')`
			`for x in self.img_files[ds]]`

			`for ds, label_paths in self.label_files.items():`
			`max_index = -1`
			`for lp in label_paths:`
			`lb = np.loadtxt(lp)`
			`if len(lb) < 1:`
			`continue`
			`if len(lb.shape) < 2:`
			`img_max = lb[1]`
			`else:`
			`img_max = np.max(lb[:,1])`
			`if img_max >max_index:`
			`max_index = img_max`
			`self.tid_num[ds] = max_index + 1`

			`last_index = 0`
			`for i, (k, v) in enumerate(self.tid_num.items()):`
			`self.tid_start_index[k] = last_index`
			`last_index += v`

			`self.nID = int(last_index+1)`
			`self.nds = [len(x) for x in self.img_files.values()]`
			`self.cds = [sum(self.nds[:i]) for i in range(len(self.nds))]`
			`self.nF = sum(self.nds)`
			`self.width = img_size[0]`
			`self.height = img_size[1]`
			`self.augment = augment`
			`self.transforms = transforms`

			`print('='*80)`
			`print('dataset summary')`
			`print(self.tid_num)`
			`print('total # identities:', self.nID)`
			`print('start index')`
			`print(self.tid_start_index)`
			`print('='*80)`


			`def __getitem__(self, files_index):`
			`for i, c in enumerate(self.cds):`
			`if files_index >= c:`
			`ds = list(self.label_files.keys())[i]`
			`start_index = c`
			`img_path = self.img_files[ds][files_index - start_index]`
			`label_path = self.label_files[ds][files_index - start_index]`

			`imgs, labels, img_path, (h, w) = self.get_data(img_path, label_path)`
			`for i, _ in enumerate(labels):`
			`if labels[i,1] > -1:`
			`labels[i,1] += self.tid_start_index[ds]`

			`return imgs, labels, img_path, (h, w)`