# Source code for densetorch.data.utils

import cv2
import numpy as np
import torch
from torch.utils.data import DataLoader

from ..misc.utils import broadcast, make_list

# Torch dtype conventionally used for each annotation modality: label-like
# targets are integer (long), regression-like targets are floating point.
KEYS_TO_DTYPES = dict(
    segm=torch.long,
    mask=torch.long,
    depth=torch.float,
    normals=torch.float,
)


class Pad(object):
    """Pad image and masks up to a minimum spatial size.

    Any missing rows / columns are split between the two sides of the axis;
    when the amount is odd, the extra row / column goes to the bottom / right.
    An axis that is already at least as large as requested is left unchanged.

    Only the first three channels of the image are read, each padded with its
    own value. Masks are padded along their two leading (spatial) axes only,
    so both H x W and H x W x C masks are supported.

    Args:
      size (int) : minimum length/width.
      img_val (array) : image padding value.
      msk_vals (list of ints) : masks padding value.

    """

    def __init__(self, size, img_val, msk_vals):
        # NOTE(review): `broadcast` / `make_list` come from ..misc.utils;
        # presumably they expand a scalar into a fixed-length sequence and
        # wrap a scalar into a list, respectively - confirm against that module.
        self.hw_size = broadcast(size, 2)
        self.img_val = broadcast(img_val, 3)
        self.msk_vals = make_list(msk_vals)

    def __call__(self, sample):
        """Pad ``sample["image"]`` and every mask named in ``sample["names"]``.

        Args:
          sample (dict) : must hold "image" (H x W x C array with C >= 3),
            "names" (list of mask keys) and one array per listed key.

        Returns:
          The same dict, with the image and masks replaced by padded copies.

        """
        image = sample["image"]
        msk_keys = sample["names"]
        h, w = image.shape[:2]
        h_pad = max(self.hw_size[0] - h, 0)
        w_pad = max(self.hw_size[1] - w, 0)
        top_pad = h_pad // 2
        left_pad = w_pad // 2
        spatial_pad = (
            (top_pad, h_pad - top_pad),
            (left_pad, w_pad - left_pad),
        )
        # np.pad takes a single constant per axis side, so channels are padded
        # one at a time to give each its own fill value.
        sample["image"] = np.stack(
            [
                np.pad(
                    image[:, :, c],
                    spatial_pad,
                    mode="constant",
                    constant_values=self.img_val[c],
                )
                for c in range(3)
            ],
            axis=2,
        )
        # zip() stops at the shorter sequence: masks without a matching entry
        # in msk_vals are left unpadded.
        for msk_key, msk_val in zip(msk_keys, self.msk_vals):
            mask = sample[msk_key]
            # Trailing (non-spatial) axes, e.g. the 3 components of surface
            # normals, must not grow - give them zero padding.
            extra_axes = max(np.ndim(mask) - 2, 0)
            sample[msk_key] = np.pad(
                mask,
                spatial_pad + ((0, 0),) * extra_axes,
                mode="constant",
                constant_values=msk_val,
            )
        return sample


class RandomCrop(object):
    """Crop a random square window out of the image and its masks.

    The same window is applied to the image and to every mask listed in
    ``sample["names"]``. If the input is smaller than the crop size along an
    axis, that axis is kept whole.

    Args:
        crop_size (int): Desired output size. Odd values are reduced by one
            so that the stored size is always even.

    """

    def __init__(self, crop_size):
        assert isinstance(crop_size, int)
        # Round odd sizes down to the nearest even number.
        self.crop_size = crop_size - crop_size % 2

    def __call__(self, sample):
        """Crop ``sample`` in place and return it.

        Args:
            sample (dict): holds "image", "names" (list of mask keys) and one
                array per listed key, all sharing the image's height/width.

        Returns:
            The same dict with cropped image and masks.

        """
        image = sample["image"]
        msk_keys = sample["names"]
        h, w = image.shape[:2]
        new_h = min(h, self.crop_size)
        new_w = min(w, self.crop_size)
        # Upper bound of randint is exclusive, hence the +1: every valid
        # top-left corner is equally likely.
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)
        rows = slice(top, top + new_h)
        cols = slice(left, left + new_w)
        sample["image"] = image[rows, cols]
        for msk_key in msk_keys:
            sample[msk_key] = sample[msk_key][rows, cols]
        return sample


class ResizeAndScale(object):
    """Randomly rescale a sample while bounding one of its sides.

    A scale factor is drawn uniformly from ``[low_scale, high_scale)``. It is
    then overridden when it would violate the side constraint:

    * ``shorter=True``: the shorter side must not end up below ``side``;
    * ``shorter=False``: the longer side must not end up above ``side``.

    The image is resized with bicubic interpolation, the masks with
    nearest-neighbour interpolation. Masks whose key contains "depth" are
    additionally multiplied by ``1 / scale``.

    Args:
        side (int) : shorter / longer side value.
        low_scale (float) : lower scaling bound.
        high_scale (float) : upper scaling bound.
        shorter (bool) : whether to resize shorter / longer side.

    """

    def __init__(self, side, low_scale, high_scale, shorter=True):
        assert isinstance(side, int)
        assert isinstance(low_scale, float)
        assert isinstance(high_scale, float)
        self.side = side
        self.low_scale = low_scale
        self.high_scale = high_scale
        self.shorter = shorter

    def __call__(self, sample):
        """Rescale ``sample["image"]`` and every mask in ``sample["names"]``.

        Args:
            sample (dict): holds "image", "names" (list of mask keys) and one
                array per listed key.

        Returns:
            The same dict with resized image and masks.

        """
        image = sample["image"]
        msk_keys = sample["names"]
        scale = np.random.uniform(self.low_scale, self.high_scale)
        if self.shorter:
            # Never let the shorter side drop below the requested value.
            min_side = min(image.shape[:2])
            if min_side * scale < self.side:
                scale = self.side * 1.0 / min_side
        else:
            # Never let the longer side exceed the requested value.
            max_side = max(image.shape[:2])
            if max_side * scale > self.side:
                scale = self.side * 1.0 / max_side
        sample["image"] = cv2.resize(
            image, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC
        )
        for msk_key in msk_keys:
            # Depth values are divided by the zoom factor; every other
            # modality keeps its values (multiplied by 1).
            scale_mult = (1.0 / scale) if "depth" in msk_key else 1
            # Nearest-neighbour resizing avoids inventing in-between values.
            sample[msk_key] = scale_mult * cv2.resize(
                sample[msk_key],
                None,
                fx=scale,
                fy=scale,
                interpolation=cv2.INTER_NEAREST,
            )
        return sample


class RandomMirror(object):
    """Flip the image and its masks horizontally with probability 0.5.

    Masks whose key contains "normal" additionally get their first channel
    negated after the flip.
    """

    def __call__(self, sample):
        """Mirror ``sample`` (or leave it untouched) and return it.

        Args:
            sample (dict): holds "image", "names" (list of mask keys) and one
                array per listed key.

        Returns:
            The same dict; image and masks are replaced only when the coin
            toss selects mirroring.

        """
        image = sample["image"]
        msk_keys = sample["names"]
        # Single fair coin toss: 0 -> keep as is, 1 -> mirror.
        if not np.random.randint(2):
            return sample
        # flipCode=1 flips around the vertical axis (left <-> right).
        sample["image"] = cv2.flip(image, 1)
        for msk_key in msk_keys:
            flipped = cv2.flip(sample[msk_key], 1)
            # Per-channel sign for normals (first channel negated);
            # all other modalities are multiplied by 1.
            scale_mult = [-1, 1, 1] if "normal" in msk_key else 1
            sample[msk_key] = scale_mult * flipped
        return sample


class Normalise(object):
    """Normalise an image with mean and standard deviation.

    Given mean: (R, G, B) and std: (R, G, B), each channel is transformed as
    channel = (scale * channel - mean) / std

    If the sample holds a "depth" entry, it is divided by ``depth_scale``.

    Args:
        scale (float): Scaling constant.
        mean (sequence): Sequence of means for R,G,B channels respectively.
        std (sequence): Sequence of standard deviations for R,G,B channels
            respectively.
        depth_scale (float): Depth divisor for depth annotations.

    """

    def __init__(self, scale, mean, std, depth_scale=1.0):
        self.scale = scale
        self.mean = mean
        self.std = std
        self.depth_scale = depth_scale

    def __call__(self, sample):
        """Normalise ``sample["image"]`` (and "depth", if present).

        Args:
            sample (dict): holds "image" and optionally "depth".
                NOTE(review): the arithmetic relies on broadcasting, so
                mean/std presumably need to be arrays (or scalars) compatible
                with an H x W x C image - confirm against callers.

        Returns:
            The same dict with normalised entries.

        """
        image = sample["image"]
        sample["image"] = (self.scale * image - self.mean) / self.std
        if "depth" in sample:
            sample["depth"] = sample["depth"] / self.depth_scale
        return sample


class ToTensor(object):
    """Convert the ndarrays of a sample into torch Tensors.

    The image is re-ordered from H x W x C to C x H x W. Every mask listed in
    ``sample["names"]`` is converted and cast to the dtype registered for its
    key in ``KEYS_TO_DTYPES``.
    """

    def __call__(self, sample):
        """Convert ``sample`` in place and return it.

        Args:
            sample (dict): holds "image" (H x W x C ndarray), "names" (list
                of mask keys) and one ndarray per listed key.

        Returns:
            The same dict with tensors instead of ndarrays.

        Raises:
            KeyError: if a mask key has no entry in ``KEYS_TO_DTYPES``.

        """
        image = sample["image"]
        msk_keys = sample["names"]
        # numpy stores images channel-last (H x W x C), torch expects
        # channel-first (C x H x W).
        sample["image"] = torch.from_numpy(image.transpose(2, 0, 1))
        for msk_key in msk_keys:
            target_dtype = KEYS_TO_DTYPES[msk_key]
            sample[msk_key] = torch.from_numpy(sample[msk_key]).to(target_dtype)
        return sample


def albumentations2densetorch(augmentation):
    """Wrapper to use Albumentations within DenseTorch dataset.

    The returned callable takes a DenseTorch sample dict, removes its "names"
    entry (in place), and feeds all remaining entries to an Albumentations
    ``Compose``: the "image" key is declared as an image target, every other
    key as a mask target.

    Args:
      augmentation: either a list of augmentations or a single augmentation

    Returns:
      A composition of augmentations

    """
    # Imported lazily so that albumentations is only required when this
    # wrapper is actually used.
    from albumentations import Compose

    def wrapper_func(sample):
        # "names" is DenseTorch bookkeeping, not data to be augmented.
        if "names" in sample:
            del sample["names"]
        targets = {
            name: "image" if name == "image" else "mask" for name in sample.keys()
        }
        # The pipeline is rebuilt on every call because the set of targets
        # depends on the keys of the incoming sample.
        pipeline = Compose(make_list(augmentation), additional_targets=targets)
        return pipeline(**sample)

    return wrapper_func


def densetorch2torchvision(augmentation):
    """Wrapper to use DenseTorch augmentations within torchvision dataset.

    The returned callable takes an ``(image, target)`` pair, converts the
    image to a float32 array and the target to a uint8 array, packs them into
    a DenseTorch sample dict (keys "image" and "mask", with "mask" as the only
    named mask), and runs the augmentations on it.

    Args:
      augmentation: either a list of augmentations or a single augmentation

    Returns:
      A composition of augmentations.

    """
    # Imported lazily so that torchvision is only required when this wrapper
    # is actually used.
    from torchvision.transforms import Compose

    def wrapper_func(image, target):
        keys = ["image", "mask"]
        names = ["mask"]
        np_dtypes = [np.float32, np.uint8]
        torch_dtypes = [torch.float32, torch.long]
        sample_dict = {
            key: np.array(value, dtype=dtype)
            for key, value, dtype in zip(keys, [image, target], np_dtypes)
        }
        sample_dict["names"] = names
        output = Compose(make_list(augmentation))(sample_dict)
        # `.to(dtype)` is called on the outputs, so the pipeline is expected
        # to leave torch tensors under both keys (presumably by ending with a
        # tensor conversion - confirm against callers).
        return [output[key].to(dtype) for key, dtype in zip(keys, torch_dtypes)]

    return wrapper_func


def denormalise(tensor_bchw, scale, mean_c, std_c):
    """Reversed normalisation

    Computes ``(tensor * std + mean) / scale``, with the per-channel mean and
    standard deviation broadcast over the batch and spatial dimensions.

    Args:
      tensor_bchw (torch.tensor): 4D tensor of shape BxCxHxW
      scale (float): scale value
      mean_c (np.ndarray): mean array of shape (C,)
      std_c (np.ndarray): standard deviation array of shape (C,)

    Returns:
      Un-normalised torch tensor.

    """

    def _as_bchw(values_c):
        # (C,) -> (1, C, 1, 1) float tensor on the same device as the input,
        # so that it broadcasts against a BxCxHxW tensor.
        expanded = values_c[None, :, None, None]
        return torch.from_numpy(expanded).float().to(tensor_bchw.device)

    mean_bchw = _as_bchw(mean_c)
    std_bchw = _as_bchw(std_c)
    return (tensor_bchw * std_bchw + mean_bchw) / scale


def get_loaders(
    train_batch_size,
    val_batch_size,
    train_set,
    val_set,
    num_stages=1,
    num_workers=8,
    train_shuffle=True,
    val_shuffle=False,
    train_pin_memory=False,
    val_pin_memory=False,
    train_drop_last=False,
    val_drop_last=False,
):
    """Create train and val loaders

    One training loader is built per stage; a single validation loader is
    shared by all stages.

    Args:
      train_batch_size: training batch size, passed through
        ``broadcast(..., num_stages)`` (presumably a single value or one
        value per stage - confirm against ``broadcast``).
      val_batch_size (int): validation batch size.
      train_set: training dataset, passed through
        ``broadcast(..., num_stages)`` like the batch size.
      val_set: validation dataset.
      num_stages (int): number of training loaders to create.
      num_workers (int): worker count used for every loader.
      train_shuffle (bool): ``shuffle`` flag of the training loaders.
      val_shuffle (bool): ``shuffle`` flag of the validation loader.
      train_pin_memory (bool): ``pin_memory`` flag of the training loaders.
      val_pin_memory (bool): ``pin_memory`` flag of the validation loader.
      train_drop_last (bool): ``drop_last`` flag of the training loaders.
      val_drop_last (bool): ``drop_last`` flag of the validation loader.

    Returns:
      Tuple ``(train_loaders, val_loader)`` where ``train_loaders`` is a list
      of ``num_stages`` DataLoaders and ``val_loader`` a single DataLoader.

    """
    train_batch_sizes = broadcast(train_batch_size, num_stages)
    train_sets = broadcast(train_set, num_stages)
    train_loaders = [
        DataLoader(
            train_sets[i],
            batch_size=train_batch_sizes[i],
            shuffle=train_shuffle,
            num_workers=num_workers,
            pin_memory=train_pin_memory,
            drop_last=train_drop_last,
        )
        for i in range(num_stages)
    ]
    val_loader = DataLoader(
        val_set,
        batch_size=val_batch_size,
        shuffle=val_shuffle,
        num_workers=num_workers,
        pin_memory=val_pin_memory,
        drop_last=val_drop_last,
    )
    return train_loaders, val_loader