Source code for imaginaire.datasets.paired_few_shot_videos

# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import copy
import random
import torch
from imaginaire.datasets.paired_videos import Dataset as VideoDataset
from imaginaire.model_utils.fs_vid2vid import select_object
from imaginaire.utils.distributed import master_only_print as print
class Dataset(VideoDataset):
    r"""Paired video dataset for use in few-shot vid2vid.

    Args:
        cfg (Config): Loaded config object.
        is_inference (bool): In train or inference mode?
        sequence_length (int): What sequence of images to provide?
        few_shot_K (int): How many images to provide for few-shot? When
            ``None``, ``cfg.data.initial_few_shot_K`` is used.
        is_test (bool): Forwarded unchanged to the parent constructor.
    """

    def __init__(self, cfg, is_inference=False, sequence_length=None,
                 few_shot_K=None, is_test=False):
        self.paired = True
        # Resolve the initial few shot K before the parent constructor runs.
        if few_shot_K is None:
            self.few_shot_K = cfg.data.initial_few_shot_K
        else:
            self.few_shot_K = few_shot_K
        # Initialize the parent dataset.
        super().__init__(
            cfg, is_inference, sequence_length=sequence_length,
            is_test=is_test)

    def set_inference_sequence_idx(self, index, k_shot_index,
                                   k_shot_frame_index):
        r"""Get frames from this sequence during inference.

        Also sets ``self.epoch_length`` to the number of frames in the
        chosen inference sequence.

        Args:
            index (int): Index of inference sequence.
            k_shot_index (int): Index of sequence from which k_shot is
                sampled.
            k_shot_frame_index (int): Index of frame to sample.
        """
        assert self.is_inference
        assert index < len(self.mapping)
        assert k_shot_index < len(self.mapping)
        # Each mapping entry is a dict describing one sequence, so the frame
        # index must be bounded by its list of filenames, not by the number
        # of keys in that dict.
        assert k_shot_frame_index < len(
            self.mapping[k_shot_index]['filenames'])
        self.inference_sequence_idx = index
        self.inference_k_shot_sequence_index = k_shot_index
        self.inference_k_shot_frame_index = k_shot_frame_index
        self.epoch_length = len(
            self.mapping[self.inference_sequence_idx]['filenames'])

    def set_sequence_length(self, sequence_length, few_shot_K=None):
        r"""Set the length of sequence you want as output from dataloader.

        If ``sequence_length + few_shot_K`` exceeds
        ``self.sequence_length_max``, the sequence length is reduced to
        ``self.sequence_length_max - few_shot_K`` and a message is printed.
        The mapping and epoch length are then recomputed.

        Args:
            sequence_length (int): Length of output sequences.
            few_shot_K (int): Number of few-shot frames. Defaults to the
                current ``self.few_shot_K``.
        """
        if few_shot_K is None:
            few_shot_K = self.few_shot_K
        assert isinstance(sequence_length, int)
        assert isinstance(few_shot_K, int)
        if (sequence_length + few_shot_K) > self.sequence_length_max:
            error_message = (
                'Requested sequence length (%d) '
                '+ few shot K (%d) > '
                'max sequence length (%d). '
            ) % (sequence_length, few_shot_K, self.sequence_length_max)
            print(error_message)
            sequence_length = self.sequence_length_max - few_shot_K
            print('Reduced sequence length to %s' % (sequence_length))
        self.sequence_length = sequence_length
        self.few_shot_K = few_shot_K
        # Recalculate mapping as some sequences might no longer be useful.
        self.mapping, self.epoch_length = self._create_mapping()
        print('Epoch length:', self.epoch_length)

    def _create_mapping(self):
        r"""Creates mapping from idx to key in LMDB.

        Only sequences with at least ``sequence_length + few_shot_K`` frames
        are kept.

        Returns:
            (tuple):
              - self.mapping (dict or list): Dict of seq_len to list of
                sequences during training; a flat list of sequences during
                inference.
              - self.epoch_length (int): Number of samples in an epoch.
        """
        # Create dict mapping length to sequence.
        length_to_key, num_selected_seq = {}, 0
        has_additional_lists = len(self.additional_lists) > 0
        min_num_frames = self.sequence_length + self.few_shot_K
        for lmdb_idx, sequence_list in enumerate(self.sequence_lists):
            for sequence_name, filenames in sequence_list.items():
                num_frames = len(filenames)
                if num_frames < min_num_frames:
                    continue
                if has_additional_lists:
                    obj_indices = self.additional_lists[lmdb_idx][
                        sequence_name]
                else:
                    # No additional lists: use object index 0 for every
                    # frame.
                    obj_indices = [0] * num_frames
                length_to_key.setdefault(num_frames, []).append({
                    'lmdb_root': self.lmdb_roots[lmdb_idx],
                    'lmdb_idx': lmdb_idx,
                    'sequence_name': sequence_name,
                    'filenames': filenames,
                    'obj_indices': obj_indices,
                })
                num_selected_seq += 1
        self.mapping = length_to_key
        self.epoch_length = num_selected_seq

        # At inference time, we want to use all sequences,
        # irrespective of length.
        if self.is_inference:
            sequence_list = []
            for sequences in self.mapping.values():
                sequence_list.extend(sequences)
            self.mapping = sequence_list

        return self.mapping, self.epoch_length

    def _sample_keys(self, index):
        r"""Gets files to load for this sample.

        Args:
            index (int): Index in [0, len(dataset)].
        Returns:
            (tuple):
              - key (dict): Copy of the chosen sequence entry whose
                ``filenames`` / ``obj_indices`` hold the chosen frames.
              - few_shot_key (dict): Same layout as ``key``, holding the
                few-shot frames.
        """
        if self.is_inference:
            assert index < self.epoch_length
            # One frame of the inference sequence per call.
            chosen_sequence = self.mapping[self.inference_sequence_idx]
            chosen_filenames = [chosen_sequence['filenames'][index]]
            chosen_obj_indices = [chosen_sequence['obj_indices'][index]]
            # The few-shot frame is fixed by set_inference_sequence_idx.
            few_shot_sequence = self.mapping[
                self.inference_k_shot_sequence_index]
            k_shot_frame = self.inference_k_shot_frame_index
            k_shot_chosen_filenames = [
                few_shot_sequence['filenames'][k_shot_frame]]
            k_shot_chosen_obj_indices = [
                few_shot_sequence['obj_indices'][k_shot_frame]]
        else:
            # Pick a time step for temporal augmentation.
            time_step = random.randint(1, self.augmentor.max_time_step)
            required_sequence_length = 1 + \
                (self.sequence_length - 1) * time_step

            # If step is too large, default to step size of 1.
            if required_sequence_length + self.few_shot_K > \
                    self.sequence_length_max:
                required_sequence_length = self.sequence_length
                time_step = 1

            # Find valid sequences.
            min_num_frames = required_sequence_length + self.few_shot_K
            valid_sequences = []
            for sequence_length, sequences in self.mapping.items():
                if sequence_length >= min_num_frames:
                    valid_sequences.extend(sequences)

            # Pick a sequence.
            chosen_sequence = random.choice(valid_sequences)
            few_shot_sequence = chosen_sequence
            filenames = chosen_sequence['filenames']
            obj_indices = chosen_sequence['obj_indices']

            # Choose filenames.
            max_start_idx = len(filenames) - required_sequence_length
            start_idx = random.randint(0, max_start_idx)
            end_idx = start_idx + required_sequence_length
            chosen_filenames = filenames[start_idx:end_idx:time_step]
            chosen_obj_indices = obj_indices[start_idx:end_idx:time_step]

            # Find the K few shot filenames among frames outside the chosen
            # [start_idx, end_idx) window.
            valid_range = list(range(start_idx)) + \
                list(range(end_idx, len(filenames)))
            k_shot_chosen = sorted(
                random.sample(valid_range, self.few_shot_K))
            k_shot_chosen_filenames = [filenames[idx]
                                       for idx in k_shot_chosen]
            k_shot_chosen_obj_indices = [obj_indices[idx]
                                         for idx in k_shot_chosen]
            assert not (set(chosen_filenames) & set(k_shot_chosen_filenames))

            assert len(chosen_filenames) == self.sequence_length
            assert len(k_shot_chosen_filenames) == self.few_shot_K

        # Prepare few shot key.
        few_shot_key = copy.deepcopy(few_shot_sequence)
        few_shot_key['filenames'] = k_shot_chosen_filenames
        few_shot_key['obj_indices'] = k_shot_chosen_obj_indices

        # Prepare output key.
        key = copy.deepcopy(chosen_sequence)
        key['filenames'] = chosen_filenames
        key['obj_indices'] = chosen_obj_indices
        return key, few_shot_key

    def _prepare_data(self, keys):
        r"""Load data and perform augmentation.

        Args:
            keys (dict): Key into LMDB/folder dataset for this item.
        Returns:
            data (dict): Dict with all chosen data_types, plus
              ``<keypoint_type>_xy`` copies taken before the post-aug ops,
              ``is_flipped`` and ``key``.
        """
        # Unpack keys.
        lmdb_idx = keys['lmdb_idx']
        sequence_name = keys['sequence_name']
        filenames = keys['filenames']
        obj_indices = keys['obj_indices']

        # Get key and lmdbs.
        sequence_keys, lmdbs = {}, {}
        for data_type in self.dataset_data_types:
            sequence_keys[data_type] = self._create_sequence_keys(
                sequence_name, filenames)
            lmdbs[data_type] = self.lmdbs[data_type][lmdb_idx]

        # Load all data for this index.
        data = self.load_from_dataset(sequence_keys, lmdbs)

        # Apply ops pre augmentation.
        data = self.apply_ops(data, self.pre_aug_ops)

        # Select the object in data using the object indices.
        data = select_object(data, obj_indices)

        # Do augmentations for images.
        data, is_flipped = self.perform_augmentation(
            data, paired=True, augment_ops=self.augmentor.augment_ops)

        # Create copy of keypoint data types before post aug.
        kp_data = {}
        for data_type in self.keypoint_data_types:
            new_key = data_type + '_xy'
            kp_data[new_key] = copy.deepcopy(data[data_type])

        # Apply ops post augmentation.
        data = self.apply_ops(data, self.post_aug_ops)

        data = self.apply_ops(data, self.full_data_post_aug_ops,
                              full_data=True)

        # Convert images to tensor.
        data = self.to_tensor(data)

        # Pack the sequence of images along a new leading dimension.
        for data_type in self.image_data_types:
            frames = [frame.unsqueeze(0) for frame in data[data_type]]
            data[data_type] = torch.cat(frames, dim=0)

        # Add keypoint xy to data.
        data.update(kp_data)

        data['is_flipped'] = is_flipped
        # The per-data-type sequence keys, not the ``keys`` argument.
        data['key'] = sequence_keys

        return data

    def _getitem(self, index):
        r"""Gets selected files.

        Args:
            index (int): Index into dataset.
        Returns:
            data (dict): Dict with all chosen data_types. Few-shot entries
              are stored under the same names prefixed with ``few_shot_``.
        """
        # Select a sample from the available data.
        keys, few_shot_keys = self._sample_keys(index)

        data = self._prepare_data(keys)
        few_shot_data = self._prepare_data(few_shot_keys)

        # Add few shot data into data.
        for key, value in few_shot_data.items():
            data['few_shot_' + key] = value

        # Apply full data ops.
        if self.is_inference and index != 0:
            if index < self.cfg.data.num_workers:
                # Recompute item 0 to obtain its 'common_attr'.
                # NOTE(review): presumably needed because each dataloader
                # worker holds its own copy of this dataset - confirm.
                data_0 = self._getitem(0)
                if 'common_attr' in data_0:
                    self.common_attr = data['common_attr'] = \
                        data_0['common_attr']
            elif hasattr(self, 'common_attr'):
                data['common_attr'] = self.common_attr

        data = self.apply_ops(data, self.full_data_ops, full_data=True)

        # Cache the attribute produced for the first inference frame so later
        # frames can reuse it.
        if self.is_inference and index == 0 and 'common_attr' in data:
            self.common_attr = data['common_attr']

        return data