# Source code for imaginaire.evaluation.fid

# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import os
import numpy as np
import torch
from scipy import linalg

from imaginaire.evaluation.common import load_or_compute_activations
from imaginaire.utils.distributed import is_master
from imaginaire.utils.distributed import master_only_print as print


@torch.no_grad()
def compute_fid(fid_path, data_loader, net_G,
                key_real='images', key_fake='fake_images',
                sample_size=None, preprocess=None, return_act=False,
                is_video=False, few_shot_video=False, **kwargs):
    r"""Compute the FID score between generated and real data.

    Activations for the fake data are requested first (with ``net_G`` and no
    file path), then activations for the real data (with no generator and the
    path ``activations_real.npy`` located next to ``fid_path``). The Frechet
    distance itself is only evaluated on the master process.

    Args:
        fid_path (str): Location for the numpy file to store or to load the
            statistics. Only its directory is used here.
        data_loader (obj): PyTorch dataloader object.
        net_G (obj): For image generation modes, net_G is the generator
            network. For video generation models, net_G is the trainer.
        key_real (str): Dictionary key value for the real data.
        key_fake (str): Dictionary key value for the fake data.
        sample_size (int or tuple): How many samples to be used.
        preprocess (func): The preprocess function to be applied to the data.
        return_act (bool): If ``True``, also returns feature activations of
            real and fake data.
        is_video (bool): Whether we are handling video sequences.
        few_shot_video (bool): If ``True``, uses few-shot video synthesis.
        **kwargs: Forwarded unchanged to ``load_or_compute_activations``.

    Returns:
        (float): FID value on the master process, ``None`` elsewhere. When
        ``return_act`` is ``True`` the result is the tuple
        ``(fid, real_act, fake_act)`` on the master process and
        ``(None, None, None)`` elsewhere.
    """
    print('Computing FID.')
    real_act_path = os.path.join(
        os.path.dirname(fid_path), 'activations_real.npy')
    # Keyword options shared by both activation passes.
    shared_options = dict(
        is_video=is_video, few_shot_video=few_shot_video, **kwargs)

    # Activations of the generated data: generator supplied, no file path.
    fake_act = load_or_compute_activations(
        None, data_loader, key_real, key_fake, net_G,
        sample_size, preprocess, **shared_options
    )
    # Activations of the ground truth data: file path supplied, no generator.
    real_act = load_or_compute_activations(
        real_act_path, data_loader, key_real, key_fake, None,
        sample_size, preprocess, **shared_options
    )

    # Non-master processes get placeholders with the same arity.
    if not is_master():
        return (None, None, None) if return_act else None

    fid = _calculate_frechet_distance(fake_act, real_act)["FID"]
    if return_act:
        return fid, real_act, fake_act
    return fid
@torch.no_grad()
def compute_fid_data(fid_path, data_loader_a, data_loader_b,
                     key_a='images', key_b='images', sample_size=None,
                     is_video=False, few_shot_video=False, **kwargs):
    r"""Compute the FID score between two datasets.

    The number of samples is capped at the size of the smaller dataset.
    Activations of dataset a are requested with the path
    ``activations_a.npy`` located next to ``fid_path``; activations of
    dataset b are requested without a path. The Frechet distance itself is
    only evaluated on the master process.

    Args:
        fid_path (str): Location for the numpy file to store or to load the
            statistics. Only its directory is used here.
        data_loader_a (obj): PyTorch dataloader object for dataset a.
        data_loader_b (obj): PyTorch dataloader object for dataset b.
        key_a (str): Dictionary key value for images in the dataset a.
        key_b (str): Dictionary key value for images in the dataset b.
        sample_size (int): How many samples to be used for computing the FID.
            ``None`` means the size of the smaller dataset.
        is_video (bool): Whether we are handling video sequences.
        few_shot_video (bool): If ``True``, uses few-shot video synthesis.
        **kwargs: Forwarded unchanged to ``load_or_compute_activations``.

    Returns:
        (float): FID value on the master process, ``None`` elsewhere.
    """
    print('Computing FID.')
    path_a = os.path.join(os.path.dirname(fid_path), 'activations_a.npy')

    # Never request more samples than the smaller dataset can provide.
    smallest_dataset = min(len(data_loader_a.dataset),
                           len(data_loader_b.dataset))
    sample_size = (smallest_dataset if sample_size is None
                   else min(sample_size, smallest_dataset))

    shared_options = dict(sample_size=sample_size, is_video=is_video,
                          few_shot_video=few_shot_video, **kwargs)
    # NOTE(review): both calls pass (key_a, key_b) in the same positions and
    # no generator; whether key_b is ever read for dataset b depends on
    # load_or_compute_activations -- confirm against that helper.
    act_a = load_or_compute_activations(
        path_a, data_loader_a, key_a, key_b, None, **shared_options
    )
    act_b = load_or_compute_activations(
        None, data_loader_b, key_a, key_b, None, **shared_options
    )

    if not is_master():
        return None
    return _calculate_frechet_distance(act_a, act_b)["FID"]
def _activations_to_numpy(act):
    r"""Return ``act`` as a numpy array.

    Torch tensors are detached and moved to the CPU first; anything else is
    passed through ``np.asarray``.
    """
    if isinstance(act, torch.Tensor):
        return act.detach().cpu().numpy()
    return np.asarray(act)


def _calculate_frechet_distance(act_1, act_2, eps=1e-6):
    r"""Compute the Frechet distance between two sets of activations.

    Each set is summarized by its mean and covariance (samples along axis 0,
    features along axis 1) and the returned value is

        ||mu1 - mu2||^2 + Tr(sigma1) + Tr(sigma2) - 2 * Tr(sqrt(sigma1 sigma2))

    Args:
        act_1 (torch.Tensor or array-like): Activations of the first set.
        act_2 (torch.Tensor or array-like): Activations of the second set.
        eps (float): Value added to the diagonal of both covariance
            estimates when the matrix square root of their product is not
            finite.

    Returns:
        (dict): ``{"FID": value}`` holding the Frechet distance.

    Raises:
        AssertionError: If the two sets yield means or covariances of
            different shapes.
    """
    # Convert each input exactly once and reuse it for mean and covariance.
    feats_1 = _activations_to_numpy(act_1)
    feats_2 = _activations_to_numpy(act_2)

    mu1 = np.atleast_1d(np.mean(feats_1, axis=0))
    mu2 = np.atleast_1d(np.mean(feats_2, axis=0))
    sigma1 = np.atleast_2d(np.cov(feats_1, rowvar=False))
    sigma2 = np.atleast_2d(np.cov(feats_2, rowvar=False))

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    # Product might be almost singular.
    # The plain call returns only the matrix; the `disp` keyword is
    # deprecated in recent SciPy releases, so it is deliberately not used.
    covmean = linalg.sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component.
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            # A large imaginary part is only reported, not raised, so the
            # evaluation still produces a value.
            m = np.max(np.abs(covmean.imag))
            print('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)
    return {"FID": (diff.dot(diff) + np.trace(sigma1)
                    + np.trace(sigma2) - 2 * tr_covmean)}