Source code for imaginaire.evaluation.fid

# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import os
import numpy as np
import torch
from scipy import linalg
from imaginaire.evaluation.common import load_or_compute_activations
from imaginaire.utils.distributed import is_master
from imaginaire.utils.distributed import master_only_print as print
[docs]@torch.no_grad()
def compute_fid(fid_path, data_loader, net_G,
                key_real='images', key_fake='fake_images',
                sample_size=None, preprocess=None, return_act=False,
                is_video=False, few_shot_video=False, **kwargs):
    r"""Compute the fid score.
    Args:
        fid_path (str): Location for the numpy file to store or to load the
            statistics.
        data_loader (obj): PyTorch dataloader object.
        net_G (obj): For image generation modes, net_G is the generator network.
            For video generation models, net_G is the trainer.
        key_real (str): Dictionary key value for the real data.
        key_fake (str): Dictionary key value for the fake data.
        sample_size (int or tuple): How many samples to be used.
        preprocess (func): The preprocess function to be applied to the data.
        return_act (bool): If ``True``, also returns feature activations of
            real and fake data.
        is_video (bool): Whether we are handling video sequences.
        few_shot_video (bool): If ``True``, uses few-shot video synthesis.
    Returns:
        (float): FID value.
    """
    print('Computing FID.')
    act_path = os.path.join(os.path.dirname(fid_path),
                            'activations_real.npy')
    # Get the fake mean and covariance.
    fake_act = load_or_compute_activations(
        None, data_loader, key_real, key_fake, net_G,
        sample_size, preprocess, is_video=is_video,
        few_shot_video=few_shot_video, **kwargs
    )
    # Get the ground truth mean and covariance.
    real_act = load_or_compute_activations(
        act_path, data_loader, key_real, key_fake, None,
        sample_size, preprocess, is_video=is_video,
        few_shot_video=few_shot_video, **kwargs
    )
    if is_master():
        fid = _calculate_frechet_distance(
            fake_act, real_act)["FID"]
        if return_act:
            return fid, real_act, fake_act
        else:
            return fid
    elif return_act:
        return None, None, None
    else:
        return None
[docs]@torch.no_grad()
def compute_fid_data(fid_path, data_loader_a, data_loader_b,
                     key_a='images', key_b='images', sample_size=None,
                     is_video=False, few_shot_video=False, **kwargs):
    r"""Compute the fid score between two datasets.
    Args:
        fid_path (str): Location for the numpy file to store or to load the
            statistics.
        data_loader_a (obj): PyTorch dataloader object for dataset a.
        data_loader_b (obj): PyTorch dataloader object for dataset b.
        key_a (str): Dictionary key value for images in the dataset a.
        key_b (str): Dictionary key value for images in the dataset b.
        sample_size (int): How many samples to be used for computing the FID.
        is_video (bool): Whether we are handling video sequences.
        few_shot_video (bool): If ``True``, uses few-shot video synthesis.
    Returns:
        (float): FID value.
    """
    print('Computing FID.')
    path_a = os.path.join(os.path.dirname(fid_path),
                          'activations_a.npy')
    min_data_size = min(len(data_loader_a.dataset),
                        len(data_loader_b.dataset))
    if sample_size is None:
        sample_size = min_data_size
    else:
        sample_size = min(sample_size, min_data_size)
    act_a = load_or_compute_activations(
        path_a, data_loader_a, key_a, key_b, None,
        sample_size=sample_size, is_video=is_video,
        few_shot_video=few_shot_video, **kwargs
    )
    act_b = load_or_compute_activations(
        None, data_loader_b, key_a, key_b, None,
        sample_size=sample_size, is_video=is_video,
        few_shot_video=few_shot_video, **kwargs
    )
    if is_master():
        return _calculate_frechet_distance(act_a, act_b)["FID"]
def _calculate_frechet_distance(act_1, act_2, eps=1e-6):
    mu1 = np.mean(act_1.cpu().numpy(), axis=0)
    sigma1 = np.cov(act_1.cpu().numpy(), rowvar=False)
    mu2 = np.mean(act_2.cpu().numpy(), axis=0)
    sigma2 = np.cov(act_2.cpu().numpy(), rowvar=False)
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)
    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)
    assert mu1.shape == mu2.shape, 'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, 'Training and test covariances have different dimensions'
    diff = mu1 - mu2
    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            print('Imaginary component {}'.format(m))
            # raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real
    tr_covmean = np.trace(covmean)
    return {"FID": (diff.dot(diff) + np.trace(sigma1) + np.trace(
        sigma2) - 2 * tr_covmean)}