# Source code for imaginaire.discriminators.residual

# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import warnings
import torch
import torch.nn as nn
from imaginaire.layers import Conv2dBlock, Res2dBlock
from imaginaire.third_party.upfirdn2d import BlurDownsample
class ResDiscriminator(nn.Module):
    r"""Global residual discriminator.

    Args:
        image_channels (int): Num. of channels in the real/fake image.
        num_filters (int): Num. of base filters in a layer.
        max_num_filters (int): Maximum num. of filters in a layer.
        first_kernel_size (int): Kernel size in the first layer.
        num_layers (int): Num. of layers in discriminator.
        padding_mode (str): Padding mode.
        activation_norm_type (str): Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``.
        weight_norm_type (str): Type of weight normalization.
            ``'none'``, ``'spectral'``, or ``'weight'``.
        aggregation (str): Method to aggregate features across different
            locations in the final layer. ``'conv'``, or ``'pool'``.
        order (str): Order of operations in the residual link.
        anti_aliased (bool): If ``True``, uses anti-aliased pooling.
    """

    def __init__(self,
                 image_channels=3,
                 num_filters=64,
                 max_num_filters=512,
                 first_kernel_size=1,
                 num_layers=4,
                 padding_mode='zeros',
                 activation_norm_type='',
                 weight_norm_type='',
                 aggregation='conv',
                 order='pre_act',
                 anti_aliased=False,
                 **kwargs):
        super().__init__()
        # Warn about unused config keys; 'type' and 'patch_wise' are
        # accepted silently (presumably passed through by the config
        # system — see callers).
        for key in kwargs:
            if key != 'type' and key != 'patch_wise':
                warnings.warn(
                    "Discriminator argument {} is not used".format(key))

        conv_params = dict(padding_mode=padding_mode,
                           activation_norm_type=activation_norm_type,
                           weight_norm_type=weight_norm_type,
                           nonlinearity='leakyrelu')

        # "Same" padding so the first conv preserves spatial resolution.
        first_padding = (first_kernel_size - 1) // 2
        model = [Conv2dBlock(image_channels, num_filters,
                             first_kernel_size, 1, first_padding,
                             **conv_params)]
        # Each stage doubles the channel count (capped at max_num_filters)
        # and halves the spatial resolution, optionally with anti-aliased
        # (blur) downsampling.
        for _ in range(num_layers):
            num_filters_prev = num_filters
            num_filters = min(num_filters * 2, max_num_filters)
            model.append(Res2dBlock(num_filters_prev, num_filters,
                                    order=order, **conv_params))
            if anti_aliased:
                model.append(BlurDownsample())
            else:
                model.append(nn.AvgPool2d(2, stride=2))
        if aggregation == 'pool':
            model += [torch.nn.AdaptiveAvgPool2d(1)]
        elif aggregation == 'conv':
            model += [Conv2dBlock(num_filters, num_filters, 4, 1, 0,
                                  nonlinearity='leakyrelu')]
        else:
            # Fixed: the original raised
            # ValueError('...' % self.aggregation), which itself fails —
            # the string has no '%s' conversion specifier, and
            # 'self.aggregation' was never set as an attribute.
            raise ValueError(
                'The aggregation mode {} is not recognized'.format(
                    aggregation))
        self.model = nn.Sequential(*model)
        self.classifier = nn.Linear(num_filters, 1)

    def forward(self, images):
        r"""Residual discriminator forward.

        Args:
            images (tensor): Input images.

        Returns:
            (tuple):
              - outputs (tensor): Output of the discriminator.
              - features (tensor): Intermediate features of the
                discriminator (output of ``self.model``).
              - images (tensor): Input images, passed through unchanged.
        """
        batch_size = images.size(0)
        features = self.model(images)
        # Flatten the aggregated features before the final linear layer.
        outputs = self.classifier(features.view(batch_size, -1))
        return outputs, features, images