# Source code for imaginaire.evaluation.msid

# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
# flake8: noqa
from scipy.sparse import lil_matrix, diags, eye
import math
import numpy as np
import torch
EPSILON = 1e-6
NORMALIZATION = 1e6
def _get_msid(x, y, ts=np.logspace(-1, 1, 256), k=5, m=10, niters=100,
              rademacher=False, graph_builder='full',
              msid_mode='max', normalized_laplacian=True, normalize='empty'):
    """
    Compute the msid score between two samples, x and y
    Arguments:
        x: x samples
        y: y samples
        ts: temperature values
        k: number of neighbours for graph construction
        m: Lanczos steps in SLQ
        niters: number of starting random vectors for SLQ
        rademacher: if True, sample random vectors from Rademacher
            distributions, else sample standard normal distribution
        graph_builder: if 'kgraph', uses faster graph construction (options:
            'sparse', 'kgraph')
        msid_mode: 'l2' to compute the l2 norm of the distance between `msid1`
            and `msid2`; 'max' to find the maximum absolute difference between
            two descriptors over temperature
        normalized_laplacian: if True, use normalized Laplacian
        normalize: 'empty' for average heat kernel (corresponds to the empty
            graph normalization of NetLSD), 'complete' for the complete, 'er'
            for erdos-renyi normalization, 'none' for no normalization
    Returns:
        dict with key 'IMD': the scalar distance between the two descriptors
    Raises:
        ValueError: if `msid_mode` is neither 'l2' nor 'max'
    """
    normed_msidx = msid_descriptor(x, ts, k, m, niters, rademacher,
                                   graph_builder, normalized_laplacian,
                                   normalize)
    normed_msidy = msid_descriptor(y, ts, k, m, niters, rademacher,
                                   graph_builder, normalized_laplacian,
                                   normalize)
    # Temperature-dependent weighting applied in 'max' mode; downweights
    # extreme temperatures at both ends of the range.
    c = np.exp(-2 * (ts + 1 / ts))
    if msid_mode == 'l2':
        score = np.linalg.norm(normed_msidx - normed_msidy)
    elif msid_mode == 'max':
        score = np.amax(c * np.abs(normed_msidx - normed_msidy))
    else:
        # ValueError is the idiomatic type for a bad argument value; it is a
        # subclass of Exception, so existing handlers keep working.
        raise ValueError('Use either l2 or max mode.')
    return {'IMD': score}
def msid_descriptor(x, ts=np.logspace(-1, 1, 256), k=5, m=10, niters=100,
                    rademacher=False, graph_builder='full',
                    normalized_laplacian=True, normalize='empty'):
    """
    Compute the msid descriptor for a single sample x
    Arguments:
        x: x samples
        ts: temperature values
        k: number of neighbours for graph construction
        m: Lanczos steps in SLQ
        niters: number of starting random vectors for SLQ
        rademacher: if True, sample random vectors from Rademacher
            distributions, else sample standard normal distribution
        graph_builder: if 'kgraph', uses faster graph construction (options:
            'sparse', 'kgraph')
        normalized_laplacian: if True, use normalized Laplacian
        normalize: 'empty' for average heat kernel (corresponds to the empty
            graph normalization of NetLSD), 'complete' for the complete, 'er'
            for erdos-renyi normalization, 'none' for no normalization
    Returns:
        normed_msidx: normalized msid descriptor
    """
    laplacian = _build_graph(x, k, graph_builder, normalized_laplacian)
    num_nodes = laplacian.shape[0]
    heat_traces = slq_red_var(laplacian, m, niters, ts, rademacher)
    normed = _normalize_msid(heat_traces, normalize, num_nodes, k, ts)
    return normed * NORMALIZATION
def _build_graph(data, k=5, graph_builder='full', normalized=True): """ Return Laplacian from data or load preconstructed from path Arguments: data: samples k: number of neighbours for graph construction graph_builder: if 'kgraph', use faster graph construction normalized: if True, use nnormalized Laplacian Returns: L: Laplacian of the graph constructed with data """ if graph_builder == 'sparse': A = construct_graph_sparse(data.cpu().numpy(), k) elif graph_builder == 'kgraph': A = construct_graph_kgraph(data.cpu().numpy(), k) elif graph_builder == 'full': A = lil_matrix(construct_graph(data, k).cpu().numpy()).tocsr() else: raise Exception('Please specify graph builder: sparse or kgraph.') A = (A + A.T) / 2 A.data = np.ones(A.data.shape) L = _laplacian_sparse(A, normalized) return L def _normalize_msid(msid, normalization, n, k, ts): normed_msid = msid.copy() if normalization == 'empty': normed_msid /= n elif normalization == 'complete': normed_msid /= (1 + (n - 1) * np.exp(-(1 + 1 / (n - 1)) * ts)) elif normalization == 'er': xs = np.linspace(0, 1, n) er_spectrum = 4 / np.sqrt(k) * xs + 1 - 2 / np.sqrt(k) er_msid = np.exp(-np.outer(ts, er_spectrum)).sum(-1) normed_msid = normed_msid / (er_msid + EPSILON) elif normalization == 'none' or normalization is None: pass else: raise ValueError('Unknown normalization parameter!') return normed_msid def _lanczos_m(A, m, nv, rademacher, SV=None): """ Lanczos algorithm computes symmetric m x m tridiagonal matrix T and matrix V with orthogonal rows constituting the basis of the Krylov subspace K_m(A, x), where x is an arbitrary starting unit vector. This implementation parallelizes `nv` starting vectors. 
Arguments: m: number of Lanczos steps nv: number of random vectors rademacher: True to use Rademacher distribution, False - standard normal for random vectors SV: specified starting vectors Returns: T: a nv x m x m tensor, T[i, :, :] is the ith symmetric tridiagonal matrix V: a n x m x nv tensor, V[:, :, i] is the ith matrix with orthogonal rows """ orthtol = 1e-5 if type(SV) != np.ndarray: if rademacher: SV = np.sign(np.random.randn(A.shape[0], nv)) else: SV = np.random.randn(A.shape[0], nv) # init random vectors in columns: n x nv V = np.zeros((SV.shape[0], m, nv)) T = np.zeros((nv, m, m)) np.divide(SV, np.linalg.norm(SV, axis=0), out=SV) # normalize each column V[:, 0, :] = SV w = A.dot(SV) alpha = np.einsum('ij,ij->j', w, SV) w -= alpha[None, :] * SV beta = np.einsum('ij,ij->j', w, w) np.sqrt(beta, beta) T[:, 0, 0] = alpha T[:, 0, 1] = beta T[:, 1, 0] = beta np.divide(w, beta[None, :], out=w) V[:, 1, :] = w t = np.zeros((m, nv)) for i in range(1, m): SVold = V[:, i - 1, :] SV = V[:, i, :] w = A.dot(SV) # sparse @ dense w -= beta[None, :] * SVold # n x nv np.einsum('ij,ij->j', w, SV, out=alpha) T[:, i, i] = alpha if i < m - 1: w -= alpha[None, :] * SV # n x nv # reortho np.einsum('ijk,ik->jk', V, w, out=t) w -= np.einsum('ijk,jk->ik', V, t) np.einsum('ij,ij->j', w, w, out=beta) np.sqrt(beta, beta) np.divide(w, beta[None, :], out=w) T[:, i, i + 1] = beta T[:, i + 1, i] = beta # more reotho innerprod = np.einsum('ijk,ik->jk', V, w) reortho = False for _ in range(100): if not (innerprod > orthtol).sum(): reortho = True break np.einsum('ijk,ik->jk', V, w, out=t) w -= np.einsum('ijk,jk->ik', V, t) np.divide(w, np.linalg.norm(w, axis=0)[None, :], out=w) innerprod = np.einsum('ijk,ik->jk', V, w) V[:, i + 1, :] = w if (np.abs(beta) > 1e-6).sum() == 0 or not reortho: break return T, V def _slq(A, m, niters, rademacher): """ Compute the trace of matrix exponential Arguments: A: square matrix in trace(exp(A)) m: number of Lanczos steps niters: number of quadratures (also, 
the number of random vectors in the hutchinson trace estimator) rademacher: True to use Rademacher distribution, False - standard normal for random vectors in Hutchinson Returns: trace: estimate of trace of matrix exponential """ T, _ = _lanczos_m(A, m, niters, rademacher) eigvals, eigvecs = np.linalg.eigh(T) expeig = np.exp(eigvals) sqeigv1 = np.power(eigvecs[:, 0, :], 2) trace = A.shape[-1] * (expeig * sqeigv1).sum() / niters return trace def _slq_ts(A, m, niters, ts, rademacher): """ Compute the trace of matrix exponential Arguments: A: square matrix in trace(exp(-t*A)), where t is temperature m: number of Lanczos steps niters: number of quadratures (also, the number of random vectors in the hutchinson trace estimator) ts: an array with temperatures rademacher: True to use Rademacher distribution, False - standard normal for random vectors in Hutchinson Returns: trace: estimate of trace of matrix exponential across temperatures `ts` """ T, _ = _lanczos_m(A, m, niters, rademacher) eigvals, eigvecs = np.linalg.eigh(T) expeig = np.exp(-np.outer(ts, eigvals)).reshape(ts.shape[0], niters, m) sqeigv1 = np.power(eigvecs[:, 0, :], 2) traces = A.shape[-1] * (expeig * sqeigv1).sum(-1).mean(-1) return traces def _slq_ts_fs(A, m, niters, ts, rademacher, fs): """ Compute the trace of matrix functions Arguments: A: square matrix in trace(exp(-t*A)), where t is temperature m: number of Lanczos steps niters: number of quadratures (also, the number of random vectors in the hutchinson trace estimator) ts: an array with temperatures rademacher: True to use Rademacher distribution, else - standard normal for random vectors in Hutchinson fs: a list of functions Returns: traces: estimate of traces for each of the functions in fs """ T, _ = _lanczos_m(A, m, niters, rademacher) eigvals, eigvecs = np.linalg.eigh(T) traces = np.zeros((len(fs), len(ts))) for i, f in enumerate(fs): expeig = f(-np.outer(ts, eigvals)).reshape(ts.shape[0], niters, m) sqeigv1 = np.power(eigvecs[:, 0, :], 2) 
traces[i, :] = A.shape[-1] * (expeig * sqeigv1).sum(-1).mean(-1) return traces
def slq_red_var(A, m, niters, ts, rademacher):
    """
    Compute the trace of matrix exponential with reduced variance
    Arguments:
        A: square matrix in trace(exp(-t*A)), where t is temperature
        m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the hutchinson trace estimator)
        ts: an array with temperatures
        rademacher: True to use Rademacher distribution, else - standard
            normal for random vectors in Hutchinson
    Returns:
        traces: estimate of trace for each temperature value in `ts`
    """
    # Estimate traces of exp(-t*A) and of -t*A itself with shared random
    # vectors; the linear term acts as a control variate.
    functions = [np.exp, lambda x: x]
    raw = _slq_ts_fs(A, m, niters, ts, rademacher, functions)
    discount = np.exp(ts)
    corrected = raw[0, :] - raw[1, :] / discount
    # Add back the exact trace of the linear term.
    offset = -ts * A.shape[0] / discount
    return corrected + offset
def np_euc_cdist(data):
    """
    Pairwise Euclidean distance matrix for the rows of `data`.
    Arguments:
        data: n x d array of samples
    Returns:
        dist: n x n array, dist[i, j] is the Euclidean distance between
            rows i and j; the diagonal is exactly zero
    """
    sq_norms = np.einsum('ij,ij->i', data, data)
    gram = data.dot(data.T)
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    dist = sq_norms[None, :] + sq_norms[:, None] - 2.0 * gram
    # Zero the diagonal so round-off cannot produce sqrt of a negative.
    np.fill_diagonal(dist, 0)
    return np.sqrt(dist, dist)
def construct_graph_sparse(data, k):
    """
    Build a sparse kNN adjacency matrix from samples.
    Arguments:
        data: n x d numpy array of samples
        k: number of neighbours per node
    Returns:
        CSR matrix with A[i, j] = 1 when j is one of i's k nearest
        neighbours (directed; symmetrized by the caller)
    """
    num_points = len(data)
    adjacency = lil_matrix((num_points, num_points))
    sq_norms = np.sum(data * data, axis=1)
    for row in range(num_points):
        # Squared distances shifted by the constant -||x_row||^2, which
        # does not change the neighbour ranking.
        scores = sq_norms - 2 * data[row, :].dot(data.T)
        # k+1 smallest include the point itself (its score is minimal);
        # drop it to keep exactly the k nearest neighbours.
        nearest = np.argpartition(scores, k + 1)[:k + 1]
        nearest = nearest[nearest != row]
        adjacency[row, nearest] = 1
    return adjacency.tocsr()
def construct_graph_kgraph(data, k):
    """
    Placeholder for the pykgraph-based kNN graph builder.
    The optional `pykgraph` dependency is not shipped with this project, so
    this backend is disabled; a reference implementation is kept below.
    """
    raise NotImplementedError
# Reference implementation (requires pykgraph):
# n = len(data)
# spmat = lil_matrix((n, n))
# index = pykgraph.KGraph(data, 'euclidean')
# index.build(reverse=0, K=2 * k + 1, L=2 * k + 50)
# result = index.search(data, K=k + 1)[:, 1:]
# spmat[np.repeat(np.arange(n), k, 0), result.ravel()] = 1
# return spmat.tocsr()
def construct_graph(input_features, k, num_splits=10):
    """
    Build a dense kNN adjacency matrix with torch, chunking the distance
    computation to bound peak memory.
    Arguments:
        input_features: n x d torch tensor of samples
        k: number of neighbours per node
        num_splits: number of row chunks for the pairwise-distance pass
    Returns:
        n x n torch tensor with graph[i, j] = 1 when j is one of i's k
        nearest neighbours; the diagonal is zero
    """
    num_samples = input_features.shape[0]
    chunk = math.ceil(num_samples / num_splits)
    neighbour_idx = []
    for split in range(num_splits):
        lo = split * chunk
        hi = min((split + 1) * chunk, num_samples)
        pairwise = torch.cdist(input_features[lo:hi], input_features)
        nearest = torch.topk(pairwise, k + 1, dim=-1, largest=False)[1]
        neighbour_idx.append(nearest.cpu())
    neighbour_idx = torch.cat(neighbour_idx, dim=0)
    graph = torch.zeros(num_samples, num_samples, device=neighbour_idx.device)
    graph.scatter_(1, neighbour_idx, 1)
    # Each point is its own nearest neighbour; remove the self-loops.
    graph -= torch.eye(num_samples, device=graph.device)
    return graph
def _laplacian_sparse(A, normalized=True): D = A.sum(1).A1 if normalized: Dsqrt = diags(1 / np.sqrt(D)) L = eye(A.shape[0]) - Dsqrt.dot(A).dot(Dsqrt) else: L = diags(D) - A return L