# metrics.py

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import print_function

import numpy as np
import torch
from scipy.special import softmax

def compute_metrics(x):
    """Compute recall-at-k and rank statistics from a 2-D score matrix.

    Each row of ``x`` is sorted by descending score and the 0-based position
    of that row's diagonal value ``x[i, i]`` in the sorted row is taken as the
    row's rank.

    If several entries of a row are equal to the diagonal value, only the
    best (lowest) tied position is used, so each row contributes at most one
    rank.  Rows in which no sorted entry compares equal to the diagonal
    (e.g. a NaN diagonal) contribute no rank.

    Args:
        x: 2-D NumPy array of scores (presumably query-by-candidate
            similarities with the ground-truth pairs on the diagonal).

    Returns:
        dict with ``R1``, ``R5``, ``R10``, ``R50`` (percentage of ranks below
        1/5/10/50), ``MR`` and ``MedianR`` (median rank, 1-based), ``MeanR``
        (mean rank, 1-based) and ``cols`` (the 0-based ranks as Python ints).

    Raises:
        ZeroDivisionError: if no rank could be determined.
    """
    # Negating turns the ascending np.sort into a descending-score ordering.
    sorted_neg = np.sort(-x, axis=1)
    diag_neg = np.diag(-x)[:, np.newaxis]

    hit_rows, hit_cols = np.where(sorted_neg - diag_neg == 0)
    # np.where reports hits in row-major order, so the first hit of each row
    # is its lowest matching column; np.unique(return_index=True) picks it.
    _, first_hit = np.unique(hit_rows, return_index=True)
    ind = hit_cols[first_hit]

    metrics = {}
    metrics['R1'] = float(np.sum(ind == 0)) * 100 / len(ind)
    metrics['R5'] = float(np.sum(ind < 5)) * 100 / len(ind)
    metrics['R10'] = float(np.sum(ind < 10)) * 100 / len(ind)
    metrics['R50'] = float(np.sum(ind < 50)) * 100 / len(ind)
    metrics['MR'] = np.median(ind) + 1
    metrics["MedianR"] = metrics['MR']
    metrics["MeanR"] = np.mean(ind) + 1
    metrics["cols"] = [int(i) for i in ind]
    return metrics


def compute_trick_metrics(row_ind,col_ind):
    """Return the percentage of positions where ``row_ind`` equals ``col_ind``.

    The two arguments are subtracted element-wise, so they must support
    ``-`` (e.g. NumPy arrays of equal length).

    Returns:
        dict with the single key ``R1``: the share of zero differences,
        expressed as a percentage.
    """
    offsets = row_ind - col_ind
    exact_matches = np.sum(offsets == 0)
    return {'R1': float(exact_matches) * 100 / len(offsets)}


def compute_dsl_metrics(x):
    """Compute recall/rank metrics after re-weighting scores by a column softmax.

    ``x`` is first replaced by ``softmax(x, axis=0) * x``.  Each row of the
    re-weighted matrix is then sorted by descending score and the 0-based
    position of the row's diagonal value in the sorted row is taken as the
    row's rank.

    If several entries of a row are equal to the diagonal value, only the
    best (lowest) tied position is used, so each row contributes at most one
    rank.  Rows in which no sorted entry compares equal to the diagonal
    contribute no rank.

    Args:
        x: 2-D NumPy array of scores (presumably query-by-candidate
            similarities with the ground-truth pairs on the diagonal).

    Returns:
        dict with ``R1``, ``R5``, ``R10`` (percentage of ranks below 1/5/10),
        ``MR`` and ``MedianR`` (median rank, 1-based), ``MeanR`` (mean rank,
        1-based) and ``cols`` (the 0-based ranks as Python ints).

    Raises:
        ZeroDivisionError: if no rank could be determined.
    """
    x = softmax(x, axis=0) * x

    # Negating turns the ascending np.sort into a descending-score ordering.
    sorted_neg = np.sort(-x, axis=1)
    diag_neg = np.diag(-x)[:, np.newaxis]

    hit_rows, hit_cols = np.where(sorted_neg - diag_neg == 0)
    # np.where reports hits in row-major order, so the first hit of each row
    # is its lowest matching column; np.unique(return_index=True) picks it.
    _, first_hit = np.unique(hit_rows, return_index=True)
    ind = hit_cols[first_hit]

    metrics = {}
    metrics['R1'] = float(np.sum(ind == 0)) * 100 / len(ind)
    metrics['R5'] = float(np.sum(ind < 5)) * 100 / len(ind)
    metrics['R10'] = float(np.sum(ind < 10)) * 100 / len(ind)
    metrics['MR'] = np.median(ind) + 1
    metrics["MedianR"] = metrics['MR']
    metrics["MeanR"] = np.mean(ind) + 1
    metrics["cols"] = [int(i) for i in ind]
    return metrics

def print_computed_metrics(metrics):
    """Print the R@1, R@5, R@10 and median-rank entries of ``metrics`` on one line.

    Args:
        metrics: mapping that provides the keys ``R1``, ``R5``, ``R10`` and
            ``MR``; the three recall values are printed with four decimals.
    """
    template = 'R@1: {:.4f} - R@5: {:.4f} - R@10: {:.4f} - Median R: {}'
    values = [metrics[key] for key in ('R1', 'R5', 'R10', 'MR')]
    print(template.format(*values))

# The two functions below come directly from: https://github.com/Deferf/Experiments
def tensor_text_to_video_metrics(sim_tensor, top_k = [1,5,10,50]):
    """Compute text-to-video rank metrics from a 3-D similarity tensor.

    The tensor is permuted to ``(dim1, dim0, dim2)`` so it can be read as a
    stack of square similarity matrices; a double argsort along the last
    axis turns each row into 0-based ranks, and the diagonal of every matrix
    holds the rank of the matching item.  Ranks whose original similarity
    ``sim_tensor[i, k, i]`` is infinite or NaN are treated as padding and
    dropped.

    Args:
        sim_tensor: 3-D tensor or array-like; presumably shaped
            (n_videos, max_texts_per_video, n_videos) -- confirm with caller.
        top_k: cut-offs for the ``R{k}`` entries.  The default list is only
            iterated, never modified.

    Returns:
        dict with one ``R{k}`` percentage per cut-off, ``MedianR`` / ``MR``
        (median of the 1-based ranks as computed by ``torch.median``),
        ``MeanR`` and ``Std_Rank``.
    """
    if not torch.is_tensor(sim_tensor):
        sim_tensor = torch.tensor(sim_tensor)

    # Permute sim_tensor so it represents a sequence of text-video similarity
    # matrices, then use the double argsort to place the rank on the diagonal.
    stacked_sim_matrices = sim_tensor.permute(1, 0, 2)
    first_argsort = torch.argsort(stacked_sim_matrices, dim=-1, descending=True)
    second_argsort = torch.argsort(first_argsort, dim=-1, descending=False)

    # Extract the ranks, i.e. the diagonals.
    ranks = torch.flatten(torch.diagonal(second_argsort, dim1=1, dim2=2))

    # Keep only valid ranks: some diagonal entries are inf/NaN padding values.
    permuted_original_data = torch.flatten(torch.diagonal(sim_tensor, dim1=0, dim2=2))
    is_padding = torch.logical_or(
        torch.isinf(permuted_original_data), torch.isnan(permuted_original_data)
    )
    valid_ranks = ranks[~is_padding]

    results = {
        f"R{k}": float(torch.sum(valid_ranks < k) * 100 / len(valid_ranks))
        for k in top_k
    }
    results["MedianR"] = float(torch.median(valid_ranks + 1))
    # Move to host memory first: Tensor.numpy() only works on CPU tensors.
    one_based_ranks = valid_ranks.cpu().numpy() + 1
    results["MeanR"] = float(np.mean(one_based_ranks))
    results["Std_Rank"] = float(np.std(one_based_ranks))
    results['MR'] = results["MedianR"]
    return results

def tensor_video_to_text_sim(sim_tensor):
    """Collapse dimension 1 of a similarity tensor by max and transpose the result.

    NaN entries are replaced by ``-inf`` before the reduction so they can
    never win the max.

    NOTE: when ``sim_tensor`` is already a tensor the NaN replacement is done
    in place, i.e. the caller's tensor is modified.  Non-tensor inputs are
    converted with ``torch.tensor`` first, which copies.

    Args:
        sim_tensor: tensor or array-like; presumably shaped
            (n_videos, max_texts_per_video, n_videos) -- confirm with caller.

    Returns:
        The per-dim-1 maxima with dimension 1 removed, transposed (``.T``).
    """
    if not torch.is_tensor(sim_tensor):
        sim_tensor = torch.tensor(sim_tensor)
    # NaN is the only value that is not equal to itself.
    sim_tensor[sim_tensor != sim_tensor] = float('-inf')
    # Forms a similarity matrix for use with rank at k.
    values, _ = torch.max(sim_tensor, dim=1, keepdim=True)
    # Squeeze only the reduced dimension: a bare squeeze() would also drop
    # any other dimension that happens to have size 1 (e.g. a single item).
    return values.squeeze(1).T

def compute_classification_metrics(pred_labels, gt_labels):
    '''
    Rank metrics for label predictions.

    pred_labels: (n_videos, n_labels)
    gt_labels: (n_videos, 1)

    The column index at which a row of ``pred_labels`` equals that row's
    ground-truth label is used as the 0-based rank.  Returns a dict with
    R1 / R5 / R10 (percentages), MR and MedianR (median rank + 1),
    MeanR (mean rank + 1) and cols (the 0-based ranks as Python ints).
    '''
    # Column positions where prediction minus ground truth is exactly zero.
    hit_cols = np.where((pred_labels - gt_labels) == 0)[1]

    def _percent(flags):
        # Share of True flags among all hits, as a percentage.
        return float(np.sum(flags)) * 100 / len(hit_cols)

    metrics = {
        'R1': _percent(hit_cols == 0),
        'R5': _percent(hit_cols < 5),
        'R10': _percent(hit_cols < 10),
        'MR': np.median(hit_cols) + 1,
    }
    metrics["MedianR"] = metrics['MR']
    metrics["MeanR"] = np.mean(hit_cols) + 1
    metrics["cols"] = list(map(int, hit_cols))
    return metrics


def compute_metrics_pq(xx):
    """Compute recall/rank metrics from a 2-D score matrix via argsort.

    For every row ``i`` the columns are ordered by descending score and the
    0-based position of column ``i`` in that ordering is the row's rank.
    Because the rank is looked up by column index rather than by value, each
    row yields exactly one rank even when scores are tied.

    Args:
        xx: 2-D NumPy array with at least as many columns as rows.

    Returns:
        dict with ``R1``, ``R5``, ``R10`` (percentage of ranks below 1/5/10),
        ``MR`` and ``MedianR`` (median rank, 1-based), ``MeanR`` (mean rank,
        1-based) and ``cols`` (the 0-based ranks as Python ints).

    Raises:
        ValueError: if ``xx`` has more rows than columns, since row ``i``
            then has no column ``i`` to locate.
    """
    order = np.argsort(-xx, axis=1)
    n_rows, n_cols = order.shape
    if n_rows > n_cols:
        raise ValueError(
            "expected at least as many columns as rows, got shape "
            "{}".format(order.shape)
        )

    # Each row of `order` is a permutation of the column indices, so exactly
    # one entry equals the row index; argmax returns its position.
    own_column = np.arange(n_rows)[:, np.newaxis]
    ind = np.argmax(order == own_column, axis=1)

    metrics = {}
    metrics['R1'] = float(np.sum(ind == 0)) * 100 / len(ind)
    metrics['R5'] = float(np.sum(ind < 5)) * 100 / len(ind)
    metrics['R10'] = float(np.sum(ind < 10)) * 100 / len(ind)
    metrics['MR'] = np.median(ind) + 1
    metrics["MedianR"] = metrics['MR']
    metrics["MeanR"] = np.mean(ind) + 1
    metrics["cols"] = [int(i) for i in ind]
    return metrics


# Find the closest codeword index
def Indexing(Z, des, numSeg):
    # descriptor=torch.from_numpy(des).cuda()
    x = torch.chunk(des, numSeg, 1)
    y = torch.chunk(Z, numSeg, 1)
    for i in range(numSeg):
        size_x = x[i].shape[0] 
        size_y = y[i].shape[0] 
        xx =  torch.unsqueeze(x[i], -1)
        xx = xx.repeat((1, 1, size_y))

        yy =  torch.unsqueeze(y[i], -1)
        yy = yy.repeat((1, 1, size_x))
        yy = yy.permute(2, 1, 0)
        diff = torch.sum(torch.mul(xx,yy), 1)

        arg = torch.argmax(diff, 1)
        max_idx = torch.reshape(arg, [-1, 1])

        if i == 0:
            quant_idx = max_idx
        else:
            quant_idx = torch.cat([quant_idx, max_idx], 1)
    return quant_idx

# Compute distances and build look-up-table
def pqDist(Z, numSeg, g_x, q_x):
    """Compute query-to-gallery distances through per-segment look-up tables.

    The feature dimension of ``q_x`` and ``Z`` is split into ``numSeg`` equal
    segments.  For each segment a look-up table ``1 - <query, codeword>`` is
    built (stored as float32) and the entry selected by each gallery item's
    codeword index for that segment is added to the item's distance.

    Args:
        Z: 2-D array of codewords, one codeword per row.
        numSeg: number of segments; must divide the number of columns of
            ``Z``, ``q_x`` and ``g_x`` evenly (``np.split`` raises otherwise).
        g_x: integer array of codeword indices, one row per gallery item and
            presumably one column per segment.
        q_x: 2-D array of query features with the same number of columns
            as ``Z``.

    Returns:
        float32 array of shape (n_queries, n_gallery).
    """
    n1 = q_x.shape[0]
    n2 = g_x.shape[0]

    q_x_split = np.split(q_x, numSeg, 1)
    g_x_split = np.split(g_x, numSeg, 1)
    Z_split = np.split(Z, numSeg, 1)

    Dpq = np.zeros((n1, n2), dtype=np.float32)
    for q_seg, g_seg, z_seg in zip(q_x_split, g_x_split, Z_split):
        # Look-up table for this segment: one row per query, one column per
        # codeword, kept in float32 like the accumulated result.
        lut = (1 - np.matmul(q_seg, z_seg.T)).astype(np.float32)
        # Gather each gallery item's table entry and accumulate over segments.
        Dpq += lut[:, g_seg[:, 0]]
    return Dpq

def cat_apcal(label_Similarity, IX, top_N):
    """Return the mean average precision over the columns of ``IX``.

    Column ``i`` of ``IX`` is read as the ranked list of item indices for
    test ``i``; the first ``top_N`` entries are scanned and an entry counts
    as a hit when ``label_Similarity[i, item] == 1``.  The average precision
    of a test is the mean of the precision values at its hit positions, or 0
    when there is no hit.

    Args:
        label_Similarity: 2-D array indexed as [test, item].
        IX: 2-D array of item indices, one column per test.
        top_N: number of leading ranked entries to evaluate per test.

    Returns:
        Mean of the per-test average precisions.
    """
    _, numtest = IX.shape
    apall = np.zeros(numtest)

    for test_idx in range(numtest):
        ranking = IX[:, test_idx]
        hits = 0
        precision_sum = 0
        for position in range(1, top_N + 1):
            if label_Similarity[test_idx, ranking[position - 1]] == 1:
                hits += 1
                # Precision at this cut-off: hits so far / items seen so far.
                precision_sum += float(hits) / position
        apall[test_idx] = precision_sum / hits if precision_sum != 0 else 0

    return np.mean(apall)