index_linear.py
"""An index linear class for generic NCE module"""
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from nce import NCELoss


class IndexLinear(NCELoss):
    """A linear layer that only decodes the results of provided indices.

    Args:
        target_idx: indices of target words
        noise_idx: indices of noise words
        input: input matrix

    Shape:
        - target_idx :math:`(B, N)` where `B` is the batch size and `N` is the sequence length
        - noise_idx :math:`(B, N, N_r)` where `N_r` is the noise ratio (noise samples per target)
        - input :math:`(B, N, in\_features)`

    Return:
        - target_score :math:`(B, N)` the un-normalized target scores
        - noise_score :math:`(B, N, N_r)` the un-normalized noise scores
    """

    def __init__(self, input_size, output_size, *args, **kwargs):
        super(IndexLinear, self).__init__(*args, **kwargs)
        self.weight = nn.Parameter(torch.Tensor(output_size, input_size))
        self.bias = nn.Parameter(torch.Tensor(output_size))
        # per-element losses are kept (the deprecated `reduce=False` is
        # replaced by `reduction='none'`) so the caller can mask or re-weight
        self.ce = nn.CrossEntropyLoss(reduction='none')
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            # initialize the bias with unigram instead of uniform
            self.bias.data = torch.log(self.noise + 1e-10) + self.norm_term
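
    # NOTE: `self.noise` (the noise/unigram distribution over the vocabulary)
    # and `self.norm_term` (the log normalization constant) are assumed here to
    # be attributes set by the parent NCELoss. With this init, exp(bias[w]) is
    # proportional to the unigram probability of word `w`, so the initial
    # scores already approximate the noise distribution instead of being uniform.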

    def get_score(self, target_idx, noise_idx, input):
        """Compute un-normalized scores for target and noise words in one pass.

        Shape:
            - target_batch :math:`(B*N, E, 1+N_r)` where `E = embedding size` and
              `N_r = noise ratio`
        """
        original_size = target_idx.size()  # used to un-flatten the output below

        # flatten the batch and sequence dimensions
        input = input.contiguous().view(-1, input.size(-1))
        target_idx = target_idx.view(-1)
        noise_idx = noise_idx.view(-1, noise_idx.size(-1))
        indices = torch.cat([target_idx.unsqueeze(-1), noise_idx], dim=-1)

        # `index_select` is used rather than the `[]` operator, which could not
        # back-propagate correctly through redundant indices before pytorch 0.2.0
        input = input.unsqueeze(1)  # (B*N, 1, E)
        target_batch = self.weight.index_select(0, indices.view(-1)).view(*indices.size(), -1).transpose(1, 2)
        bias = self.bias.index_select(0, indices.view(-1)).view_as(indices).unsqueeze(1)
        # batched bias + input @ target_batch; the old positional
        # baddbmm(1, bias, 1, ...) form for beta/alpha is deprecated
        out = torch.baddbmm(bias, input, target_batch).view(*original_size, -1)

        target_score, noise_score = out[:, :, 0], out[:, :, 1:]
        return target_score, noise_score

    def ce_loss(self, target_idx, input):
        """Fallback cross-entropy loss over the full vocabulary."""
        score = F.linear(input, self.weight, self.bias)  # (B, N, V)
        loss = self.ce(score.view(-1, score.size(-1)), target_idx.view(-1)).view_as(target_idx)
        return loss
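

if __name__ == '__main__':
    # A minimal, self-contained shape check for the batched index-and-score
    # trick used in `get_score` above. It deliberately uses plain tensors
    # instead of constructing an IndexLinear, since the parent NCELoss
    # constructor arguments are defined elsewhere; all sizes below are
    # illustrative assumptions.
    B, N, E, V, N_r = 2, 5, 8, 100, 10
    weight = torch.randn(V, E)
    bias = torch.randn(V)
    hidden = torch.randn(B, N, E)
    target_idx = torch.randint(0, V, (B, N))
    noise_idx = torch.randint(0, V, (B, N, N_r))

    flat_hidden = hidden.contiguous().view(-1, E).unsqueeze(1)   # (B*N, 1, E)
    indices = torch.cat([target_idx.view(-1, 1),
                         noise_idx.view(-1, N_r)], dim=-1)       # (B*N, 1+N_r)
    w = (weight.index_select(0, indices.view(-1))
               .view(*indices.size(), -1)
               .transpose(1, 2))                                 # (B*N, E, 1+N_r)
    b = (bias.index_select(0, indices.view(-1))
             .view_as(indices)
             .unsqueeze(1))                                      # (B*N, 1, 1+N_r)
    out = torch.baddbmm(b, flat_hidden, w).view(B, N, -1)        # (B, N, 1+N_r)

    assert out[:, :, 0].shape == (B, N)        # target scores
    assert out[:, :, 1:].shape == (B, N, N_r)  # noise scores
    print('get_score shape check passed')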