```diff
@@ -40,6 +40,7 @@ static OpRegistration reg("EmbeddingBagOffsetsSum", [](const Context& ctx) {
   IE_ASSERT(indices.rank() == 1);
   auto num_indices = indices.compute_shape().sizes()[0];
   auto offsets = cast_constant_operand<int32_t>(2, layer);
+
   int32_t default_index = -1;
   Tensor per_sample_weights;
   bool with_weights = false;
@@ -57,44 +58,43 @@ static OpRegistration reg("EmbeddingBagOffsetsSum", [](const Context& ctx) {
   offsets.push_back(num_indices);

   auto I_gathered = gather(I, indices);

   auto ndims = I_gathered.rank();
   std::vector<TensorDim> I_dims(ndims);
   std::vector<TensorIndex> I_idxs(ndims);
-  std::vector<Tensor> slices, Os;
+  std::vector<Tensor> Os;
   I_gathered.bind_dims(I_dims);
   auto O_dims = I_dims;
   auto O_idxs = I_idxs;

-  O_dims[0] = edsl::TensorDim(1);
-  for (size_t i = 0; i < num_indices; ++i) {
-    O_idxs[0] = I_idxs[0] - i;
-    auto slice = edsl::Contraction(O_dims, O_idxs).sum(I_gathered(I_idxs)).build();
-    if (with_weights == true) {
-      Tensor weight = op::slice(per_sample_weights).add_dim(i, i + 1);
-      slice = slice * weight;
+  if (with_weights) {
+    std::vector<int64_t> unsqueeze_axes;
+    for (int64_t i = 1; i < I_gathered.rank(); i++) {
+      unsqueeze_axes.push_back(i);
     }
-    slices.push_back(slice);
+    auto weights_expanded = op::unsqueeze(per_sample_weights, unsqueeze_axes);
+    I_gathered = I_gathered * weights_expanded;
   }

-  for (uint32_t l = 0; l < batch; ++l) {
-    if (offsets[l + 1] == offsets[l]) {
+  for (uint32_t i = 0; i < batch; ++i) {
+    if (offsets[i + 1] == offsets[i]) {
       if (default_index == -1) {
-        auto zero = cast(Tensor{0}, slices[0].dtype());
-        auto slice_shape = slices[0].compute_shape().sizes();
+        auto zero = cast(Tensor{0}, I_gathered.dtype());
+        auto slice_shape = I_gathered.compute_shape().sizes();
         slice_shape[0] = 1;
         std::vector<int> target_shape(begin(slice_shape), end(slice_shape));
         std::vector<int> target_axes = {};
         Os.push_back(op::broadcast(zero, target_shape, target_axes));
       } else {
         O_dims[0] = edsl::TensorDim(1);
         O_idxs[0] = I_idxs[0] - default_index;
-        Os.push_back(edsl::Contraction(O_dims, O_idxs).sum(I(I_idxs)));
+        Os.push_back(edsl::Contraction(O_dims, O_idxs).assign(I(I_idxs)));
       }
     } else {
-      Tensor t = slices[offsets[l]];
-      for (uint32_t i = offsets[l] + 1; i < offsets[l + 1]; ++i) {
-        t = t + slices[i];
-      }
-      Os.push_back(t);
+      O_dims[0] = edsl::TensorDim(offsets[i + 1] - offsets[i]);
+      O_idxs[0] = I_idxs[0] - offsets[i];
+      Tensor reduced = edsl::Contraction(O_dims, O_idxs).assign(I_gathered(I_idxs)).build();
+      reduced = op::unsqueeze(op::sum(reduced, edsl::make_tuple(0)), {0});
+      Os.push_back(reduced);
     }
   }
```
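For reference, the semantics this kernel implements can be sketched as standalone C++ over flat row-major vectors rather than the PlaidML eDSL. This is a hypothetical reference model, not the plugin's API: the name `emb_bag_offsets_sum` and its signature are illustrative, and it assumes `offsets` already has `num_indices` appended, as the code above arranges with `offsets.push_back(num_indices)`.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical reference model of EmbeddingBagOffsetsSum.
// emb:     num_rows x dim embedding table (row-major)
// indices: rows to gather from emb
// offsets: batch+1 bag boundaries into indices (last entry == indices.size())
// weights: optional per-sample weights (empty => unweighted)
// default_index: row copied for empty bags, or -1 for a zero row
std::vector<float> emb_bag_offsets_sum(const std::vector<float>& emb, std::size_t dim,
                                       const std::vector<int32_t>& indices,
                                       const std::vector<int32_t>& offsets,
                                       const std::vector<float>& weights,
                                       int32_t default_index) {
  std::size_t batch = offsets.size() - 1;
  std::vector<float> out(batch * dim, 0.0f);
  for (std::size_t b = 0; b < batch; ++b) {
    if (offsets[b + 1] == offsets[b]) {
      // Empty bag: copy the default row, or leave zeros when default_index == -1.
      if (default_index != -1) {
        for (std::size_t d = 0; d < dim; ++d) out[b * dim + d] = emb[default_index * dim + d];
      }
    } else {
      // Non-empty bag: sum the (optionally weighted) gathered rows.
      for (int32_t k = offsets[b]; k < offsets[b + 1]; ++k) {
        float w = weights.empty() ? 1.0f : weights[k];
        for (std::size_t d = 0; d < dim; ++d) out[b * dim + d] += w * emb[indices[k] * dim + d];
      }
    }
  }
  return out;
}
```

An empty bag produces a zero row unless `default_index` names a fallback row, matching the `default_index == -1` branch in the diff; the weighted case corresponds to the new code's pre-scaling of `I_gathered` by the unsqueezed `per_sample_weights`.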
tzerrell commented:

I think it might be possible to perform one overall `Contraction`, rather than having a separate `Contraction` from each iteration of the `for` loop that is later `concatenate`d. I will think more about whether this is possible... it would require a tensor that looked like `I_gathered` in the `offsets[i+1] != offsets[i]` locations and like `zero` or `I` in the `offsets[i+1] == offsets[i]` locations. It would also probably require careful use of `Constraint`s.

If it is possible we should do it: building one contraction will make optimizations work better than using multiple concatenated contractions, and we should expect better performance from doing that.
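One way to make this suggestion concrete is to read the per-bag sums as a single masked matrix product, O[b, d] = sum over k of M[b, k] * G[k, d], where G is the gathered (already weight-scaled) tensor and M[b, k] is 1 exactly when offsets[b] <= k < offsets[b+1]. Below is a minimal plain-C++ sketch of that formulation, assuming `offsets` carries the trailing `num_indices` entry; it is an illustration of the idea, not the eventual eDSL code. In eDSL the mask would instead be expressed through `Constraint`s on the contraction indices, and the `default_index` case for empty bags would still need separate handling (empty bags here simply yield zero rows).

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Single-contraction sketch: O = M * G with an implicit 0/1 bag-membership mask.
// G:       num_indices x dim gathered, weight-scaled rows (row-major)
// offsets: batch+1 bag boundaries (last entry == num_indices)
std::vector<float> bag_sum_as_one_contraction(const std::vector<float>& G, std::size_t dim,
                                              const std::vector<int32_t>& offsets) {
  std::size_t batch = offsets.size() - 1;
  std::size_t num_indices = offsets[batch];
  std::vector<float> O(batch * dim, 0.0f);
  for (std::size_t b = 0; b < batch; ++b) {
    for (std::size_t k = 0; k < num_indices; ++k) {
      // Membership test plays the role of the eDSL Constraints.
      bool in_bag = static_cast<int32_t>(k) >= offsets[b] &&
                    static_cast<int32_t>(k) < offsets[b + 1];
      if (!in_bag) continue;
      for (std::size_t d = 0; d < dim; ++d) O[b * dim + d] += G[k * dim + d];
    }
  }
  return O;
}
```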