refactor EmbeddingBagOffsetsSum
Zhibin Li committed Nov 12, 2020
1 parent 89d83c2 commit 6130516
Showing 1 changed file with 20 additions and 20 deletions.
@@ -40,6 +40,7 @@ static OpRegistration reg("EmbeddingBagOffsetsSum", [](const Context& ctx) {
IE_ASSERT(indices.rank() == 1);
auto num_indices = indices.compute_shape().sizes()[0];
auto offsets = cast_constant_operand<int32_t>(2, layer);

int32_t default_index = -1;
Tensor per_sample_weights;
bool with_weights = false;
@@ -57,44 +58,43 @@ static OpRegistration reg("EmbeddingBagOffsetsSum", [](const Context& ctx) {
offsets.push_back(num_indices);

auto I_gathered = gather(I, indices);

auto ndims = I_gathered.rank();
std::vector<TensorDim> I_dims(ndims);
std::vector<TensorIndex> I_idxs(ndims);
std::vector<Tensor> slices, Os;
std::vector<Tensor> Os;
I_gathered.bind_dims(I_dims);
auto O_dims = I_dims;
auto O_idxs = I_idxs;

O_dims[0] = edsl::TensorDim(1);
for (size_t i = 0; i < num_indices; ++i) {
O_idxs[0] = I_idxs[0] - i;
auto slice = edsl::Contraction(O_dims, O_idxs).sum(I_gathered(I_idxs)).build();
if (with_weights == true) {
Tensor weight = op::slice(per_sample_weights).add_dim(i, i + 1);
slice = slice * weight;
if (with_weights) {
std::vector<int64_t> unsqueeze_axes;
for (int64_t i = 1; i < I_gathered.rank(); i++) {
unsqueeze_axes.push_back(i);
}
slices.push_back(slice);
auto weights_expanded = op::unsqueeze(per_sample_weights, unsqueeze_axes);
I_gathered = I_gathered * weights_expanded;
}

for (uint32_t l = 0; l < batch; ++l) {
if (offsets[l + 1] == offsets[l]) {
for (uint32_t i = 0; i < batch; ++i) {

@tzerrell commented on Nov 12, 2020:

I think it might be possible to perform one overall Contraction, rather than having a separate Contraction for each iteration of the for loop, with the results concatenated later. I will think more about whether this is possible... it would require a tensor that looked like I_gathered in the offsets[i+1] != offsets[i] locations and like zero or I in the offsets[i+1] == offsets[i] locations. Also it would probably require careful use of Constraints.

If it is possible we should do it: building one contraction will make optimizations work better than using multiple concatenated contractions, and we should expect better performance from doing that.
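
A rough, untested sketch of what that single contraction might look like, assuming a precomputed one-hot mask tensor (called bag_mask here purely for illustration, of shape [batch, num_indices], with bag_mask[b, j] == 1 when index j falls in bag b) and assuming I_gathered is rank 2:

  // Illustrative only: with bag_mask available, all per-bag sums collapse into the single
  // contraction O[b, d] += bag_mask[b, j] * I_gathered[j, d].
  std::vector<TensorDim> mask_dims(2);
  std::vector<TensorDim> gathered_dims(2);
  bag_mask.bind_dims(mask_dims);
  I_gathered.bind_dims(gathered_dims);
  TensorIndex b, j, d;
  Tensor O = edsl::Contraction({mask_dims[0], gathered_dims[1]}, {b, d})
                 .sum(bag_mask(b, j) * I_gathered(j, d))
                 .build();

Building bag_mask from offsets is, of course, the part that would still need the careful Constraints mentioned above.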

@haoyouab commented on Nov 13, 2020:

I agree, but looking into the implementation of the Contraction class, I don't find anything like a conditional branching function that could fit:

a tensor that looked like I_gathered in the offsets[i+1] != offsets[i] locations and like zero or I in the offsets[i+1] == offsets[i] locations

The only function that deals with conditions is add_constraint, if I'm not misunderstanding, and it's not enough for what is mentioned above.
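
As far as I can tell, add_constraint only expresses static index bounds. A rough, untested sketch of the kind of thing it can do (a width-3 sliding-window sum over an example input tensor In; names purely illustrative):

  // The constraint bounds the otherwise-unbounded contraction index k to [0, 3).
  std::vector<TensorDim> in_dims(1);
  In.bind_dims(in_dims);
  TensorIndex i, k;
  Tensor windowed = edsl::Contraction(in_dims, {i})
                        .sum(In(i + k))
                        .add_constraint(k < 3)
                        .build();

And there's no obvious way to make such a bound vary per bag the way offsets[i + 1] - offsets[i] does.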

@haoyouab commented on Nov 13, 2020:

Also, offsets[i+1] - offsets[i] varies as the loop goes forward. I think it's much like the variable-length version of the split op, where split_lengths also varies. If we can do this in an overall Contraction, we might have to revise all the related code blocks that have outer for loops. What do you think?

@tzerrell commented on Nov 17, 2020:

I will set aside some time tomorrow to think about this further, but I think you may be correct that there is currently no way around this.

@tzerrell commented on Nov 18, 2020:

After consulting with @jbruestle, I found there is an approach that avoids for loops in C++. However, it requires summing scatter, where if you scatter to the same index multiple times the values are summed. Unfortunately, at the moment, our scatter does not support this (it will overwrite/race if you scatter to the same index).

The good news is that if we added summing scatter, then this approach would allow us to read offsets as a variable Tensor, not a constant.

I'm noting down the (untested) code that I believe would perform this operation if we had a summing scatter so we can come back to it:

  // Generate a tensor `bags` of shape `[num_indices]` that indicates which bag each index belongs to
  std::vector<TensorDim> num_indices(1);
  std::vector<TensorDim> batch_size(1);
  indices.bind_dims(num_indices);
  offsets.bind_dims(batch_size);
  Tensor count = edsl::index({num_indices[0], batch_size[0]}, 0);
  // above_offsets[i, b] == 1 when index i is at or past the start of bag b
  Tensor above_offsets = count >= offsets;
  // Summing over the bag axis b counts how many bags start at or before index i
  TensorIndex i, b;
  Tensor bags = edsl::Contraction({num_indices[0]}, {i}).sum(above_offsets(i, b)).build();
  // This produces 1-based indexing of the bags; so subtract 1 to get 0-based indexing
  bags = bags - 1;

  auto I_gathered = gather(I, indices);
  std::vector<TensorDim> emb_table_dims(I.rank());
  I.bind_dims(emb_table_dims);
  TensorDim num_emb = emb_table_dims[0];
  // Currently `scatter` requires the scatter size (number of legal index values) to be provided
  // as a 1D Tensor whose shape is the scatter size. So build a Tensor of shape [num_emb] but
  // don't bother to initialize it. This should be adapted once the scatter API is upgraded.
  Tensor scatter_shape_tensor(num_emb);
  // TODO: This needs to be a summing `scatter` (i.e., when the same index is scattered to
  // multiple times, sum), but current behavior is overwrite/race.
  auto result = scatter(I_gathered, bags, scatter_shape_tensor);

Note that this doesn't account for cases where the default_index and per_sample_weights parameters are used, which I did not consider after seeing the requirement for a summing scatter. More thought would be needed for them, but I'm optimistic they would be possible to include.
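
(For per_sample_weights, my guess is the same pre-multiplication used in this diff would compose with the scatter approach; a rough, untested sketch reusing the pattern from the refactored code above:)

  // Scale each gathered row by its per-sample weight before the summing scatter,
  // mirroring the unsqueeze-and-multiply pattern from the changed code in this commit.
  if (with_weights) {
    std::vector<int64_t> unsqueeze_axes;
    for (int64_t ax = 1; ax < I_gathered.rank(); ax++) {
      unsqueeze_axes.push_back(ax);
    }
    I_gathered = I_gathered * op::unsqueeze(per_sample_weights, unsqueeze_axes);
  }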

@haoyouab commented on Nov 26, 2020:

I'm not sure whether we should move this discussion to a more public place, but I hope this thread is not outdated.

I'm working on the summation feature of the edsl scatter op. According to the TensorFlow reference, summing duplicate indices is the default behavior, so I guess overwrite/race should be deprecated instead of co-existing with summing as two separate options.
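
To make the intended summing semantics concrete, here is a minimal plain-C++ sketch (not the eDSL API; the function name is only illustrative):

  #include <vector>

  // Duplicate indices accumulate instead of overwriting.
  std::vector<float> scatter_sum(const std::vector<float>& updates,
                                 const std::vector<int>& indices, size_t size) {
    std::vector<float> out(size, 0.0f);
    for (size_t k = 0; k < indices.size(); ++k) {
      out[indices[k]] += updates[k];  // '+=' is the only difference from an overwrite scatter
    }
    return out;
  }

  // scatter_sum({1, 2, 3}, {0, 0, 1}, 2) gives {3, 3}; an overwrite scatter would give {2, 3}.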

As for default_index, I think ScatterNDUpdate is a good fit for it. Pseudocode for EmbeddingSegmentsSum is below (EmbeddingBagOffsetsSum is similar, but with some extra code to get bags, as Tim listed above):

if (default_index) {
  auto default_slice = Contraction(..., ...).assign(I(default_index)).build();
  auto I_default_slice = op::repeat(default_slice).count(num_segments).axis(0);
}
// TODO: The API of the edsl scatter op needs modification to add an `update` feature which
// scatters slices into an existing tensor, and a `summing` feature to sum slices under the same index.
auto result = scatter(I_gathered, /*existing_tensor_to_update=*/I_default_slice, segment_ids, /*shape_of_output_tensor=*/shape);

But I'm not sure where this update feature should be implemented. I think the Tile eDSL level is better, and it could be added as an option/parameter like what Yanglei did for gather.

if (offsets[i + 1] == offsets[i]) {
if (default_index == -1) {
auto zero = cast(Tensor{0}, slices[0].dtype());
auto slice_shape = slices[0].compute_shape().sizes();
auto zero = cast(Tensor{0}, I_gathered.dtype());
auto slice_shape = I_gathered.compute_shape().sizes();
slice_shape[0] = 1;
std::vector<int> target_shape(begin(slice_shape), end(slice_shape));
std::vector<int> target_axes = {};
Os.push_back(op::broadcast(zero, target_shape, target_axes));

@tzerrell commented on Nov 12, 2020:

If we had the like function we discussed earlier this week, we would want to use it here. It's also possible to switch to a better workaround for dealing with the absence of such a like function. (I.e., one that avoids compute_shape.) But we can just wait for a like function IMO.

@tzerrell commented on Nov 12, 2020:

Also this entire block of code might be obsolete with a contraction for the outer loop (like I discussed in other comments).

@haoyouab commented on Nov 13, 2020:

Sure. I will keep an eye on this and update it whenever a new possible solution comes up.

} else {
O_dims[0] = edsl::TensorDim(1);
O_idxs[0] = I_idxs[0] - default_index;
Os.push_back(edsl::Contraction(O_dims, O_idxs).sum(I(I_idxs)));
Os.push_back(edsl::Contraction(O_dims, O_idxs).assign(I(I_idxs)));
}
} else {
Tensor t = slices[offsets[l]];
for (uint32_t i = offsets[l] + 1; i < offsets[l + 1]; ++i) {
t = t + slices[i];
}
Os.push_back(t);
O_dims[0] = edsl::TensorDim(offsets[i + 1] - offsets[i]);
O_idxs[0] = I_idxs[0] - offsets[i];
Tensor reduced = edsl::Contraction(O_dims, O_idxs).assign(I_gathered(I_idxs)).build();

@tzerrell commented on Nov 12, 2020:

Contractions are higher performance than for loops. PlaidML can access, optimize, and parallelize the loop structure of contractions. It cannot do so for C++ for loops. So although this Contraction is more verbose to express than the previous version of the code, I expect it to be more efficient since it has a Contraction rather than a C++ for loop.

reduced = op::unsqueeze(op::sum(reduced, edsl::make_tuple(0)), {0});
Os.push_back(reduced);
}
}
