Skip to content

Commit

Permalink
Update comments.
Browse files Browse the repository at this point in the history
Signed-off-by: Haruki Imai <[email protected]>
  • Loading branch information
imaihal committed Feb 5, 2025
1 parent 47792b6 commit b6f25e9
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 16 deletions.
23 changes: 14 additions & 9 deletions src/Dialect/ONNX/ElementsAttr/ElementsAttrBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,8 +895,8 @@ ElementsAttr ElementsAttrBuilder::reduce(ElementsAttr elms,
StridesRange<1> axesRange(axesShape, {axesStrides});

auto fetchBatch = [&](size_t threadNumber) {
// Return all data without splitting for serial execution.
if (threadNumber == -1)
// Return all data without splitting for sequential execution.
if (threadNumber == SIZE_MAX)
return llvm::make_range(batch.begin(), batch.end());
// Each thread fetches the same batch size. The leftovers are set in the
// threads with small thread number.
Expand Down Expand Up @@ -934,16 +934,21 @@ ElementsAttr ElementsAttrBuilder::reduce(ElementsAttr elms,
}
}
};
// Using 'parallelFor()' introduces a large overhead. It is not possible to
// disable multi-threading by calling 'ctx->isMultithreadingDisabled()'
// here. So, to avoid the overhead, call work() directly if the input size
// is less than `minCount`.
// printf("batch.size() * axesRange.size() %ld\n", batch.size() *
// axesRange.size());
// Using 'parallelFor()' introduces a large overhead. The following are
// example results of 'test_reduce_sum_positive_axis()' in
// 'test/mlir/onnx/onnx_constprop.mlir' on macOS. From these results, we
// should not use parallel execution for small inputs.
//
// Sequential(work()) | parallel(parallelFor())
// -----------------------------------------------
// 0.457 (msec) | 0.185 (msec)
//
// To avoid this overhead, call work() directly if the input size is less than
// `minCount`.
constexpr size_t minCount = 2000;
size_t inputCount = batch.size() * axesRange.size();
if (inputCount < minCount)
work(-1);
work(SIZE_MAX); // Sequential
else
parallelFor(ctx, 0, ctx->getNumThreads(), work);
});
Expand Down
13 changes: 6 additions & 7 deletions src/Dialect/ONNX/ElementsAttr/ElementsAttrBuilder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ class ElementsAttrBuilder {
return [fun = std::move(fun), ctx](
llvm::MutableArrayRef<WideNum> data) -> void {
auto fetchBatch = [&](size_t threadNumber) {
// Return all data without splitting for serial execution.
if (threadNumber == -1)
// Return all data without splitting for sequential execution.
if (threadNumber == SIZE_MAX)
return llvm::make_range(data.begin(), data.end());
// Each thread fetches the same batch size. The leftovers are set in the
// threads with small thread number.
Expand All @@ -277,13 +277,12 @@ class ElementsAttrBuilder {
for (WideNum &n : batch)
n = fun(n);
};
// Using 'parallelFor()' introduces a large overhead. It is not possible to
// disable multi-threading by calling 'ctx->isMultithreadingDisabled()'
// here. So, to avoid the overhead, call work() directly if the input size
// is less than `minCount`.
// Using 'parallelFor()' introduces a large overhead.
// To avoid this overhead, call work() directly if the input size is less than
// `minCount`.
constexpr size_t minCount = 1000;
if (data.size() < minCount)
work(-1);
work(SIZE_MAX); // Sequential
else
parallelFor(ctx, 0, ctx->getNumThreads(), work);
};
Expand Down

0 comments on commit b6f25e9

Please sign in to comment.