Skip to content

Commit 8b3a486

Browse files
laanwjvijaydasmp
authored andcommitted
Merge bitcoin#23025: bench: update nanobench add -min_time
e148a52 bench: fixed ubsan implicit conversion (Martin Ankerl) da4e2f1 bench: various args improvements (Jon Atack) d312fd9 bench: clean up includes (Jon Atack) 1f10f16 bench: add usage description and documentation (Martin Ankerl) d3c6f8b bench: introduce -min_time argument (Martin Ankerl) 9fef832 bench: make EvictionProtection.* work with any number of iterations (Martin Ankerl) 153e686 bench: change AddrManGood to AddrManAddThenGood (Martin Ankerl) 468b232 bench: remove unnecessary & incorrect multiplication in MuHashDiv (Martin Ankerl) eed99cf bench: update nanobench from 4.3.4 to 4.3.6 (Martin Ankerl) Pull request description: This PR updates the nanobench with the latest release from upstream, v4.3.6. It fixes the missing performance counters. Due to discussions on bitcoin#22999 I have done some work that should make the benchmark results more reliable. It introduces a new flag `-min_time` that allows to run a benchmark for much longer then the default. When results are unreliable, choosing a large timeframe here should usually get repeatable results even when frequency scaling cannot be disabled. The default is now 10ms. For this to work I have changed the `AddrManGood` and `EvictionProtection` benchmarks so they work with any number of iterations. Also, this adds more usage documentation to `bench_bitcoin -h` and I've cherry-picked two changes from bitcoin#22999 authored by Jon Atack ACKs for top commit: jonatack: re-ACK e148a52 laanwj: Code review ACK e148a52 Tree-SHA512: 2da6de19a5c85ac234b190025e195c727546166dbb75e3f9267e667a73677ba1e29b7765877418a42b1407b65df901e0130763936525e6f1450f18f08837c40c
1 parent d7a20b3 commit 8b3a486

6 files changed

+88
-28
lines changed

src/bench/bench.cpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,20 @@
44

55
#include <bench/bench.h>
66

7-
#include <chainparams.h>
87
#include <fs.h>
98
#include <test/util/setup_common.h>
10-
#include <validation.h>
119

1210
#include <algorithm>
11+
#include <chrono>
1312
#include <fstream>
13+
#include <functional>
1414
#include <iostream>
15+
#include <map>
1516
#include <regex>
17+
#include <string>
18+
#include <vector>
19+
20+
using namespace std::chrono_literals;
1621

1722
const std::function<void(const std::string&)> G_TEST_LOG_FUN{};
1823

@@ -66,6 +71,12 @@ void benchmark::BenchRunner::RunAll(const Args& args)
6671

6772
Bench bench;
6873
bench.name(p.first);
74+
if (args.min_time > 0ms) {
75+
// convert to nanos before dividing to reduce rounding errors
76+
std::chrono::nanoseconds min_time_ns = args.min_time;
77+
bench.minEpochTime(min_time_ns / bench.epochs());
78+
}
79+
6980
if (args.asymptote.empty()) {
7081
p.second(bench);
7182
} else {

src/bench/bench.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,12 @@ using ankerl::nanobench::Bench;
4242
typedef std::function<void(Bench&)> BenchFunction;
4343

4444
struct Args {
45-
std::string regex_filter;
4645
bool is_list_only;
46+
std::chrono::milliseconds min_time;
4747
std::vector<double> asymptote;
4848
fs::path output_csv;
4949
fs::path output_json;
50+
std::string regex_filter;
5051
};
5152

5253
class BenchRunner

src/bench/bench_bitcoin.cpp

+59-5
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,31 @@
44

55
#include <bench/bench.h>
66

7+
#include <clientversion.h>
78
#include <crypto/sha256.h>
89
#include <fs.h>
910
#include <stacktraces.h>
1011
#include <util/strencodings.h>
1112
#include <util/system.h>
1213

1314
#include <bls/bls.h>
15+
#include <chrono>
16+
#include <cstdint>
17+
#include <iostream>
18+
#include <sstream>
19+
#include <vector>
1420

1521
static const char* DEFAULT_BENCH_FILTER = ".*";
22+
static constexpr int64_t DEFAULT_MIN_TIME_MS{10};
1623

1724
static void SetupBenchArgs(ArgsManager& argsman)
1825
{
1926
SetupHelpOptions(argsman);
2027

21-
argsman.AddArg("-asymptote=n1,n2,n3,...", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
28+
argsman.AddArg("-asymptote=<n1,n2,n3,...>", "Test asymptotic growth of the runtime of an algorithm, if supported by the benchmark", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
2229
argsman.AddArg("-filter=<regex>", strprintf("Regular expression filter to select benchmark by name (default: %s)", DEFAULT_BENCH_FILTER), ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
23-
argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
30+
argsman.AddArg("-list", "List benchmarks without executing them", ArgsManager::ALLOW_BOOL, OptionsCategory::OPTIONS);
31+
argsman.AddArg("-min_time=<milliseconds>", strprintf("Minimum runtime per benchmark, in milliseconds (default: %d)", DEFAULT_MIN_TIME_MS), ArgsManager::ALLOW_INT, OptionsCategory::OPTIONS);
2432
argsman.AddArg("-output_csv=<output.csv>", "Generate CSV file with the most important benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
2533
argsman.AddArg("-output_json=<output.json>", "Generate JSON file with all benchmark results", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
2634
}
@@ -50,16 +58,62 @@ int main(int argc, char** argv)
5058
}
5159

5260
if (HelpRequested(argsman)) {
53-
std::cout << argsman.GetHelpMessage();
61+
std::cout << "Usage: bench_dash [options]\n"
62+
"\n"
63+
<< argsman.GetHelpMessage()
64+
<< "Description:\n"
65+
"\n"
66+
" bench_dash executes microbenchmarks. The quality of the benchmark results\n"
67+
" highly depend on the stability of the machine. It can sometimes be difficult\n"
68+
" to get stable, repeatable results, so here are a few tips:\n"
69+
"\n"
70+
" * Use pyperf [1] to disable frequency scaling, turbo boost etc. For best\n"
71+
" results, use CPU pinning and CPU isolation (see [2]).\n"
72+
"\n"
73+
" * Each call of run() should do exactly the same work. E.g. inserting into\n"
74+
" a std::vector doesn't do that as it will reallocate on certain calls. Make\n"
75+
" sure each run has exactly the same preconditions.\n"
76+
"\n"
77+
" * If results are still not reliable, increase runtime with e.g.\n"
78+
" -min_time=5000 to let a benchmark run for at least 5 seconds.\n"
79+
"\n"
80+
" * bench_dash uses nanobench [3] for which there is extensive\n"
81+
" documentation available online.\n"
82+
"\n"
83+
"Environment Variables:\n"
84+
"\n"
85+
" To attach a profiler you can run a benchmark in endless mode. This can be\n"
86+
" done with the environment variable NANOBENCH_ENDLESS. E.g. like so:\n"
87+
"\n"
88+
" NANOBENCH_ENDLESS=MuHash ./bench_dash -filter=MuHash\n"
89+
"\n"
90+
" In rare cases it can be useful to suppress stability warnings. This can be\n"
91+
" done with the environment variable NANOBENCH_SUPPRESS_WARNINGS, e.g:\n"
92+
"\n"
93+
" NANOBENCH_SUPPRESS_WARNINGS=1 ./bench_dash\n"
94+
"\n"
95+
"Notes:\n"
96+
"\n"
97+
" 1. pyperf\n"
98+
" https://github.com/psf/pyperf\n"
99+
"\n"
100+
" 2. CPU pinning & isolation\n"
101+
" https://pyperf.readthedocs.io/en/latest/system.html\n"
102+
"\n"
103+
" 3. nanobench\n"
104+
" https://github.com/martinus/nanobench\n"
105+
"\n";
106+
54107
return EXIT_SUCCESS;
55108
}
56109

57110
benchmark::Args args;
58-
args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER);
59-
args.is_list_only = argsman.GetBoolArg("-list", false);
60111
args.asymptote = parseAsymptote(argsman.GetArg("-asymptote", ""));
112+
args.is_list_only = argsman.GetBoolArg("-list", false);
113+
args.min_time = std::chrono::milliseconds(argsman.GetArg("-min_time", DEFAULT_MIN_TIME_MS));
61114
args.output_csv = fs::PathFromString(argsman.GetArg("-output_csv", ""));
62115
args.output_json = fs::PathFromString(argsman.GetArg("-output_json", ""));
116+
args.regex_filter = argsman.GetArg("-filter", DEFAULT_BENCH_FILTER);
63117

64118
benchmark::BenchRunner::RunAll(args);
65119

src/bench/crypto_hash.cpp

+2-6
Original file line numberDiff line numberDiff line change
@@ -249,9 +249,9 @@ static void MuHash(benchmark::Bench& bench)
249249
{
250250
MuHash3072 acc;
251251
unsigned char key[32] = {0};
252-
int i = 0;
252+
uint32_t i = 0;
253253
bench.run([&] {
254-
key[0] = ++i;
254+
key[0] = ++i & 0xFF;
255255
acc *= MuHash3072(key);
256256
});
257257
}
@@ -273,10 +273,6 @@ static void MuHashDiv(benchmark::Bench& bench)
273273
FastRandomContext rng(true);
274274
MuHash3072 muhash{rng.randbytes(32)};
275275

276-
for (size_t i = 0; i < bench.epochIterations(); ++i) {
277-
acc *= muhash;
278-
}
279-
280276
bench.run([&] {
281277
acc /= muhash;
282278
});

src/bench/peer_eviction.cpp

+4-6
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,17 @@ static void EvictionProtectionCommon(
2020
{
2121
using Candidates = std::vector<NodeEvictionCandidate>;
2222
FastRandomContext random_context{true};
23-
bench.warmup(100).epochIterations(1100);
2423

2524
Candidates candidates{GetRandomNodeEvictionCandidates(num_candidates, random_context)};
2625
for (auto& c : candidates) {
2726
candidate_setup_fn(c);
2827
}
2928

30-
std::vector<Candidates> copies{
31-
static_cast<size_t>(bench.epochs() * bench.epochIterations()), candidates};
32-
size_t i{0};
29+
3330
bench.run([&] {
34-
ProtectEvictionCandidatesByRatio(copies.at(i));
35-
++i;
31+
// creating a copy has an overhead of about 3%, so it does not influence the benchmark results much.
32+
auto copy = candidates;
33+
ProtectEvictionCandidatesByRatio(copy);
3634
});
3735
}
3836

src/bench/rollingbloom.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,16 @@ static void RollingBloom(benchmark::Bench& bench)
1313
uint32_t count = 0;
1414
bench.run([&] {
1515
count++;
16-
data[0] = count;
17-
data[1] = count >> 8;
18-
data[2] = count >> 16;
19-
data[3] = count >> 24;
16+
data[0] = count & 0xFF;
17+
data[1] = (count >> 8) & 0xFF;
18+
data[2] = (count >> 16) & 0xFF;
19+
data[3] = (count >> 24) & 0xFF;
2020
filter.insert(data);
2121

22-
data[0] = count >> 24;
23-
data[1] = count >> 16;
24-
data[2] = count >> 8;
25-
data[3] = count;
22+
data[0] = (count >> 24) & 0xFF;
23+
data[1] = (count >> 16) & 0xFF;
24+
data[2] = (count >> 8) & 0xFF;
25+
data[3] = count & 0xFF;
2626
filter.contains(data);
2727
});
2828
}

0 commit comments

Comments
 (0)