@@ -358,6 +358,18 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3);
358
358
return subc (x, y).carry ;
359
359
}
360
360
361
// Less-than candidate that compares 128 bits at a time: it builds a uint128
// from words 2 and 3 of each operand and, only when those two values are equal,
// replaces both with the uint128 built from words 0 and 1. The result is a
// single `<` on whichever pair ended up selected.
// NOTE(review): assumes word 3 is the most significant word of uint256 and that
// uint128{a, b} takes (low, high) -- confirm against the type definitions.
+ [[gnu::noinline]] static bool lt_split (const uint256& x, const uint256& y) noexcept
362
+ {
363
+ auto xp = uint128{x[2 ], x[3 ]};
364
+ auto yp = uint128{y[2 ], y[3 ]};
365
+ if (xp == yp) // parts from words 2..3 tie, so words 0..1 decide
366
+ {
367
+ xp = uint128{x[0 ], x[1 ]};
368
+ yp = uint128{y[0 ], y[1 ]};
369
+ }
370
+ return xp < yp; // one comparison on the selected pair of parts
371
+ }
372
+
361
373
[[gnu::noinline]] static bool lt_wordcmp (const uint256& x, const uint256& y) noexcept
362
374
{
363
375
for (size_t i = 3 ; i >= 1 ; --i)
@@ -370,6 +382,34 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3);
370
382
return x[0 ] < y[0 ];
371
383
}
372
384
385
+ /// Finds the first differing word, scanning from the highest index down, and
+ /// returns the `<` comparison of that pair of words.
386
+ /// Proposed in https://github.com/chfast/intx/pull/269.
387
+ [[gnu::noinline]] static bool lt_ne (const uint256& x, const uint256& y) noexcept
388
+ {
389
+ for (auto i = uint256::num_words - 1 ; i > 0 ; --i) // stops at index 1; word 0 is handled below
390
+ {
391
+ if (x[i] != y[i])
392
+ return x[i] < y[i]; // first mismatch decides the result
393
+ }
394
+ return x[0 ] < y[0 ]; // all words above index 0 were equal
395
+ }
396
+
397
+ /// A modification of lt_ne(). Smaller code, saves one cmp instruction.
+ /// Keeps the current pair of words in locals and performs the `<` comparison
+ /// once, after the loop, instead of once per exit path.
398
+ [[gnu::noinline]] static bool lt_ne2 (const uint256& x, const uint256& y) noexcept
399
+ {
// NOTE(review): the initial load hard-codes index 3 while the loop starts at
// uint256::num_words - 1; the two agree only if num_words == 4 -- confirm.
400
+ auto a = x[3 ];
401
+ auto b = y[3 ];
402
+ for (auto i = uint256::num_words - 1 ; i > 0 ;) // the index is decremented inside the body
403
+ {
404
+ if (a != b)
405
+ break ; // a and b hold the first differing pair
406
+ --i;
407
+ a = x[i]; // load the next lower pair; when i reaches 0 this is word 0
408
+ b = y[i];
409
+ }
410
+ return a < b; // compares the differing pair, or the word-0 pair if none differed earlier
411
+ }
412
+
373
413
[[gnu::noinline]] static bool lt_halves (const uint256& x, const uint256& y) noexcept
374
414
{
375
415
const auto xhi = uint128{x[2 ], x[3 ]};
@@ -426,7 +466,10 @@ static void compare(benchmark::State& state)
426
466
}
427
467
// Registers each less-than implementation as a template argument of the
// `compare` benchmark, all with the same DenseRange(0, 256, 64) argument sweep.
// NOTE(review): how `compare` interprets the range argument is not visible
// here -- confirm against its body.
BENCHMARK_TEMPLATE (compare, lt_public)->DenseRange(0 , 256 , 64 );
428
468
BENCHMARK_TEMPLATE (compare, lt_sub)->DenseRange(0 , 256 , 64 );
469
+ BENCHMARK_TEMPLATE (compare, lt_split)->DenseRange(0 , 256 , 64 );
429
470
BENCHMARK_TEMPLATE (compare, lt_wordcmp)->DenseRange(0 , 256 , 64 );
471
+ BENCHMARK_TEMPLATE (compare, lt_ne)->DenseRange(0 , 256 , 64 );
472
+ BENCHMARK_TEMPLATE (compare, lt_ne2)->DenseRange(0 , 256 , 64 );
430
473
BENCHMARK_TEMPLATE (compare, lt_halves)->DenseRange(0 , 256 , 64 );
431
474
#if INTX_HAS_EXTINT
432
475
BENCHMARK_TEMPLATE (compare, lt_llvm)->DenseRange(0 , 256 , 64 );
0 commit comments