Skip to content

Commit

Permalink
Speed up kputll.
Browse files Browse the repository at this point in the history
The kputuw function is considerably faster as it encodes 2 digits at a
time and also utilises __builtin_clz.  This changes kputll to use the
same 2 digits at a time trick.  I have a __builtin_clzll variant too,
but with longer numbers it's not the main bottleneck and we fall back
to kputuw for small numbers.  This avoids complicating the code with
builtin checks and alternate versions.

An alternative, purely for sam_format1_append would be something like:

    static inline int kputll_fast(long long c, kstring_t *s) {
        return c <= INT_MAX && c >= INT_MIN ? kputw(c, s) : kputll(c, s);
    }
    #define kputll kputll_fast

This works as BAM/CRAM only support 32-bit numbers for POS, PNEXT and
TLEN anyway, so ll vs w is an irrelevant distinction.  However I chose
to modify the header file so it fixes other callers.

Overall compressed BAM to uncompressed SAM conversion is about 5%
quicker (tested on 10 million short-read seqs; it'll be minimal on
long seqs).  This includes decode time and other functions too.  The
sam_format1_append only component of that is about 15-25% quicker
depending on compiler and version.
  • Loading branch information
jkbonfield authored and whitwham committed Jul 16, 2024
1 parent 1187fa8 commit 19a27e9
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 11 deletions.
68 changes: 57 additions & 11 deletions htslib/kstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,17 +375,63 @@ static inline int kputw(int c, kstring_t *s)

static inline int kputll(long long c, kstring_t *s)
{
char buf[32];
int i, l = 0;
unsigned long long x = c;
if (c < 0) x = -x;
do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
if (c < 0) buf[l++] = '-';
if (ks_resize(s, s->l + l + 2) < 0)
return EOF;
for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
s->s[s->l] = 0;
return 0;
// Worst case expansion. One check reduces function size
// and aids inlining chance. Memory overhead is minimal.
if (ks_resize(s, s->l + 23) < 0)
return EOF;

unsigned long long x = c;
if (c < 0) {
x = -x;
s->s[s->l++] = '-';
}

if (x <= UINT32_MAX)
return kputuw(x, s);

static const char kputull_dig2r[] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
unsigned int l, j;
char *cp;

// Find out how long the number is (could consider clzll)
uint64_t m = 1;
l = 0;
if (sizeof(long long)==sizeof(uint64_t) && x >= 10000000000000000000ULL) {
// avoids overflow below
l = 20;
} else {
do {
l++;
m *= 10;
} while (x >= m);
}

// Add digits two at a time
j = l;
cp = s->s + s->l;
while (x >= 10) {
const char *d = &kputull_dig2r[2*(x%100)];
x /= 100;
memcpy(&cp[j-=2], d, 2);
}

// Last one (if necessary). We know that x < 10 by now.
if (j == 1)
cp[0] = x + '0';

s->l += l;
s->s[s->l] = 0;
return 0;
}

static inline int kputl(long c, kstring_t *s) {
Expand Down
81 changes: 81 additions & 0 deletions test/test_kstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,84 @@ static int test_kputw(int64_t start, int64_t end) {
return 0;
}

static int test_kputll_from_to(kstring_t *str, long long s, long long e) {
long long i = s;

for (;;) {
str->l = 0;
memset(str->s, 0xff, str->m);
if (kputll(i, str) < 0 || !str->s) {
perror("kputll");
return -1;
}
if (str->l >= str->m || str->s[str->l] != '\0') {
fprintf(stderr, "No NUL termination on string from kputll\n");
return -1;
}
if (i != strtoll(str->s, NULL, 10)) {
fprintf(stderr,
"kputll wrote the wrong value, expected %lld, got %s\n",
i, str->s);
return -1;
}
if (i >= e) break;
i++;
}
return 0;
}

static int test_kputll(long long start, long long end) {
kstring_t str = { 0, 0, NULL };
unsigned long long val;

str.s = malloc(2);
if (!str.s) {
perror("malloc");
return -1;
}
str.m = 2;

for (val = 1; val < INT64_MAX-5; val *= 10) {
if (test_kputll_from_to(&str, val >= 5 ? val - 5 : val, val) < 0) {
free(ks_release(&str));
return -1;
}
}

for (val = 1; val < INT64_MAX-5; val *= 10) {
long long valm = -val;
if (test_kputll_from_to(&str, valm >= 5 ? valm - 5 : valm, valm) < 0) {
free(ks_release(&str));
return -1;
}
}

if (test_kputll_from_to(&str, INT64_MAX - 5, INT64_MAX) < 0) {
free(ks_release(&str));
return -1;
}

if (test_kputll_from_to(&str, INT64_MIN, INT64_MIN + 5) < 0) {
free(ks_release(&str));
return -1;
}

str.m = 1; // Force a resize
int64_t start2 = (int64_t)start; // no larger on our platforms
int64_t end2 = (int64_t)end;
clamp(&start2, INT64_MIN, INT64_MAX);
clamp(&end2, INT64_MIN, INT64_MAX);

if (test_kputll_from_to(&str, start, end) < 0) {
free(ks_release(&str));
return -1;
}

free(ks_release(&str));

return 0;
}

// callback used by test_kgetline
static char *mock_fgets(char *str, int num, void *p) {
int *mock_state = (int*)p;
Expand Down Expand Up @@ -413,6 +491,9 @@ int main(int argc, char **argv) {
if (!test || strcmp(test, "kputw") == 0)
if (test_kputw(start, end) != 0) res = EXIT_FAILURE;

if (!test || strcmp(test, "kputll") == 0)
if (test_kputll(start, end) != 0) res = EXIT_FAILURE;

if (!test || strcmp(test, "kgetline") == 0)
if (test_kgetline() != 0) res = EXIT_FAILURE;

Expand Down

0 comments on commit 19a27e9

Please sign in to comment.