Skip to content

Commit

Permalink
Fix @pg linking when records make a loop
Browse files Browse the repository at this point in the history
If all the @pg records passed to sam_hdr_link_pg() form a single
PP loop, all entries in the sam_hrecs_t::pg_end array it builds
get set to -1, indicating that there are no chain start points.
These entries are then removed to make the final list, but
due to a bug in handling the case where there are no PP links
the number of entries was incorrectly set to 1 instead of 0. This
could lead to an out-of-bounds read in sam_hdr_add_pg() when
linking new @pg entries to the existing ones.

Fix by ensuring that only valid end points are returned in the
sam_hrecs_t::pg_end array, and the length is set to zero if
no ends are detected due to a loop.

Adds a warning if a @pg record is found with a PP link to itself.
Detecting longer loops is left for future work.  Fixes another
warning which incorrectly said 'SN' instead of 'ID' in its message.

Adds an assert() in sam_hdr_add_pg() to catch any other cases of
the out-of-bounds read.

Adds tests for loopy @pg records.

Thanks to Octavio Galland for the bug report.
  • Loading branch information
daviesrob authored and whitwham committed Nov 23, 2023
1 parent 2140d03 commit db16b32
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 8 deletions.
28 changes: 20 additions & 8 deletions header.c
Original file line number Diff line number Diff line change
Expand Up @@ -2139,23 +2139,34 @@ static int sam_hdr_link_pg(sam_hdr_t *bh) {
k = kh_get(m_s2i, hrecs->pg_hash, tag->str+3);

if (k == kh_end(hrecs->pg_hash)) {
hts_log_warning("PG line with PN:%s has a PP link to missing program '%s'",
hts_log_warning("PG line with ID:%s has a PP link to missing program '%s'",
hrecs->pg[i].name, tag->str+3);
continue;
}

hrecs->pg[i].prev_id = hrecs->pg[kh_val(hrecs->pg_hash, k)].id;
hrecs->pg_end[kh_val(hrecs->pg_hash, k)] = -1;
chain_size[i] = chain_size[kh_val(hrecs->pg_hash, k)]+1;
int pp_idx = kh_val(hrecs->pg_hash, k);
if (pp_idx == i) {
hts_log_warning("PG line with ID:%s has a PP link to itself",
hrecs->pg[i].name);
continue;
}

hrecs->pg[i].prev_id = hrecs->pg[pp_idx].id;
hrecs->pg_end[pp_idx] = -1;
chain_size[i] = chain_size[pp_idx]+1;
}

int last_end = -1;
for (i = j = 0; i < hrecs->npg; i++) {
if (hrecs->pg_end[i] != -1 && chain_size[i] > 0)
hrecs->pg_end[j++] = hrecs->pg_end[i];
if (hrecs->pg_end[i] != -1) {
last_end = hrecs->pg_end[i];
if (chain_size[i] > 0)
hrecs->pg_end[j++] = hrecs->pg_end[i];
}
}
/* Only leafs? Choose the last one! */
if (!j && hrecs->npg_end > 0) {
hrecs->pg_end[0] = hrecs->pg_end[hrecs->npg_end-1];
if (!j && hrecs->npg_end > 0 && last_end >= 0) {
hrecs->pg_end[0] = last_end;
j = 1;
}

Expand Down Expand Up @@ -2294,6 +2305,7 @@ int sam_hdr_add_pg(sam_hdr_t *bh, const char *name, ...) {
free(end);
return -1;
}
assert(end[i] >= 0 && end[i] < hrecs->npg);
va_start(args, name);
if (-1 == sam_hrecs_vadd(hrecs, "PG", args,
"ID", id,
Expand Down
81 changes: 81 additions & 0 deletions test/sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,86 @@ static void test_header_pg_lines(void) {
return;
}

// Test handling of @PG PP loops
static void test_header_pg_loops(void) {
static const char *header_texts[2] = {
// Loop to self
"data:,"
"@HD\tVN:1.5\n"
"@PG\tID:loop1\tPN:prog1\tPP:loop1\n",

// circuit
"data:,"
"@HD\tVN:1.5\n"
"@PG\tID:loop1\tPN:prog1\tPP:loop2\n"
"@PG\tID:loop2\tPN:prog2\tPP:loop1\n"
};

static const char *expected[2] = {
"@HD\tVN:1.5\n"
"@PG\tID:loop1\tPN:prog1\tPP:loop1\n"
"@PG\tID:new_prog\tPN:new_prog\tPP:loop1\n",

"@HD\tVN:1.5\n"
"@PG\tID:loop1\tPN:prog1\tPP:loop2\n"
"@PG\tID:loop2\tPN:prog2\tPP:loop1\n"
"@PG\tID:new_prog\tPN:new_prog\n"
};

int i, r;
samFile *in = NULL;
sam_hdr_t *header = NULL;
const char *text = NULL;
enum htsLogLevel old_log_level = hts_get_log_level();

// Silence header loop warning
hts_set_log_level(HTS_LOG_OFF);

for (i = 0; i < 2; i++) {
in = sam_open(header_texts[i], "r");
if (!in) {
fail("couldn't open file for PG loop test %d", i);
goto err;
}

header = sam_hdr_read(in);
if (!header) {
fail("reading header for PG loop test %d", i);
goto err;
}

r = sam_hdr_add_pg(header, "new_prog", NULL);
if (r != 0) {
fail("sam_hdr_add_pg new_prog for PG loop test %d", i);
goto err;
}

text = sam_hdr_str(header);
if (!text || strcmp(text, expected[i]) != 0) {
fail("edited header does not match expected version for PG loop test %d", i);
fprintf(stderr,
"---------- Expected:\n%s\n"
"++++++++++ Got:\n%s\n"
"====================\n",
expected[i], text);
goto err;
}
sam_hdr_destroy(header);
header = NULL;
if (in) sam_close(in);
in = NULL;
}
hts_set_log_level(old_log_level);
return;

err:
sam_hdr_destroy(header);
header = NULL;
if (in) sam_close(in);
hts_set_log_level(old_log_level);
return;
}

static void test_header_updates(void) {
static const char header_text[] =
"@HD\tVN:1.4\n"
Expand Down Expand Up @@ -2256,6 +2336,7 @@ int main(int argc, char **argv)
samrecord_layout();
use_header_api();
test_header_pg_lines();
test_header_pg_loops();
test_header_updates();
test_header_remove_lines();
test_header_ref_altnames();
Expand Down

0 comments on commit db16b32

Please sign in to comment.