From db16b32376877e062e1f7c04c908ee2c946cf420 Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Thu, 16 Nov 2023 17:46:08 +0000 Subject: [PATCH] Fix @PG linking when records make a loop If all the @PG records passed to sam_hdr_link_pg() form a single PP loop, all entries in the sam_hrecs_t::pg_end array it builds get set to -1, indicating that there are no chain start points. These entries are then removed to make the final list, but due to a bug in handling the case where there are no PP links the number of entries was incorrectly set to 1 instead of 0. This could lead to an out-of-bounds read in sam_hdr_add_pg() when linking new @PG entries to the existing ones. Fix by ensuring that only valid end points are returned in the sam_hrecs_t::pg_end array, and the length is set to zero if no ends are detected due to a loop. Adds a warning if a @PG record is found with a PP link to itself. Detecting longer loops is left for future work. Fixes another warning which incorrectly said 'SN' instead of 'ID' in its message. Adds an assert() in sam_hdr_add_pg() to catch any other cases of the out-of-bounds read. Adds tests for loopy @PG records. Thanks to Octavio Galland for the bug report. --- header.c | 28 +++++++++++++------ test/sam.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 8 deletions(-) diff --git a/header.c b/header.c index 43fd52c57..5161034f4 100644 --- a/header.c +++ b/header.c @@ -2139,23 +2139,34 @@ static int sam_hdr_link_pg(sam_hdr_t *bh) { k = kh_get(m_s2i, hrecs->pg_hash, tag->str+3); if (k == kh_end(hrecs->pg_hash)) { - hts_log_warning("PG line with PN:%s has a PP link to missing program '%s'", + hts_log_warning("PG line with ID:%s has a PP link to missing program '%s'", hrecs->pg[i].name, tag->str+3); continue; } - hrecs->pg[i].prev_id = hrecs->pg[kh_val(hrecs->pg_hash, k)].id; - hrecs->pg_end[kh_val(hrecs->pg_hash, k)] = -1; - chain_size[i] = chain_size[kh_val(hrecs->pg_hash, k)]+1; + int pp_idx = kh_val(hrecs->pg_hash, k); + if (pp_idx == i) { + hts_log_warning("PG line with ID:%s has a PP link to itself", + hrecs->pg[i].name); + continue; + } + + hrecs->pg[i].prev_id = hrecs->pg[pp_idx].id; + hrecs->pg_end[pp_idx] = -1; + chain_size[i] = chain_size[pp_idx]+1; } + int last_end = -1; for (i = j = 0; i < hrecs->npg; i++) { - if (hrecs->pg_end[i] != -1 && chain_size[i] > 0) - hrecs->pg_end[j++] = hrecs->pg_end[i]; + if (hrecs->pg_end[i] != -1) { + last_end = hrecs->pg_end[i]; + if (chain_size[i] > 0) + hrecs->pg_end[j++] = hrecs->pg_end[i]; + } } /* Only leafs? Choose the last one! */ - if (!j && hrecs->npg_end > 0) { - hrecs->pg_end[0] = hrecs->pg_end[hrecs->npg_end-1]; + if (!j && hrecs->npg_end > 0 && last_end >= 0) { + hrecs->pg_end[0] = last_end; j = 1; } @@ -2294,6 +2305,7 @@ int sam_hdr_add_pg(sam_hdr_t *bh, const char *name, ...) { free(end); return -1; } + assert(end[i] >= 0 && end[i] < hrecs->npg); va_start(args, name); if (-1 == sam_hrecs_vadd(hrecs, "PG", args, "ID", id, diff --git a/test/sam.c b/test/sam.c index eb404bd65..00edaff6f 100644 --- a/test/sam.c +++ b/test/sam.c @@ -1004,6 +1004,86 @@ static void test_header_pg_lines(void) { return; } +// Test handling of @PG PP loops +static void test_header_pg_loops(void) { + static const char *header_texts[2] = { + // Loop to self + "data:," + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop1\n", + + // circuit + "data:," + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop2\n" + "@PG\tID:loop2\tPN:prog2\tPP:loop1\n" + }; + + static const char *expected[2] = { + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop1\n" + "@PG\tID:new_prog\tPN:new_prog\tPP:loop1\n", + + "@HD\tVN:1.5\n" + "@PG\tID:loop1\tPN:prog1\tPP:loop2\n" + "@PG\tID:loop2\tPN:prog2\tPP:loop1\n" + "@PG\tID:new_prog\tPN:new_prog\n" + }; + + int i, r; + samFile *in = NULL; + sam_hdr_t *header = NULL; + const char *text = NULL; + enum htsLogLevel old_log_level = hts_get_log_level(); + + // Silence header loop warning + hts_set_log_level(HTS_LOG_OFF); + + for (i = 0; i < 2; i++) { + in = sam_open(header_texts[i], "r"); + if (!in) { + fail("couldn't open file for PG loop test %d", i); + goto err; + } + + header = sam_hdr_read(in); + if (!header) { + fail("reading header for PG loop test %d", i); + goto err; + } + + r = sam_hdr_add_pg(header, "new_prog", NULL); + if (r != 0) { + fail("sam_hdr_add_pg new_prog for PG loop test %d", i); + goto err; + } + + text = sam_hdr_str(header); + if (!text || strcmp(text, expected[i]) != 0) { + fail("edited header does not match expected version for PG loop test %d", i); + fprintf(stderr, + "---------- Expected:\n%s\n" + "++++++++++ Got:\n%s\n" + "====================\n", + expected[i], text); + goto err; + } + sam_hdr_destroy(header); + header = NULL; + if (in) sam_close(in); + in = NULL; + } + hts_set_log_level(old_log_level); + return; + + err: + sam_hdr_destroy(header); + header = NULL; + if (in) sam_close(in); + hts_set_log_level(old_log_level); + return; +} + static void test_header_updates(void) { static const char header_text[] = "@HD\tVN:1.4\n" @@ -2256,6 +2336,7 @@ int main(int argc, char **argv) samrecord_layout(); use_header_api(); test_header_pg_lines(); + test_header_pg_loops(); test_header_updates(); test_header_remove_lines(); test_header_ref_altnames();