-
Notifications
You must be signed in to change notification settings - Fork 442
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bcf_sr_seek() still fails #1362
Comments
I think the problem here is that
At least for your example plugin, that seems to be enough to reset the reader and allow it to start again at the desired position. If that still isn't enough to fix the problem, then you could, of course, fall back to closing the reader and opening a new one for the second pass through. |
@daviesrob this seems to work only once and then weird stuff happens. My application segfaults when using the Here a more complicated example with two contigs:
The following plugin tries to read twice the first contig and twice the second contig while using #include <htslib/synced_bcf_reader.h>
#include "bcftools.h"
/* Returns the plugin's description text, which here is just a newline.
 * (Presumably the bcftools plugin "about" hook; confirm against the plugin API.) */
const char *about(void)
{
    static const char description[] = "\n";
    return description;
}
// Reproduction driver: opens "A.vcf.gz" through a synced reader that requires
// an index, then makes four passes over the file (contig id 0 twice, then
// contig id 1 twice). Each pass first seeks with a NULL sequence name and then
// seeks to the wanted contig, printing every record to stderr until a record
// from a different contig is seen. Always returns 0; argc/argv are unused.
int run(int argc, char *argv[]) {
bcf_srs_t *sr = bcf_sr_init();
bcf_sr_set_opt(sr, BCF_SR_REQUIRE_IDX);
if (!bcf_sr_add_reader(sr, "A.vcf.gz")) error("Failed to open %s: %s\n", "A.vcf.gz", bcf_sr_strerror(sr->errnum));
// Header of the only reader (index 0); used to translate contig ids to names.
bcf_hdr_t *hdr = bcf_sr_get_header(sr, 0);
int i;
// read first contig
// NULL/0 seek first, then seek to contig id 0 — presumably the NULL seek is
// meant to reset the reader before repositioning (see the thread discussion).
bcf_sr_seek(sr, NULL, 0);
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 0), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
// Stop as soon as the reader moves past contig id 0.
if (bcf_sr_get_line(sr, 0)->rid != 0) {
fprintf(stderr, "round1: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
// pos is printed +1 (the stored value is presumably 0-based — confirm).
fprintf(stderr, "round1: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// read first contig a second time
bcf_sr_seek(sr, NULL, 0);
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 0), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 0) {
fprintf(stderr, "round2: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round2: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// read second contig
bcf_sr_seek(sr, NULL, 0);
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 1), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
// Stop as soon as the reader moves past contig id 1.
if (bcf_sr_get_line(sr, 0)->rid != 1) {
fprintf(stderr, "round3: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round3: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// read second contig a second time
bcf_sr_seek(sr, NULL, 0);
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 1), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 1) {
fprintf(stderr, "round4: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round4: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// The same reader object served all four passes; release it once at the end.
bcf_sr_destroy(sr);
return 0;
} The output is as follows:
When trying to seek on the second contig for the second time, This issue could arise with an application that is trying for example to phase all chromosomes one by one and needs to read them once to import the genotypes in memory and a second time to write the updated output VCFs with the phased genotypes. And it used to work fine with HTSlib 1.13 |
Hmm, it looks like it won't be quite so easy to work around then. If the problem appeared between 1.13 and 1.14, then it must have been in #1327 as that was the only change that touched Meanwhile, I think closing and reopening the reader may be your best solution. This should be alright as long as you don't do it too often. Or for your phasing example, you could open two readers on the same file - using the first to read in the genotypes, then the second to make the updates. I tried rewriting your test plugin to close and reopen the files, and it does work correctly: #include <htslib/synced_bcf_reader.h>
#include "bcftools.h"
/* Returns the plugin's description text, which here is just a newline.
 * (Presumably the bcftools plugin "about" hook; confirm against the plugin API.) */
const char *about(void)
{
    static const char description[] = "\n";
    return description;
}
/*
 * Open "A.vcf.gz" in a new synced reader that requires an index.
 *
 * On success stores the reader in *sr_out and its first header in *hdr_out
 * and returns 0; the caller is then responsible for bcf_sr_destroy().
 * On failure the outputs are left untouched, any reader that was created is
 * destroyed, and the reader's errnum is returned (or -1 when the reader could
 * not be allocated or errnum is 0).
 */
static int get_reader(bcf_srs_t **sr_out, bcf_hdr_t **hdr_out) {
    bcf_srs_t *readers = bcf_sr_init();
    if (readers == NULL) {
        return -1;
    }

    bcf_sr_set_opt(readers, BCF_SR_REQUIRE_IDX);

    /* The header is only looked up once the file has been attached. */
    bcf_hdr_t *header = NULL;
    if (bcf_sr_add_reader(readers, "A.vcf.gz")) {
        header = bcf_sr_get_header(readers, 0);
    }

    if (header != NULL) {
        *sr_out = readers;
        *hdr_out = header;
        return 0;
    }

    /* Read the error code before the reader that holds it is freed. */
    int code = readers->errnum;
    bcf_sr_destroy(readers);
    return code != 0 ? code : -1;
}
int run(int argc, char *argv[]) {
bcf_srs_t *sr = NULL;
bcf_hdr_t *hdr = NULL;
int i, err;
// read first contig
err = get_reader(&sr, &hdr);
if (err)
error("Failed to open %s: %s\n", "A.vcf.gz", bcf_sr_strerror(sr->errnum));
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 0), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 0) {
fprintf(stderr, "round1: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round1: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
bcf_sr_destroy(sr);
// read first contig a second time
err = get_reader(&sr, &hdr);
if (err)
error("Failed to open %s: %s\n", "A.vcf.gz", bcf_sr_strerror(sr->errnum));
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 0), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 0) {
fprintf(stderr, "round2: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round2: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
bcf_sr_destroy(sr);
// read second contig
err = get_reader(&sr, &hdr);
if (err)
error("Failed to open %s: %s\n", "A.vcf.gz", bcf_sr_strerror(sr->errnum));
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 1), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 1) {
fprintf(stderr, "round3: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round3: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
bcf_sr_destroy(sr);
// read second contig a second time
err = get_reader(&sr, &hdr);
if (err)
error("Failed to open %s: %s\n", "A.vcf.gz", bcf_sr_strerror(sr->errnum));
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 1), 0);
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 1) {
fprintf(stderr, "round4: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round4: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
bcf_sr_destroy(sr);
return 0;
}
|
It seems like using #include <htslib/synced_bcf_reader.h>
#include "bcftools.h"
/* Returns the plugin's description text, which here is just a newline.
 * (Presumably the bcftools plugin "about" hook; confirm against the plugin API.) */
const char *about(void)
{
    static const char description[] = "\n";
    return description;
}
// Variant of the reproduction: opens "A.vcf.gz" through a synced reader that
// requires an index, then makes four passes (contig id 0 twice, then contig
// id 1 twice). Each pass calls bcf_sr_seek() for the contig and then also
// bcf_sr_regions_seek() on the reader's region list with the same contig name,
// printing every record to stderr until a record from a different contig is
// seen. Always returns 0; argc/argv are unused.
int run(int argc, char *argv[]) {
bcf_srs_t *sr = bcf_sr_init();
bcf_sr_set_opt(sr, BCF_SR_REQUIRE_IDX);
if (!bcf_sr_add_reader(sr, "A.vcf.gz")) error("Failed to open %s: %s\n", "A.vcf.gz", bcf_sr_strerror(sr->errnum));
// Header of the only reader (index 0); used to translate contig ids to names.
bcf_hdr_t *hdr = bcf_sr_get_header(sr, 0);
int i;
// read first contig
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 0), 0);
// Additionally reposition sr->regions on the same contig — presumably to
// rewind the region list that the plain seek leaves untouched (confirm
// against the htslib sources).
bcf_sr_regions_seek(sr->regions, bcf_hdr_id2name(hdr, 0));
for (i = 0; bcf_sr_next_line(sr); i++) {
// Stop as soon as the reader moves past contig id 0.
if (bcf_sr_get_line(sr, 0)->rid != 0) {
fprintf(stderr, "round1: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
// pos is printed +1 (the stored value is presumably 0-based — confirm).
fprintf(stderr, "round1: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// read first contig a second time
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 0), 0);
bcf_sr_regions_seek(sr->regions, bcf_hdr_id2name(hdr, 0));
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 0) {
fprintf(stderr, "round2: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round2: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// read second contig
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 1), 0);
bcf_sr_regions_seek(sr->regions, bcf_hdr_id2name(hdr, 1));
for (i = 0; bcf_sr_next_line(sr); i++) {
// Stop as soon as the reader moves past contig id 1.
if (bcf_sr_get_line(sr, 0)->rid != 1) {
fprintf(stderr, "round3: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round3: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// read second contig a second time
bcf_sr_seek(sr, bcf_hdr_id2name(hdr, 1), 0);
bcf_sr_regions_seek(sr->regions, bcf_hdr_id2name(hdr, 1));
for (i = 0; bcf_sr_next_line(sr); i++) {
if (bcf_sr_get_line(sr, 0)->rid != 1) {
fprintf(stderr, "round4: observed contig %s so stopping\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid));
break;
}
fprintf(stderr, "round4: chr=%s, pos=%ld\n", bcf_hdr_id2name(hdr, bcf_sr_get_line(sr, 0)->rid), bcf_sr_get_line(sr, 0)->pos + 1);
}
// The same reader object served all four passes; release it once at the end.
bcf_sr_destroy(sr);
return 0;
} Gives the following output:
And by using the full:
The previous behavior is restored:
|
Repeated seeks with implicitly created region list wouldn't initialize all internal structures to the original clean state, as demonstrated by the issue samtools#1362, resolved by this commit
Thanks for reporting the problem, this should be solved by the pull request #1363 |
Repeated seeks with implicitly created region list wouldn't initialize all internal structures to the original clean state, as demonstrated by the issue samtools#1362, resolved by this commit
Repeated seeks with implicitly created region list wouldn't initialize all internal structures to the original clean state, as demonstrated by the issue samtools#1362, resolved by this commit
Repeated seeks with implicitly created region list wouldn't initialize all internal structures to the original clean state, as demonstrated by the issue #1362, resolved by this commit
This should now have been fixed by #1363. |
This issue seems related to issue #691 but I was not able to understand the issue in the source code:
Say you have a simple VCF:
And a
test.c
BCFtools plugin with the following code: The plugin seeks to the beginning of contig
chr1
two times and each time it tries to read all the records. When running the plugin with HTSlib 1.13:
When running the plugin with HTSlib 1.14:
The second time contig
chr1
does not produce any records. The text was updated successfully, but these errors were encountered: