Skip to content

Commit

Permalink
Use only _regions_add() when adding the list of contig names
Browse files Browse the repository at this point in the history
Don't use _regions_init_string(), which misinterprets contig names
containing colons as region specification strings. The code used
_regions_init_string() rather than _regions_add() only when needed
to allocate a new bcf_sr_regions_t structure; instead extract basic
initialisation into a new bcf_sr_regions_alloc() function, which as
a bonus checks the memory allocation. Use the new function throughout.

Fixes samtools/bcftools#2179.
  • Loading branch information
jmarshall authored and jkbonfield committed May 9, 2024
1 parent 292a35d commit 9ad8270
Showing 1 changed file with 30 additions and 11 deletions.
41 changes: 30 additions & 11 deletions synced_bcf_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ typedef struct
}
aux_t;

// Forward declarations of static (file-local) helper functions.
static bcf_sr_regions_t *bcf_sr_regions_alloc(void);
static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end);
static bcf_sr_regions_t *_regions_init_string(const char *str);
static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec);
Expand Down Expand Up @@ -368,13 +369,22 @@ int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
if ( !files->explicit_regs && !files->streaming )
{
int n = 0, i;
const char **names = reader->tbx_idx ? tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n);
for (i=0; i<n; i++)
const char **names;

if ( !files->regions )
{
files->regions = bcf_sr_regions_alloc();
if ( !files->regions )
files->regions = _regions_init_string(names[i]);
else
_regions_add(files->regions, names[i], -1, -1);
{
hts_log_error("Cannot allocate regions data structure");
return 0;
}
}

names = reader->tbx_idx ? tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n);
for (i=0; i<n; i++)
{
_regions_add(files->regions, names[i], -1, -1);
}
free(names);
_regions_sort_and_merge(files->regions);
Expand Down Expand Up @@ -956,6 +966,17 @@ int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file)
return 1;
}

// Create an empty region list structure. The memory is zero-filled by calloc()
// and the start/end and prev_start/prev_end/prev_seq fields are then set to -1.
// Returns the new structure, or NULL if the allocation failed.
static bcf_sr_regions_t *bcf_sr_regions_alloc(void)
{
    bcf_sr_regions_t *regions = calloc(1, sizeof(*regions));

    if ( regions != NULL )
    {
        regions->start      = -1;
        regions->end        = -1;
        regions->prev_start = -1;
        regions->prev_end   = -1;
        regions->prev_seq   = -1;
    }
    return regions;
}

// Add a new region into a list. On input the coordinates are 1-based, inclusive, then stored 0-based,
// inclusive. Sorting and merging step needed afterwards: qsort(..,cmp_regions) and merge_regions().
static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start, hts_pos_t end)
Expand Down Expand Up @@ -1037,9 +1058,8 @@ void _regions_sort_and_merge(bcf_sr_regions_t *reg)
// wouldn't learn the chromosome name.
static bcf_sr_regions_t *_regions_init_string(const char *str)
{
bcf_sr_regions_t *reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t));
reg->start = reg->end = -1;
reg->prev_start = reg->prev_end = reg->prev_seq = -1;
bcf_sr_regions_t *reg = bcf_sr_regions_alloc();
if ( !reg ) return NULL;

kstring_t tmp = {0,0,0};
const char *sp = str, *ep = str;
Expand Down Expand Up @@ -1189,9 +1209,8 @@ bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int ichr
return reg;
}

reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t));
reg->start = reg->end = -1;
reg->prev_start = reg->prev_end = reg->prev_seq = -1;
reg = bcf_sr_regions_alloc();
if ( !reg ) return NULL;

reg->file = hts_open(regions, "rb");
if ( !reg->file )
Expand Down

0 comments on commit 9ad8270

Please sign in to comment.