Skip to content

Commit

Permalink
Sanitize strand to use int8 dtype and update checks (#60)
Browse files: browse the repository at this point in the history
Also fixes issues in the window functions (`tile_by_range`, `tile`, `sliding_windows`) where strand is None.
  • Loading branch information
jkanche authored Jan 5, 2024
1 parent b2a4681 commit afd8a24
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 12 deletions.
6 changes: 3 additions & 3 deletions src/genomicranges/GenomicRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -2532,7 +2532,7 @@ def tile_by_range(
)

seqnames.extend([val.seqnames[0]] * len(all_intervals))
strand.extend([val.strand[0]] * len(all_intervals))
strand.extend([int(val.strand[0])] * len(all_intervals))
starts.extend([x[0] for x in all_intervals])
widths.extend(x[1] for x in all_intervals)

Expand Down Expand Up @@ -2594,7 +2594,7 @@ def tile(
)

seqnames.extend([val.seqnames[0]] * len(all_intervals))
strand.extend([val.strand[0]] * len(all_intervals))
strand.extend([int(val.strand[0])] * len(all_intervals))
starts.extend([x[0] for x in all_intervals])
widths.extend(x[1] for x in all_intervals)

Expand Down Expand Up @@ -2636,7 +2636,7 @@ def sliding_windows(self, width: int, step: int = 1) -> "GenomicRanges":
)

seqnames.extend([val.seqnames[0]] * len(all_intervals))
strand.extend([val.strand[0]] * len(all_intervals))
strand.extend([int(val.strand[0])] * len(all_intervals))
starts.extend([x[0] for x in all_intervals])
widths.extend(x[1] for x in all_intervals)

Expand Down
23 changes: 14 additions & 9 deletions src/genomicranges/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Sequence, Union, Optional, Tuple
from typing import List, Optional, Sequence, Tuple, Union

import biocutils as ut
import numpy as np
Expand Down Expand Up @@ -41,18 +41,23 @@ def sanitize_strand_vector(
raise ValueError(
"'strand' must only contain values 1 (forward strand), -1 (reverse strand) or 0 (reverse strand)."
)
return strand
return strand.astype(np.int8)

if ut.is_list_of_type(strand, str):
if not set(strand).issubset(["+", "-", "*"]):
raise ValueError("Values in 'strand' must be either +, - or *.")
return np.array([STRAND_MAP[x] for x in strand])
elif ut.is_list_of_type(strand, int):
return np.array(strand)
else:
TypeError(
"'strand' must be either a numpy vector, a list of integers or strings representing strand."
)
return np.array([STRAND_MAP[x] for x in strand], dtype=np.int8)

if ut.is_list_of_type(strand, (int, float, np.int_)):
if not set(strand).issubset([1, 0, -1]):
raise ValueError(
"'strand' must only contain values 1 (forward strand), -1 (reverse strand) or 0 (reverse strand)."
)
return np.array(strand, dtype=np.int8)

raise TypeError(
"'strand' must be either a numpy vector, a list of integers or strings representing strand."
)


def _sanitize_strand_search_ops(query_strand, subject_strand):
Expand Down

0 comments on commit afd8a24

Please sign in to comment.