Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add BoolArrayDimIndexer #114

Merged
merged 1 commit into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions R/atomic.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,25 @@ ensure_list <- function(selection) {
}
return(as.list(selection))
}

#' Test whether a value is a single boolean
#'
#' @param s The value to test.
#' @return `TRUE` when `s` is a logical of length exactly 1, `FALSE` otherwise.
#' @keywords internal
is_bool <- function(s) {
  # Both conditions are scalar, so `&&` always yields a plain TRUE/FALSE.
  single_logical <- is.logical(s) && length(s) == 1
  return(single_logical)
}
#' Test whether a value is a boolean vector with more than one element
#'
#' @param s The value to test.
#' @return `TRUE` when `s` is a non-list logical vector of length greater
#'   than 1, `FALSE` otherwise. A single logical is not accepted here.
#' @keywords internal
is_bool_vec <- function(s) {
  # Reject anything that is not a plain atomic vector (lists, matrices, NULL).
  if (!is.vector(s) || is.list(s)) {
    return(FALSE)
  }
  # Reject non-logical atomic vectors (numeric, character, ...).
  if (!is.logical(s)) {
    return(FALSE)
  }
  # Length-one logicals are deliberately excluded.
  return(length(s) > 1)
}
#' Test whether a value is a list of booleans
#'
#' @param s The value to test.
#' @return `TRUE` when `s` is a list whose flattened contents (via `unlist()`)
#'   pass `is_bool_vec()`, `FALSE` otherwise.
#' @keywords internal
is_bool_list <- function(s) {
  # Anything that is not a list is rejected before flattening.
  if (!is.list(s)) {
    return(FALSE)
  }
  flattened <- unlist(s)
  return(is_bool_vec(flattened))
}
16 changes: 13 additions & 3 deletions R/filters.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# a:b => slice(a,b)
# seq(from, to, by) => slice(start, stop, step)? For now, the indices of seq(from, to, by) are passed to get_orthogonal_selection (see the TODO below)
# c(a,b,c) => c(a,b,c), combine elements are passed as indices
# c(a,b,c) => c(a,b,c), combine elements are passed as indices or boolean
# empty dimension => return everything
#
manage_filters <- function(filters) {
Expand All @@ -13,7 +13,8 @@ manage_filters <- function(filters) {
if(x == "") {
return(NULL)
} else {
stop("Unsupported filter '", as.character(x), "' supplied")
# TODO: is eval() always the solution here ?, e.g. ind <- c(TRUE, FALSE, TRUE, TRUE) then eval(ind) if typeof(ind) == "symbol"
return(eval(x))
}
} else if(typeof(x) == "double") {
# Return single value for dimension
Expand Down Expand Up @@ -41,7 +42,16 @@ manage_filters <- function(filters) {
check_func <- sapply(x, function(y) {
!is.function(eval(y))
})
return(int(floor(unlist(x[check_func]))))
x <- x[check_func]

# correct for integer or boolean vectors
x <- sapply(x, function(y) {
if(is.numeric(y))
return(int(floor(y)))
if(is_bool(y))
return(y)
})
return(x)
} else {
stop("Unsupported filter '", as.character(x), "' supplied")
}
Expand Down
144 changes: 138 additions & 6 deletions R/indexing.R
Original file line number Diff line number Diff line change
Expand Up @@ -401,16 +401,14 @@ OrthogonalIndexer <- R6::R6Class("OrthogonalIndexer",
dim_sel <- zb_slice(NA)
}

# TODO: for now, normalize_list_selection will get SliceDimIndexer for single integer
if(length(dim_sel) == 1) {
if(is_integer(dim_sel)) {
dim_indexer <- IntDimIndexer$new(dim_sel, dim_len, dim_chunk_len)
} else if(is_slice(dim_sel)) {
dim_indexer <- SliceDimIndexer$new(dim_sel, dim_len, dim_chunk_len)
} else if(length(dim_sel) > 1) {
} else if(is_bool_vec(dim_sel)) {
dim_indexer <- BoolArrayDimIndexer$new(dim_sel, dim_len, dim_chunk_len)
} else if(is_integer_vec(dim_sel)) {
dim_indexer <- IntArrayDimIndexer$new(dim_sel, dim_len, dim_chunk_len)
# TODO: implement BoolArrayDimIndexer and fix if condition here (is_bool_vec)
# } else if(is_bool_vec(dim_sel)) {
# dim_indexer <- BoolArrayDimIndexer$new(dim_sel, dim_len, dim_chunk_len)
} else {
stop('Unsupported selection item for basic indexing, expected integer, slice, vector of integer or boolean')
}
Expand Down Expand Up @@ -663,3 +661,137 @@ IntArrayDimIndexer <- R6::R6Class("IntArrayDimIndexer",
}
)
)

# Reference: https://github.com/zarr-developers/zarr-python/blob/4a3bbf1cbb89e90ea9ca4d6d75dae23ed4b957c9/src/zarr/core/indexing.py#L581
#' The Zarr BoolArrayDimIndexer class.
#' @title BoolArrayDimIndexer Class
#' @docType class
#' @description
#' Dimension indexer for a boolean (logical) vector selection along a single
#' array dimension. On construction it counts the selected items in each
#' chunk; `iter()` then yields one `ChunkDimProjection` for every chunk that
#' contains at least one selected item.
#' @rdname BoolArrayDimIndexer
#' @keywords internal
BoolArrayDimIndexer <- R6::R6Class("BoolArrayDimIndexer",
  inherit = DimIndexer,
  public = list(
    #' @field dim_sel selection on dimension
    #' @keywords internal
    dim_sel = NULL,
    #' @field dim_len dimension length
    #' @keywords internal
    dim_len = NULL,
    #' @field dim_chunk_len dimension chunk length
    #' @keywords internal
    dim_chunk_len = NULL,
    #' @field num_chunks number of chunks
    #' @keywords internal
    num_chunks = NULL,
    #' @field chunk_nitems number of items per chunk
    #' @keywords internal
    chunk_nitems = NULL,
    #' @field chunk_nitems_cumsum offsets into the output array
    #' @keywords internal
    chunk_nitems_cumsum = NULL,
    #' @field dim_chunk_ixs chunks that should be visited
    #' @keywords internal
    dim_chunk_ixs = NULL,
    #' @field dim_out_sel placeholder; not assigned by the methods of this class
    #' @keywords internal
    dim_out_sel = NULL,
    #' @description
    #' Create a new BoolArrayDimIndexer instance.
    #' @param dim_sel boolean vector selection; its length must equal `dim_len`
    #' @param dim_len integer dimension length
    #' @param dim_chunk_len integer dimension chunk length
    #' @return A `BoolArrayDimIndexer` instance.
    initialize = function(dim_sel, dim_len, dim_chunk_len) {

      # check selection length
      if (length(dim_sel) != dim_len) {
        stop(paste0("IndexError: Boolean vector has the wrong length for dimension; expected ", dim_len, ", got ", length(dim_sel)))
      }

      # precompute number of selected items for each chunk
      num_chunks <- ceiling(dim_len / dim_chunk_len)
      chunk_nitems <- rep(0, num_chunks)
      # seq_len() (not 1:num_chunks) so an empty dimension runs zero iterations
      for (dim_chunk_ix in seq_len(num_chunks)) {
        # 1-based position of the first element of this chunk
        dim_offset <- ((dim_chunk_ix - 1) * dim_chunk_len) + 1
        # the final chunk may be shorter than dim_chunk_len: clamp the upper
        # bound so that we never index past the end of the selection
        dim_offset_limit <- min(dim_offset + dim_chunk_len - 1, length(dim_sel))

        chunk_nitems[dim_chunk_ix] <- sum(dim_sel[dim_offset:dim_offset_limit] != 0)
      }

      # compute offsets into the output array
      chunk_nitems_cumsum <- cumsum(chunk_nitems)
      # total number of selected items (equals the last cumulative count)
      num_items <- sum(chunk_nitems)

      # find chunks that we need to visit
      dim_chunk_ixs <- which(chunk_nitems != 0)

      # store attributes
      self$dim_sel <- dim_sel
      self$dim_len <- dim_len
      self$dim_chunk_len <- dim_chunk_len
      self$num_chunks <- num_chunks
      self$chunk_nitems <- chunk_nitems
      self$chunk_nitems_cumsum <- chunk_nitems_cumsum
      # NOTE(review): `num_items` is not declared in this class; presumably it
      # is a field of the parent DimIndexer - confirm.
      self$num_items <- num_items
      self$dim_chunk_ixs <- dim_chunk_ixs
    },
    #' @description
    #' An iterator over the dimensions of an array
    #' @return A list of ChunkProjection objects
    iter = function() {

      # Iterate over chunks in range
      result <- list()
      for (dim_chunk_ix in self$dim_chunk_ixs) {

        # find region in chunk, clamped to the end of the selection
        dim_offset <- ((dim_chunk_ix - 1) * self$dim_chunk_len) + 1
        dim_offset_limit <- min(
          dim_offset + self$dim_chunk_len - 1,
          length(self$dim_sel)
        )
        dim_chunk_sel <- self$dim_sel[dim_offset:dim_offset_limit]

        # pad out with FALSE if the final chunk is shorter than a full chunk
        if (length(dim_chunk_sel) < self$dim_chunk_len) {
          padded <- rep(FALSE, self$dim_chunk_len)
          padded[seq_along(dim_chunk_sel)] <- dim_chunk_sel
          dim_chunk_sel <- padded
        }

        # find region in output (zero-based, half-open [out_start, out_stop))
        if (dim_chunk_ix == 1) {
          out_start <- 0
        } else {
          out_start <- self$chunk_nitems_cumsum[dim_chunk_ix - 1]
        }
        out_stop <- self$chunk_nitems_cumsum[dim_chunk_ix]

        # START R-SPECIFIC
        # seq() would count downwards for an empty range; widen it by one
        if (out_start == out_stop) {
          out_stop <- out_stop + 1
        }
        # END R-SPECIFIC

        # get out selection
        dim_out_sel <- seq(out_start, out_stop - 1)

        # make boolean as integer, specific to pizzarr:
        # zero-based positions of the selected items inside the chunk
        dim_chunk_sel <- which(dim_chunk_sel) - 1

        # START R-SPECIFIC
        # the projection receives the chunk index minus one (zero-based)
        result <- append(result, ChunkDimProjection$new(
          dim_chunk_ix - 1,
          dim_chunk_sel,
          dim_out_sel
        ))
        # END R-SPECIFIC

      }

      return(result)
    }
  )
)
2 changes: 2 additions & 0 deletions R/normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ normalize_list_selection <- function(selection, shape, convert_integer_selection
}
} else if(is_integer_vec(dim_sel)) {
selection[[i]] <- sapply(dim_sel, normalize_integer_selection, dim_len = shape[i])
} else if(is_bool_vec(dim_sel)) {
selection[[i]] <- selection[[i]]
} else if(!is.null(dim_sel) && !is.environment(dim_sel) &&
(is.na(dim_sel) || dim_sel == ":")) {
selection[[i]] <- zb_slice(NA, NA, 1)
Expand Down
100 changes: 100 additions & 0 deletions man/BoolArrayDimIndexer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/is_bool.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/is_bool_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions man/is_bool_vec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading