From e627f3da01d58252e86099e29967dc94090b5394 Mon Sep 17 00:00:00 2001 From: Artur-man Date: Sun, 10 Nov 2024 15:13:59 +0100 Subject: [PATCH] add BoolArrayDimIndexer for orthogonal selection #97 #104 #113 --- R/atomic.R | 22 ++++ R/filters.R | 16 ++- R/indexing.R | 144 ++++++++++++++++++++- R/normalize.R | 2 + man/BoolArrayDimIndexer.Rd | 100 ++++++++++++++ man/is_bool.Rd | 12 ++ man/is_bool_list.Rd | 12 ++ man/is_bool_vec.Rd | 12 ++ tests/testthat/test-indexing-orthogonal.R | 31 +++++ tests/testthat/test-orthogonal-selection.R | 9 ++ 10 files changed, 351 insertions(+), 9 deletions(-) create mode 100644 man/BoolArrayDimIndexer.Rd create mode 100644 man/is_bool.Rd create mode 100644 man/is_bool_list.Rd create mode 100644 man/is_bool_vec.Rd diff --git a/R/atomic.R b/R/atomic.R index c6fc68a..8ba4257 100644 --- a/R/atomic.R +++ b/R/atomic.R @@ -86,3 +86,25 @@ ensure_list <- function(selection) { } return(as.list(selection)) } + +#' Check that a value is boolean with length 1 +#' @keywords internal +is_bool <- function(s){ + if(is.logical(s) && length(s) == 1) + return(TRUE) + return(FALSE) +} +#' Check that a value is a vector of one or more boolean +#' @keywords internal +is_bool_vec <- function(s) { + if(is.vector(s) && !is.list(s) && is.logical(s) && length(s) > 1) + return(TRUE) + return(FALSE) +} +#' Check that a value is a list of one or more boolean +#' @keywords internal +is_bool_list <- function(s) { + if(is.list(s) && is_bool_vec(unlist(s))) + return(TRUE) + return(FALSE) +} \ No newline at end of file diff --git a/R/filters.R b/R/filters.R index b226980..ed594b3 100644 --- a/R/filters.R +++ b/R/filters.R @@ -2,7 +2,7 @@ # # a:b => slice(a,b) # seq(from, to, by) => slice(start, stop, step) ? for now indices of seq(from, to, by) are passed to get_orthogonal_selection (check below, TODO) -# c(a,b,c) => c(a,b,c), combine elements are passed as indices +# c(a,b,c) => c(a,b,c), combine elements are passed as indices or boolean # empty dimension => return everything # manage_filters <- function(filters) { @@ -13,7 +13,8 @@ manage_filters <- function(filters) { if(x == "") { return(NULL) } else { - stop("Unsupported filter '", as.character(x), "' supplied") + # TODO: is eval() always the solution here ?, e.g. ind <- c(TRUE, FALSE, TRUE, TRUE) then eval(ind) if typeof(ind) == "symbol" + return(eval(x)) } } else if(typeof(x) == "double") { # Return single value for dimension @@ -41,7 +42,16 @@ manage_filters <- function(filters) { check_func <- sapply(x, function(y) { !is.function(eval(y)) }) - return(int(floor(unlist(x[check_func])))) + x <- x[check_func] + + # correct for integer or boolean vectors + x <- sapply(x, function(y) { + if(is.numeric(y)) + return(int(floor(y))) + if(is_bool(y)) + return(y) + }) + return(x) } else { stop("Unsupported filter '", as.character(x), "' supplied") } diff --git a/R/indexing.R b/R/indexing.R index d0d530d..fb687b5 100644 --- a/R/indexing.R +++ b/R/indexing.R @@ -401,16 +401,14 @@ OrthogonalIndexer <- R6::R6Class("OrthogonalIndexer", dim_sel <- zb_slice(NA) } - # TODO: for now, normalize_list_selection will get SliceDimIndexer for single integer - if(length(dim_sel) == 1) { + if(is_integer(dim_sel)) { dim_indexer <- IntDimIndexer$new(dim_sel, dim_len, dim_chunk_len) } else if(is_slice(dim_sel)) { dim_indexer <- SliceDimIndexer$new(dim_sel, dim_len, dim_chunk_len) - } else if(length(dim_sel) > 1) { + } else if(is_bool_vec(dim_sel)) { + dim_indexer <- BoolArrayDimIndexer$new(dim_sel, dim_len, dim_chunk_len) + } else if(is_integer_vec(dim_sel)) { dim_indexer <- IntArrayDimIndexer$new(dim_sel, dim_len, dim_chunk_len) - # TODO: implement BoolArrayDimIndexer and fix if condition here (is_bool_vec) - # } else if(is_bool_vec(dim_sel)) { - # dim_indexer <- BoolArrayDimIndexer$new(dim_sel, dim_len, dim_chunk_len) } else { stop('Unsupported selection item for basic indexing, expected integer, slice, vector of integer or boolean') } @@ -663,3 +661,137 @@ IntArrayDimIndexer <- R6::R6Class("IntArrayDimIndexer", } ) ) + +# Reference: https://github.com/zarr-developers/zarr-python/blob/4a3bbf1cbb89e90ea9ca4d6d75dae23ed4b957c9/src/zarr/core/indexing.py#L581 +#' The Zarr BoolArrayDimIndexer class. +#' @title BoolArrayDimIndexer Class +#' @docType class +#' @description +#' TODO +#' @rdname BoolArrayDimIndexer +#' @keywords internal +BoolArrayDimIndexer <- R6::R6Class("BoolArrayDimIndexer", + inherit = DimIndexer, + public = list( + #' @field dim_sel selection on dimension + #' @keywords internal + dim_sel = NULL, + #' @field dim_len dimension length + #' @keywords internal + dim_len = NULL, + #' @field dim_chunk_len dimension chunk length + #' @keywords internal + dim_chunk_len = NULL, + #' @field num_chunks number of chunks + #' @keywords internal + num_chunks = NULL, + #' @field chunk_nitems number of items per chunk + #' @keywords internal + chunk_nitems = NULL, + #' @field chunk_nitems_cumsum offsets into the output array + #' @keywords internal + chunk_nitems_cumsum = NULL, + #' @field dim_chunk_ixs chunks that should be visited + #' @keywords internal + dim_chunk_ixs = NULL, + #' @field dim_out_sel TODO + #' @keywords internal + dim_out_sel = NULL, + #' @description + #' Create a new BoolArrayDimIndexer instance. + #' @param dim_sel integer dimension selection + #' @param dim_len integer dimension length + #' @param dim_chunk_len integer dimension chunk length + #' @return A `BoolArrayDimIndexer` instance. + initialize = function(dim_sel, dim_len, dim_chunk_len) { + + # check selection length + if(length(dim_sel) != dim_len) + stop(paste0("IndexError: Boolean vector has the wrong length for dimension; expected ", dim_len, ", got ", length(dim_sel))) + + # precompute number of selected items for each chunk + num_chunks <- ceiling(dim_len / dim_chunk_len) + chunk_nitems <- rep(0, num_chunks) + for(dim_chunk_ix in 1:num_chunks){ + dim_offset <- ((dim_chunk_ix - 1) * dim_chunk_len) + 1 + # START R-SPECIFIC + dim_offset_limits <- dim_offset+dim_chunk_len-1 + dim_offset_limits <- ifelse(dim_offset_limits > length(dim_sel), length(dim_sel), dim_offset_limits) + # STOP R-SPECIFIC + + chunk_nitems[dim_chunk_ix] <- sum(dim_sel[dim_offset:dim_offset_limits] != 0) + } + + # compute offsets into the output array + chunk_nitems_cumsum <- cumsum(chunk_nitems) + num_items <- rev(chunk_nitems_cumsum)[1] + + # find chunks that we need to visit + dim_chunk_ixs <- which(chunk_nitems != 0) + + # store attributes + self$dim_sel <- dim_sel + self$dim_len <- dim_len + self$dim_chunk_len <- dim_chunk_len + self$num_chunks <- num_chunks + self$chunk_nitems <- chunk_nitems + self$chunk_nitems_cumsum <- chunk_nitems_cumsum + self$num_items <- num_items + self$dim_chunk_ixs <- dim_chunk_ixs + }, + #' @description + #' An iterator over the dimensions of an array + #' @return A list of ChunkProjection objects + iter = function() { + + # Iterate over chunks in range + result <- list() + for(dim_chunk_ix in self$dim_chunk_ixs) { + + # find region in chunk + dim_offset <- ((dim_chunk_ix - 1) * self$dim_chunk_len) + 1 + dim_chunk_sel <- self$dim_sel[dim_offset:(dim_offset+self$dim_chunk_len-1)] + + # pad out if final chunk + if(length(dim_chunk_sel) < length(self$dim_chunk_len)){ + tmp <- rep(FALSE, self$dim_chunk_len) + tmp[1:length(dim_chunk_sel)] <- dim_chunk_sel + dim_chunk_sel <- tmp + } + + # find region in output + if (dim_chunk_ix == 1) { + start <- 0 + } else { + start <- self$chunk_nitems_cumsum[dim_chunk_ix - 1] + } + stop <- self$chunk_nitems_cumsum[dim_chunk_ix] + + # START R-SPECIFIC + if(start == stop) { + stop <- stop + 1 + } + # END R-SPECIFIC + + # get out selection + dim_out_sel <- seq(start, stop - 1) + + # make boolean as integer, specific to pizzarr + dim_chunk_sel <- which(dim_chunk_sel) - 1 + + # START R-SPECIFIC + dim_chunk_ix <- dim_chunk_ix - 1 + # END R-SPECIFIC + + result <- append(result, ChunkDimProjection$new( + dim_chunk_ix, + dim_chunk_sel, + dim_out_sel + )) + + } + + return(result) + } + ) +) diff --git a/R/normalize.R b/R/normalize.R index 9bfebec..3506f50 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -13,6 +13,8 @@ normalize_list_selection <- function(selection, shape, convert_integer_selection } } else if(is_integer_vec(dim_sel)) { selection[[i]] <- sapply(dim_sel, normalize_integer_selection, dim_len = shape[i]) + } else if(is_bool_vec(dim_sel)) { + selection[[i]] <- selection[[i]] } else if(!is.null(dim_sel) && !is.environment(dim_sel) && (is.na(dim_sel) || dim_sel == ":")) { selection[[i]] <- zb_slice(NA, NA, 1) diff --git a/man/BoolArrayDimIndexer.Rd b/man/BoolArrayDimIndexer.Rd new file mode 100644 index 0000000..bbd1d78 --- /dev/null +++ b/man/BoolArrayDimIndexer.Rd @@ -0,0 +1,100 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/indexing.R +\docType{class} +\name{BoolArrayDimIndexer} +\alias{BoolArrayDimIndexer} +\title{BoolArrayDimIndexer Class} +\description{ +TODO +} +\details{ +The Zarr BoolArrayDimIndexer class. +} +\keyword{internal} +\section{Super class}{ +\code{pizzarr::DimIndexer} -> \code{BoolArrayDimIndexer} +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{dim_sel}}{selection on dimension} + +\item{\code{dim_len}}{dimension length} + +\item{\code{dim_chunk_len}}{dimension chunk length} + +\item{\code{num_chunks}}{number of chunks} + +\item{\code{chunk_nitems}}{number of items per chunk} + +\item{\code{chunk_nitems_cumsum}}{offsets into the output array} + +\item{\code{dim_chunk_ixs}}{chunks that should be visited} + +\item{\code{dim_out_sel}}{TODO} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-BoolArrayDimIndexer-new}{\code{BoolArrayDimIndexer$new()}} +\item \href{#method-BoolArrayDimIndexer-iter}{\code{BoolArrayDimIndexer$iter()}} +\item \href{#method-BoolArrayDimIndexer-clone}{\code{BoolArrayDimIndexer$clone()}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BoolArrayDimIndexer-new}{}}} +\subsection{Method \code{new()}}{ +Create a new BoolArrayDimIndexer instance. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BoolArrayDimIndexer$new(dim_sel, dim_len, dim_chunk_len)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{dim_sel}}{integer dimension selection} + +\item{\code{dim_len}}{integer dimension length} + +\item{\code{dim_chunk_len}}{integer dimension chunk length} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +A \code{BoolArrayDimIndexer} instance. +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BoolArrayDimIndexer-iter}{}}} +\subsection{Method \code{iter()}}{ +An iterator over the dimensions of an array +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BoolArrayDimIndexer$iter()}\if{html}{\out{
}} +} + +\subsection{Returns}{ +A list of ChunkProjection objects +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BoolArrayDimIndexer-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BoolArrayDimIndexer$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/is_bool.Rd b/man/is_bool.Rd new file mode 100644 index 0000000..2acf224 --- /dev/null +++ b/man/is_bool.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/atomic.R +\name{is_bool} +\alias{is_bool} +\title{Check that a value is boolean with length 1} +\usage{ +is_bool(s) +} +\description{ +Check that a value is boolean with length 1 +} +\keyword{internal} diff --git a/man/is_bool_list.Rd b/man/is_bool_list.Rd new file mode 100644 index 0000000..1777b9e --- /dev/null +++ b/man/is_bool_list.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/atomic.R +\name{is_bool_list} +\alias{is_bool_list} +\title{Check that a value is a list of one or more boolean} +\usage{ +is_bool_list(s) +} +\description{ +Check that a value is a list of one or more boolean +} +\keyword{internal} diff --git a/man/is_bool_vec.Rd b/man/is_bool_vec.Rd new file mode 100644 index 0000000..248fe32 --- /dev/null +++ b/man/is_bool_vec.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/atomic.R +\name{is_bool_vec} +\alias{is_bool_vec} +\title{Check that a value is a vector of one or more boolean} +\usage{ +is_bool_vec(s) +} +\description{ +Check that a value is a vector of one or more boolean +} +\keyword{internal} diff --git a/tests/testthat/test-indexing-orthogonal.R b/tests/testthat/test-indexing-orthogonal.R index a606b70..c03e210 100644 --- a/tests/testthat/test-indexing-orthogonal.R +++ b/tests/testthat/test-indexing-orthogonal.R @@ -102,4 +102,35 @@ test_that("int array dimension indexer", { # missing chunk size expect_error(IntArrayDimIndexer$new(1:10, 10)) +}) + +test_that("bool array dimension indexer", { + + # boolean checks + expect_equal(is_bool(TRUE), TRUE) + expect_equal(is_bool(1), FALSE) + expect_equal(is_bool(1.2), FALSE) + expect_equal(is_bool(c(TRUE, FALSE, TRUE)), FALSE) + expect_equal(is_bool_vec(c(TRUE, FALSE, TRUE)), TRUE) + expect_equal(is_bool_vec(c(TRUE, FALSE, 1)), FALSE) + expect_equal(is_bool_vec(c(TRUE, 1.2, 1)), FALSE) + expect_equal(is_bool_list(list(TRUE, FALSE, FALSE)), TRUE) + expect_equal(is_bool_list(list(TRUE, 1.2, 1)), FALSE) + expect_equal(is_bool_list(c(TRUE, FALSE, FALSE)), FALSE) + + # ordered int array index + iad <- BoolArrayDimIndexer$new(c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE), 8, 5) + expect_equal(iad$dim_sel, c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE)) + expect_equal(iad$dim_chunk_ixs, c(1,2)) + expect_equal(iad$dim_len, 8) + expect_equal(iad$dim_chunk_len, 5) + expect_equal(iad$num_chunks, 2) + expect_equal(iad$chunk_nitems, c(3,2)) + + # error for wrong dimension length + expect_error(BoolArrayDimIndexer$new(c(TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE), 3, 5)) + + # missing chunk size + expect_error(BoolArrayDimIndexer$new(c(TRUE, TRUE, FALSE, TRUE, FALSE), 5)) + }) \ No newline at end of file diff --git a/tests/testthat/test-orthogonal-selection.R b/tests/testthat/test-orthogonal-selection.R index 22ccf8f..51d8d4b 100644 --- a/tests/testthat/test-orthogonal-selection.R +++ b/tests/testthat/test-orthogonal-selection.R @@ -37,4 +37,13 @@ test_that("orthogonal selection", { z$get_orthogonal_selection(list(c(1,0), slice(1,3)))$data, array(data=c(2, 1, 4, 3, 6, 5), dim=c(2, 3)) ) + + # boolean indexing + ind <- rep(FALSE, 10) + ind[c(3,5,8)] <- TRUE + expect_equal( + z$get_orthogonal_selection(list(c(1,0), ind))$data, + array(data=c(6, 5, 10, 9, 16, 15), dim=c(2, 3)) + ) + expect_error(z$get_orthogonal_selection(list(c(0,1),c(TRUE,FALSE,TRUE)))$data) }) \ No newline at end of file