Skip to content

Commit

Permalink
port X
Browse files (browse the repository at this point in the history)
  • Loading branch information
rcannood committed Nov 14, 2024
1 parent 9528eee commit 9bc7aa7
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 84 deletions.
3 changes: 2 additions & 1 deletion inst/known_issues.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
known_issues:
- backend: HDF5AnnData
slot:
- X
- layers
- obsp
- varp
- obsm
- varm
- layers
dtype:
- integer_csparse
- integer_rsparse
Expand Down
161 changes: 78 additions & 83 deletions tests/testthat/test-roundtrip-X.R
Original file line number Diff line number Diff line change
@@ -1,109 +1,104 @@
skip_if_no_anndata()
skip_if_not_installed("hdf5r")
skip_if_not_installed("reticulate")

data <- generate_dataset(10L, 20L)
library(reticulate)
testthat::skip_if_not(
reticulate::py_module_available("dummy_anndata"),
message = "Python dummy_anndata module not available for testing"
)

test_names <- names(data$layers)
ad <- reticulate::import("anndata", convert = FALSE)
da <- reticulate::import("dummy_anndata", convert = FALSE)
bi <- reticulate::import_builtins()

# TODO: Add denseMatrix support to anndata and anndataR
test_names <- test_names[!grepl("_dense", test_names)]
known_issues <- read_known_issues()

test_names <- names(da$matrix_generators)

for (name in test_names) {
test_that(paste0("roundtrip with X '", name, "'"), {
# create anndata
ad <- AnnData(
X = data$layers[[name]],
obs = data$obs[, c(), drop = FALSE],
var = data$var[, c(), drop = FALSE]
# first generate a python h5ad
adata_py <- da$generate_dataset(
x_type = name,
obs_types = list(),
var_types = list(),
layer_types = list(),
obsm_types = list(),
varm_types = list(),
obsp_types = list(),
varp_types = list(),
uns_types = list(),
nested_uns_types = list()
)

# create a couple of paths
file_py <- withr::local_file(tempfile(paste0("anndata_py_", name), fileext = ".h5ad"))
file_r <- withr::local_file(tempfile(paste0("anndata_r_", name), fileext = ".h5ad"))

# write to file
adata_py$write_h5ad(file_py)

test_that(paste0("Reading an AnnData with X '", name, "' works"), {
msg <- message_if_known(
backend = "HDF5AnnData",
slot = c("X"),
dtype = name,
process = "read",
known_issues = known_issues
)
skip_if(!is.null(msg), message = msg)

# write to file
filename <- withr::local_file(tempfile(fileext = ".h5ad"))
write_h5ad(ad, filename)

# read from file
ad_new <- read_h5ad(filename, to = "HDF5AnnData")

# expect slots are unchanged
adata_r <- read_h5ad(file_py, to = "HDF5AnnData")
expect_equal(
ad_new$X,
data$layers[[name]],
ignore_attr = TRUE,
tolerance = 1e-6
adata_r$shape(),
unlist(reticulate::py_to_r(adata_py$shape))
)

# check that the print output is the same
str_r <- capture.output(print(adata_r))
str_py <- capture.output(print(adata_py))
expect_equal(str_r, str_py)
})
}

for (name in test_names) {
test_that(paste0("reticulate->hdf5 with X '", name, "'"), {
# add rownames
X <- data$layers[[name]]
obs <- data.frame(row.names = rownames(data$obs))
var <- data.frame(row.names = rownames(data$var))

# TODO: remove this?
if (is.matrix(X) && any(is.na(X))) {
na_indices <- is.na(X)
X[na_indices] <- NaN
}

# create anndata
ad <- anndata::AnnData(
X = X,
obs = obs,
var = var
# TODO: consider not running this test at all when there is a known issue with reticulate
test_that(paste0("Comparing an anndata with X '", name, "' with reticulate works"), {
msg <- message_if_known(
backend = "HDF5AnnData",
slot = c("X"),
dtype = name,
process = c("read", "reticulate"),
known_issues = known_issues
)
skip_if(!is.null(msg), message = msg)

# write to file
filename <- withr::local_file(tempfile(fileext = ".h5ad"))
ad$write_h5ad(filename)

# read from file
ad_new <- HDF5AnnData$new(filename)
adata_r <- read_h5ad(file_py, to = "HDF5AnnData")

# expect slots are unchanged
expect_equal(
ad_new$X,
data$layers[[name]],
adata_r$X,
py_to_r(adata_py$X),
tolerance = 1e-6
)
})
}

r2py_names <- test_names
# TODO: re-enable -- rsparse gets converted to csparse by anndata
r2py_names <- r2py_names[!grepl("rsparse", r2py_names)]

for (name in r2py_names) {
test_that(paste0("hdf5->reticulate with X '", name, "'"), {
# write to file
filename <- withr::local_file(tempfile(fileext = ".h5ad"))


# make anndata
ad <- AnnData(
X = data$layers[[name]],
obs = data$obs[, c(), drop = FALSE],
var = data$var[, c(), drop = FALSE]
test_that(paste0("Writing an AnnData with X '", name, "' works"), {
msg <- message_if_known(
backend = "HDF5AnnData",
slot = c("X"),
dtype = name,
process = c("read", "write"),
known_issues = known_issues
)
write_h5ad(ad, filename)
skip_if(!is.null(msg), message = msg)

adata_r <- read_h5ad(file_py, to = "InMemoryAnnData")
write_h5ad(adata_r, file_r)

# read from file
ad_new <- anndata::read_h5ad(filename)

# expect slots are unchanged
layer_ <- ad_new$X
# anndata returns these layers as CsparseMatrix
if (grepl("rsparse", name)) {
layer_ <- as(layer_, "RsparseMatrix")
}
# strip rownames
dimnames(layer_) <- list(NULL, NULL)
expect_equal(
layer_,
data$layers[[name]],
ignore_attr = TRUE,
tolerance = 1e-6
adata_py2 <- ad$read_h5ad(file_r)

# expect that the objects are the same
expect_py_matrix_equal(
adata_py2$X,
adata_py$X
)
})
}

0 comments on commit 9bc7aa7

Please sign in to comment.