Skip to content

Commit

Permalink
Added methods to save and query subject pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
dipterix committed Aug 22, 2024
1 parent 1d5af0b commit b89872b
Show file tree
Hide file tree
Showing 9 changed files with 270 additions and 1 deletion.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: raveio
Type: Package
Title: File-System Toolbox for RAVE Project
Version: 0.9.0.64
Version: 0.9.0.66
Language: en-US
Authors@R: c(
person("Zhengjia", "Wang", email = "[email protected]", role = c("aut", "cre", "cph")),
Expand Down
64 changes: 64 additions & 0 deletions R/class-pipeline_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,70 @@ PipelineTools <- R6::R6Class(
)
},

#' @description fork (copy) the current pipeline to a 'RAVE' subject
#' @param subject subject ID or instance in which pipeline will be saved
#' @param label pipeline label describing the pipeline
#' @param policy fork policy defined by module author, see text file
#' 'fork-policy' under the pipeline directory; if missing, then default to
#' avoid copying \code{main.html} and \code{shared} folder
#' @returns A new pipeline object based on the path given
fork_to_subject = function(subject, label = "NA", policy = "default") {
subject <- restore_subject_instance(subject, strict = TRUE)
label <- paste(label, collapse = "")
label_cleaned <- gsub("[^a-zA-Z0-9_.-]+", "_", label)

timestamp <- Sys.time()
name <- sprintf(
"%s-%s-%s",
self$pipeline_name,
label_cleaned,
format(timestamp, "%Y%m%dT%H%M%S")
)
path <- file.path(
subject$pipeline_path,
self$pipeline_name,
name
)
# make sure parent folder exists
dir_create2(dirname(path))
re <- self$fork(path = path, policy = policy)
# register
registry_path <- file.path(subject$pipeline_path, "pipeline-registry.csv")
if(file.exists(registry_path)) {
registry <- tryCatch({
registry <- data.table::fread(registry_path, stringsAsFactors = FALSE, colClasses = c(
project = "character",
subject = "character",
pipeline_name = "character",
timestamp = "POSIXct",
label = "character",
directory = "character"
))
stopifnot(all(c("project", "subject", "pipeline_name", "timestamp", "label", "directory") %in% names(registry)))
registry
}, error = function(...) { NULL })
} else {
registry <- NULL
}
registry <- rbind(
data.table::data.table(
project = subject$project_name,
subject = subject$subject_code,
pipeline_name = self$pipeline_name,
timestamp = timestamp,
label = label,
directory = name,
keep.rownames = FALSE, stringsAsFactors = FALSE
),
registry
)
tf <- tempfile()
on.exit({ unlink(tf) })
data.table::fwrite(registry, tf, row.names = FALSE, col.names = TRUE)
file.copy(tf, to = registry_path, overwrite = TRUE, recursive = FALSE, copy.date = TRUE)
re
},

#' @description run code with pipeline activated, some environment variables
#' and function behaviors might change under such condition (for example,
#' \code{targets} package functions)
Expand Down
17 changes: 17 additions & 0 deletions R/class-project.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,23 @@ RAVEProject <- R6::R6Class(
dir_create2(p, check = FALSE)
}
normalizePath(p, mustWork = FALSE)
},

#' @description list saved pipelines
#' @param pipeline_name name of the pipeline
#' @param cache whether to use cached registry
#' @param check whether to check if the pipelines exist as directories
#' @returns A data table of pipeline timestamps and directories
subject_pipelines = function(pipeline_name, cache = FALSE, check = TRUE) {
# pipeline_name <- "power_explorer"
# self <- raveio::as_rave_project("demo")
# check = FALSE
subjects <- self$subjects()
re <- lapply(subjects, function(subject_code) {
subject <- RAVESubject$new(project_name = self$name, subject_code = subject_code, strict = FALSE)
subject$list_pipelines(pipeline_name = pipeline_name, check = check)
})
data.table::rbindlist(re)
}

),
Expand Down
87 changes: 87 additions & 0 deletions R/class-subject.R
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,93 @@ RAVESubject <- R6::R6Class(
return(frequency_table$Frequency)
}
frequency_table
},


#' @description list saved pipelines
#' @param pipeline_name pipeline ID
#' @param cache whether to use cache registry to speed up
#' @param check whether to check if the pipelines exist
#' @returns A table of pipeline registry
list_pipelines = function(pipeline_name, cache = FALSE, check = TRUE) {

if( cache ) {
# use cached registry
registry_path <- file.path(self$pipeline_path, "pipeline-registry.csv")
if(file.exists(registry_path)) {
registry <- tryCatch({
registry <- data.table::fread(
registry_path,
stringsAsFactors = FALSE,
colClasses = c(
project = "character",
subject = "character",
pipeline_name = "character",
timestamp = "POSIXct",
label = "character",
directory = "character"
)
)
stopifnot(all(c("project", "subject", "pipeline_name", "timestamp", "label", "directory") %in% names(registry)))
registry
}, error = function(...) { NULL })
if(nrow(registry)) {
if(check) {
registry <- registry[registry$pipeline_name == pipeline_name, ]
registry <- registry[dir.exists(file.path(self$pipeline_path, registry$pipeline_name, registry$directory)), ]
}
return(registry)
}
}
}

pipeline_paths <- file.path(self$pipeline_path, pipeline_name)
prefix <- sprintf("^%s-", pipeline_name)
re <- list.files(
pipeline_paths,
pattern = prefix,
include.dirs = TRUE,
all.files = FALSE,
ignore.case = TRUE,
no.. = TRUE,
recursive = FALSE,
full.names = FALSE
)
if( check && length(re) ) {
re <- re[dir.exists(file.path(pipeline_paths, re))]
}
re <- lapply(re, function(name) {
tryCatch({
item <- strsplit(gsub(prefix, "", name, ignore.case = TRUE), "-", fixed = TRUE)[[1]]
idx <- length(item)
timestamp <- as.POSIXct(strptime(paste(item[idx], collapse = "-"), "%Y%m%dT%H%M%S"))
label <- paste(item[-idx], collapse = "-")
list(
project = self$project_name,
subject = self$subject_code,
pipeline_name = pipeline_name,
timestamp = timestamp,
label = label,
directory = name
)
}, error = function(...) { NULL })
})

data.table::rbindlist(re)
},

#' @description load saved pipeline
#' @param directory pipeline directory name
#' @returns A \code{PipelineTools} instance
load_pipeline = function(directory) {

# directory <- "power_explorer-NA-20240822T184419"
pipeline_name <- strsplit(directory, "-", fixed = TRUE)[[1]][[1]]
pipeline_path <- file.path(self$pipeline_path, pipeline_name, directory)
if(!file.exists(pipeline_path)) {
stop("Unable to find pipeline [", directory, "] from subject ", self$subject_id)
}
pipeline_from_path(pipeline_path)
}

),
Expand Down
5 changes: 5 additions & 0 deletions inst/rave-pipelines/template-rmd-bare/fork-policy
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
[default]
^shared
^main\.html$

[group_analysis]
^shared/objects/(?!data_for_group)
^shared/user
^main\.html$
26 changes: 26 additions & 0 deletions man/PipelineTools.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/RAVEMetaSubject.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions man/RAVEProject.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 44 additions & 0 deletions man/RAVESubject.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b89872b

Please sign in to comment.