Skip to content

Commit

Permalink
Merge pull request #97 from ropensci-review-tools/progress-msg
Browse files Browse the repository at this point in the history
improve progress message in cran update #92
  • Loading branch information
mpadge authored Jan 17, 2025
2 parents 3bfce24 + c9fb697 commit 474adf6
Show file tree
Hide file tree
Showing 17 changed files with 132 additions and 60 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: pkgmatch
Title: Find R Packages Matching Either Descriptions or Other R Packages
Version: 0.4.3.003
Version: 0.4.3.011
Authors@R: c(
person("Mark", "Padgham", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-2172-5265")),
Expand Down
20 changes: 12 additions & 8 deletions R/bm25.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ pkgmatch_bm25_internal <- function (input, txt, idfs, corpus) {
idfs <- pkgmatch_load_data ("idfs", corpus = corpus, fns = FALSE)
}
checkmate::assert_list (idfs, len = 2L)
checkmate::assert_names (names (idfs), identical.to = c ("idfs", "token_lists"))
checkmate::assert_names (
names (idfs),
identical.to = c ("idfs", "token_lists")
)
tokens_idf <- idfs$idfs
tokens_list <- idfs$token_lists
} else {
Expand Down Expand Up @@ -120,7 +123,7 @@ pkgmatch_bm25_fn_calls <- function (path, corpus = "ropensci") {
chk <- checkmate::check_directory_exists (path)
}
if (!is.logical (chk)) {
chk <- is_installed_pkg <- input_is_pkg (path)
chk <- input_is_pkg (path)
}
if (!chk) {
cli::cli_abort ("'path' does not appear to be an R package.")
Expand All @@ -131,9 +134,10 @@ pkgmatch_bm25_fn_calls <- function (path, corpus = "ropensci") {
m_pkgmatch_bm25_fn_calls (path, corpus)
}

pkgmatch_bm25_fn_calls_internal <- function (path, corpus) {
pkgmatch_bm25_fn_calls_internal <- function (path, corpus) { # nolint

tokens_idf <- pkgmatch_load_data (what = "calls", corpus = corpus, raw = FALSE)
tokens_idf <-
pkgmatch_load_data (what = "calls", corpus = corpus, raw = FALSE)
calls <- pkgmatch_load_data (what = "calls", corpus = corpus, raw = TRUE)

tokens_list <- lapply (calls, function (i) {
Expand All @@ -154,7 +158,7 @@ pkgmatch_bm25_from_idf <- function (input, tokens_list, tokens_idf) {
m_pkgmatch_bm25_from_idf (input, tokens_list, tokens_idf)
}

pkgmatch_bm25_from_idf_internal <- function (input, tokens_list, tokens_idf) {
pkgmatch_bm25_from_idf_internal <- function (input, tokens_list, tokens_idf) { # nolint

n <- name <- NULL # suppress no visible binding note

Expand All @@ -173,7 +177,9 @@ pkgmatch_bm25_from_idf_internal <- function (input, tokens_list, tokens_idf) {
} else if (is.data.frame (input)) {
treesit_nms <- c ("fn", "name", "start", "end", "file")
if (!identical (names (input), treesit_nms)) {
cli::cli_abort ("'input' must be from 'pkgmatch_treesitter_fn_tags()'")
cli::cli_abort (
"'input' must be from 'pkgmatch_treesitter_fn_tags()'"
)
}
tokens_i <-
dplyr::summarise (dplyr::group_by (input, name), np = dplyr::n ())
Expand Down Expand Up @@ -270,8 +276,6 @@ bm25_idf_internal <- function (txt) {

n_docs <- length (txt)

tokens_txt <- bm25_tokens (txt)

tokens_list <- bm25_tokens_list (txt)
index <- which (vapply (tokens_list, nrow, integer (1L)) > 0L)

Expand Down
30 changes: 22 additions & 8 deletions R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ pkgmatch_cache_update_interval <- function () {
#'
#' @examples
#' \dontrun{
#' pkgmatch_update_cache()
#' pkgmatch_update_cache ()
#' }
#' @export
pkgmatch_update_cache <- function () {
Expand All @@ -82,12 +82,16 @@ pkgmatch_update_cache <- function () {

vals <- expand.grid (what = what, corpus = corpus, fns = fns, raw = raw)
vals$fname <- apply (vals, 1, function (i) {
get_cache_file_name (what = i [1], corpus = i [2], fns = i [3], raw = i [4])
get_cache_file_name (
what = i [1], corpus = i [2], fns = i [3], raw = i [4]
)
})
vals <- vals [-which (duplicated (vals$fname)), ]

files <- apply (vals, 1, function (i) {
pkgmatch_dl_data (what = i [1], corpus = i [2], fns = i [3], raw = i [4])
pkgmatch_dl_data (
what = i [1], corpus = i [2], fns = i [3], raw = i [4]
)
})

invisible (files)
Expand All @@ -105,11 +109,15 @@ load_data_internal <- function (what, corpus, fns, raw) {
dt <- difftime (as.Date (Sys.time ()), fdate, units = "days")
dl <- dt > pkgmatch_cache_update_interval ()
if (dl) {
cli::cli_inform ("Local data are {dt} days old and will be updated ...")
cli::cli_inform (
"Local data are {dt} days old and will be updated ..."
)
}
}
if (dl) {
fname <- pkgmatch_dl_data (what = what, corpus = corpus, fns = fns, raw = raw)
fname <- pkgmatch_dl_data (
what = what, corpus = corpus, fns = fns, raw = raw
)
}
readRDS (fname)
}
Expand All @@ -131,10 +139,14 @@ get_cache_file_name <- function (what, corpus, fns, raw) {
if (corpus == "ropensci") {

fname <- switch (what,
"embeddings" = ifelse (fns, "embeddings-fns.Rds", "embeddings-ropensci.Rds"),
"embeddings" = ifelse (
fns, "embeddings-fns.Rds", "embeddings-ropensci.Rds"
),
"idfs" = ifelse (fns, "bm25-ropensci-fns.Rds", "bm25-ropensci.Rds"),
"functions" = "fn-calls-ropensci.Rds",
"calls" = ifelse (raw, "fn-calls-ropensci.Rds", "idfs-fn-calls-ropensci.Rds")
"calls" = ifelse (
raw, "fn-calls-ropensci.Rds", "idfs-fn-calls-ropensci.Rds"
)
)

} else if (corpus == "cran") {
Expand All @@ -143,7 +155,9 @@ get_cache_file_name <- function (what, corpus, fns, raw) {
"embeddings" = "embeddings-cran.Rds",
"idfs" = "bm25-cran.Rds",
"functions" = "fn-calls-cran.Rds",
"calls" = ifelse (raw, "fn-calls-cran.Rds", "idfs-fn-calls-cran.Rds")
"calls" = ifelse (
raw, "fn-calls-cran.Rds", "idfs-fn-calls-cran.Rds"
)
)
}

Expand Down
6 changes: 4 additions & 2 deletions R/data-update-cran.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ pkgmatch_update_cran <- function () {

requireNamespace ("piggyback", quietly = TRUE)

results_path <- fs::dir_create (fs::path (fs::path_temp (), "pkgmatch-results"))
results_path <-
fs::dir_create (fs::path (fs::path_temp (), "pkgmatch-results"))
flist <- dl_prev_data (results_path)

new_cran_pkgs <- list_new_cran_updates (flist, latest_only = TRUE)
Expand All @@ -29,6 +30,7 @@ pkgmatch_update_cran <- function () {
cli::cli_inform ("Downloading and analysing {npkgs} packages.")

pt0 <- proc.time ()
op_is_quiet <- opt_is_quiet ()
op <- getOption ("rlib_message_verbosity")
options ("rlib_message_verbosity" = "quiet")

Expand All @@ -51,7 +53,7 @@ pkgmatch_update_cran <- function () {
# connections:
closeAllConnections ()

pkgmatch_update_progress_message (p, 1, npkgs, pt0)
pkgmatch_update_progress_message (p, 1, npkgs, pt0, op_is_quiet)

return (dat)
})
Expand Down
11 changes: 7 additions & 4 deletions R/data-update-pkgstats-fns.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ extract_tarball <- function (tarball) {
))
}

flist <- utils::untar (tarball,
exdir = fs::path_temp (),
list = TRUE, tar = "internal"
exdir <- fs::path_temp ()
flist <- utils::untar (
tarball,
exdir = exdir,
list = TRUE,
tar = "internal"
)
if (utils::untar (tarball, exdir = fs::path_temp (), tar = "internal") != 0) {
if (utils::untar (tarball, exdir = exdir, tar = "internal") != 0) {
stop ("Unable to extract tarball to 'tempdir'")
}

Expand Down
8 changes: 6 additions & 2 deletions R/data-update-ropensci.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ pkgmatch_update_ropensci <- function () {
requireNamespace ("gert", quietly = TRUE)
requireNamespace ("piggyback", quietly = TRUE)

results_path <- fs::dir_create (fs::path (fs::path_temp (), "pkgmatch-results"))
results_path <-
fs::dir_create (fs::path (fs::path_temp (), "pkgmatch-results"))
flist <- dl_prev_data (results_path)

pkgmatch_date <- min (list_remote_files ()$timestamp)
Expand All @@ -29,6 +30,7 @@ pkgmatch_update_ropensci <- function () {
}

pt0 <- proc.time ()
op_is_quiet <- opt_is_quiet ()
op <- getOption ("rlib_message_verbosity")
options ("rlib_message_verbosity" = "quiet")

Expand All @@ -47,7 +49,9 @@ pkgmatch_update_ropensci <- function () {
)
fs::dir_delete (pkg_dir)

pkgmatch_update_progress_message (i, 1, nrow (reg_updated), pt0)
pkgmatch_update_progress_message (
i, 1, nrow (reg_updated), pt0, op_is_quiet
)

return (list (dat = dat, fns = fns))
})
Expand Down
57 changes: 44 additions & 13 deletions R/data-update.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# General functions for both CRAN and rOpenSci update workflows

RELEASE_TAG <- "v0.4.0"
RELEASE_TAG <- "v0.4.0" # nolint

#' Update pkgmatch` data for both CRAN and rOpenSci packages on GitHub release
#'
Expand Down Expand Up @@ -30,7 +30,8 @@ pkgmatch_update_data <- function (upload = TRUE) {

requireNamespace ("piggyback", quietly = TRUE)

results_path <- fs::dir_create (fs::path (fs::path_temp (), "pkgmatch-results"))
results_path <-
fs::dir_create (fs::path (fs::path_temp (), "pkgmatch-results"))
flist <- dl_prev_data (results_path)

updated_cran <- pkgmatch_update_cran ()
Expand Down Expand Up @@ -106,7 +107,8 @@ append_data_to_embeddings <- function (res, flist, cran = TRUE) {
colnames (emb) <- names (res) [index]

index <- which (!colnames (embeddings [[what]]) %in% colnames (emb))
embeddings [[what]] <- cbind (embeddings [[what]] [, index, drop = FALSE], emb)
embeddings [[what]] <-
cbind (embeddings [[what]] [, index, drop = FALSE], emb)
index <- order (colnames (embeddings [[what]]))
embeddings [[what]] <- embeddings [[what]] [, index]

Expand Down Expand Up @@ -137,7 +139,8 @@ append_data_to_bm25 <- function (res, flist, cran = TRUE) {
append_cols <- function (res, bm25, what) {
what <- match.arg (what, c ("with_fns", "wo_fns"))
index <- not_null_index (res, what)
bm25_these <- lapply (res, function (i) i$bm25$token_lists [[what]] [[1]])
bm25_these <-
lapply (res, function (i) i$bm25$token_lists [[what]] [[1]])
names (bm25_these) <- names (res) [index]

what_toks <- bm25$token_lists [[what]]
Expand Down Expand Up @@ -190,7 +193,11 @@ append_data_to_bm25 <- function (res, flist, cran = TRUE) {

# Remove updated packages from token lists:
updated_pkgs <- names (res)
ptn <- paste0 ("^", paste0 (updated_pkgs, collapse = "|"), paste0 ("\\:\\:"))
ptn <- paste0 (
"^",
paste0 (updated_pkgs, collapse = "|"),
paste0 ("\\:\\:")
)
index <- which (!grepl (ptn, names (bm25$token_list)))
bm25$token_lists <- bm25$token_lists [index]

Expand Down Expand Up @@ -244,7 +251,11 @@ append_data_to_fn_calls <- function (res, flist, cran = TRUE) {
idf = idf
)

fname <- ifelse (cran, "idfs-fn-calls-cran.Rds", "idfs-fn-calls-ropensci.Rds")
fname <- ifelse (
cran,
"idfs-fn-calls-cran.Rds",
"idfs-fn-calls-ropensci.Rds"
)
fname <- flist [which (basename (flist) == fname)]
saveRDS (toks_idf, fname)
}
Expand All @@ -254,7 +265,8 @@ dl_prev_data <- function (results_path) {

flist_remote <- list_remote_files ()
file_names <- flist_remote$file_name
file_names_done <- file_names [which (file_names %in% list.files (results_path))]
file_names_done <-
file_names [which (file_names %in% list.files (results_path))]

dl_data <- piggyback::pb_download (
repo = "ropensci-review-tools/pkgmatch",
Expand All @@ -266,21 +278,40 @@ dl_prev_data <- function (results_path) {
return (fs::dir_ls (results_path))
}

pkgmatch_update_progress_message <- function (index, chunk_size, npkgs, pt0) {
#' Issue progress message as long as global package-level option is not set to
#' 'quiet'.
#'
#' "rlib_message_verbosity" is set to "quiet" in several internal calls. The
#' `opt_is_quiet` parameter allows progress messages to be issued as long as
#' that option is not globally set.
#'
#' @noRd
pkgmatch_update_progress_message <- function (index, # nolint
chunk_size,
npkgs,
pt0,
op_is_quiet) {

prog <- index * chunk_size / npkgs
prog_fmt <- format (100 * prog, digits = 2)
pt1 <- as.integer ((proc.time () - pt0) [3])
t_per_file <- pt1 / (index * chunk_size)
t_total <- as.integer (t_per_file * npkgs)
t_rem <- hms::hms (t_total - pt1)
pt1 <- hms::hms (pt1)

ndone <- min (c (npkgs, index * chunk_size))

message (
"[", ndone, " / ", npkgs,
"] = ", prog_fmt, "%; (elapsed, remaining) = (",
pt1, ", ", t_rem, ")"
)
if (!op_is_quiet) {
op <- getOption ("rlib_message_verbosity")
options ("rlib_message_verbosity" = "verbose")
}
cli::cli_inform (paste0 (
"[{ndone} / {npkgs}] = {prog_fmt}%; ",
"(elapsed, remaining) = ({pt1}, {t_rem})"
))
if (!op_is_quiet) {
options ("rlib_message_verbosity" = op)
}
}
# nocov end
3 changes: 2 additions & 1 deletion R/embeddings.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ pkgmatch_embeddings_from_pkgs <- function (packages = NULL,

if (!opt_is_quiet () && length (packages) > 100) {
cli::cli_inform ("Extracting package text ...")
txt_with_fns <- pbapply::pblapply (pkgs_full, function (p) get_pkg_text (p))
txt_with_fns <-
pbapply::pblapply (pkgs_full, function (p) get_pkg_text (p))
} else {
txt_with_fns <- lapply (pkgs_full, function (p) get_pkg_text (p))
}
Expand Down
1 change: 0 additions & 1 deletion R/get-pkg-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ tarball_to_path <- function (path) {
}
path2 <- fs::path (tempdir, basename (path))
fs::file_copy (path, path2)
base_dir <- fs::path (tempdir, gsub ("\\.tar\\.gz$", "", basename (path)))
utils::untar (path2, exdir = tempdir)
fs::file_delete (path2)

Expand Down
3 changes: 2 additions & 1 deletion R/namespaces.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ attach_local_dep_namespaces <- function (path, calls) {
index_to_fns <- match (calls$name [index], fns$fn_name)
dep_pkgs <- fns$package [index_to_fns]
index_pkgs <- which (!is.na (dep_pkgs))
calls$name [index] [index_pkgs] <- paste0 (dep_pkgs [index_pkgs], "::", calls$name [index] [index_pkgs])
calls$name [index] [index_pkgs] <-
paste0 (dep_pkgs [index_pkgs], "::", calls$name [index] [index_pkgs])

return (calls)
}
14 changes: 11 additions & 3 deletions R/similar-fns.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
#' p # Default print method, lists 5 best matching packages
#' head (p) # Shows first 5 rows of full `data.frame` object
#' }
pkgmatch_similar_fns <- function (input, embeddings = NULL, n = 5L, browse = FALSE) {
pkgmatch_similar_fns <- function (input,
embeddings = NULL,
n = 5L,
browse = FALSE) {

expected_embedding_len <- 768L

Expand All @@ -27,9 +30,14 @@ pkgmatch_similar_fns <- function (input, embeddings = NULL, n = 5L, browse = FAL
checkmate::assert_logical (browse, len = 1L)

if (is.null (embeddings)) {
embeddings <- pkgmatch_load_data ("embeddings", corpus = "ropensci", fns = TRUE)
embeddings <-
pkgmatch_load_data ("embeddings", corpus = "ropensci", fns = TRUE)
}
checkmate::assert_matrix (embeddings, nrow = expected_embedding_len, any.missing = FALSE)
checkmate::assert_matrix (
embeddings,
nrow = expected_embedding_len,
any.missing = FALSE
)
nms <- colnames (embeddings)
stopifnot (!is.null (nms))
stopifnot (all (grepl ("\\:\\:", nms)))
Expand Down
Loading

0 comments on commit 474adf6

Please sign in to comment.