Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rtf rich text #184

Merged
merged 20 commits into from
Jul 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Suggests:
rmarkdown,
stringi,
testthat,
tidyr,
xml2
Config/testthat/edition: 3
Roxygen: list(markdown = TRUE)
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export(rtf_page_footer)
export(rtf_page_header)
export(rtf_read_figure)
export(rtf_read_png)
export(rtf_rich_text)
export(rtf_source)
export(rtf_subline)
export(rtf_title)
Expand Down
230 changes: 230 additions & 0 deletions R/rtf_rich_text.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
# Copyright (c) 2023 Merck & Co., Inc., Rahway, NJ, USA and its affiliates. All rights reserved.
#
# This file is part of the r2rtf program.
#
# r2rtf is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#' Text to Formatted RTF Encode
#'
#' @param text Plain text.
#' @param theme Named list defining themes for tags. See `rtf_text()` for
#' details on possible formatting.
#'
#' @section Specification:
#' \if{latex}{
#' \itemize{
#' \item Validate if theme list items correspond to \code{font_type()} arguments.
#' \item Create regex expressions to match `{}` and `.tag` in text.
#' \item Extract tagged text from input text.
#' \item Extract tags from tagged text.
#' \item Extract text from tagged text.
#' \item Validate that lengths of extractions are all the same.
#' \item Validate that tags are defined in the `theme` argument.
#' \item Execute \code{rtf_text()} with extracted text and relevant formatting.
#' \item Reinsert encoded formatted text to original input text.
#' }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @export
#'
#' @examples
#' rtf_rich_text(
#' text = paste(
#' "This is {.emph important}.",
#' "This is {.strong relevant}.", "This is {.zebra ZEBRA}."
#' ),
#' theme = list(
#' .emph = list(format = "i"),
#' .strong = list(format = "b"),
#' .zebra = list(color = "white", background_color = "black")
#' )
#' )
rtf_rich_text <- function(
text,
theme = list(
.emph = list(format = "i"),
.strong = list(format = "b")
)) {
# Bulletproof the styles requested within the `theme` argument
theme_arg <- theme
names(theme_arg) <- NULL
unique_styles <- unique(names(unlist(theme_arg)))
bad_style <- unique_styles[!(unique_styles %in% names(formals(rtf_text)))]
if (length(bad_style) > 0) {
stop(
"Theme lists have styles which are not supported (",
paste0(bad_style, collapse = ", "), ").",
call. = FALSE
)
}

# Find all paired braces in text string
extracted <- list()
extracted$matches <- gsub(
pattern = "^\\{",
replacement = "",
x = gsub(
pattern = "\\}$",
replacement = "",
x = extract_tagged_text(text)
)
)

# For each paired brace: extract the theme tag (only allow one per match string).
# Regex patterns for parsing input text.
extraction_pattern <- "(^\\.[A-Za-z]*)(\\s)(.*$)"

extracted$tags <- gsub(
pattern = extraction_pattern,
replacement = "\\1",
x = extracted$matches
)
if (length(extracted$tags) != length(extracted$matches)) {
stop("Length missmatch of tags found and matches found.", call. = FALSE)
}

# For each paired brace: extract the text to be wrapped with `rtf_text()`
extracted$text <- gsub(
pattern = extraction_pattern,
replacement = "\\3",
x = extracted$matches
)
if (length(extracted$text) != length(extracted$matches)) {
stop("Length missmatch of extracted text found and matches found.", call. = FALSE)
}

# Validate that tags in text are reflected in the `themes` argument
missing_themes <- extracted$tags[!(extracted$tags %in% names(theme))]
if (length(missing_themes) != 0) {
stop(
"Input text has tags which are not available in the theme (",
paste0(missing_themes, collapse = ", "), ").",
call. = FALSE
)
}

# Execute `rtf_text()` calls with theme tags
extracted$replacements <- vapply(
X = seq_along(extracted$tags),
FUN = function(x) {
do.call(
rtf_text,
args = c(text = extracted$text[x], theme[[extracted$tags[x]]])
)
},
FUN.VALUE = "character"
)

# Insert `rtf_text()` calls into the original text
new_text <- text
for (i in seq_along(extracted$matches)) {
new_text <- gsub(
pattern = paste0("{", extracted$matches[i], "}"),
replacement = extracted$replacements[i],
x = new_text,
fixed = TRUE
)
}

rtf_text(new_text)
}

#' Extract Tagged Text
#'
#' Identify the text that is in brackets and correctly resolve the ordering of
#' the brackets such that everything is correctly tagged with the needed style.
#'
#' @param input Plain text containing matched curly braces with tags.
#'
#' @noRd
extract_tagged_text <- function(input) {
opening <- gregexec("\\{", text = input, perl = TRUE)[[1]]
closing <- gregexec("\\}", text = input, perl = TRUE)[[1]]
styles <- gregexec("\\{\\.[A-Za-z]*", text = input, perl = TRUE)

# Check for equal number of opening and closing braces
check_braces(input)

# Identify matching brace pairs
brace_matches <- match_braces(opening, closing)

# Identify which matched braces are associated with a style tag
styles_matches <- brace_matches[which(brace_matches$opening %in% styles[[1]]), ]

# Extract tagged brackets
unname(apply(
styles_matches,
MARGIN = 1,
FUN = function(X) {
substr(input, start = X[["opening"]], stop = X[["closing"]])
}
))
}

#' Identify Opening and Closing Brace Pairs
#'
#' Identify which opening and closing braces in a string belong together.
#' Follows a first-in-last-out matching.
#'
#' @param openings Vector of indices indicating location of opening braces.
#' @param closings Vector of indices indicating location of closing braces.
#'
#' @noRd
match_braces <- function(openings, closings) {
# Verify that equal numbers of opening and closing braces exist
stopifnot(length(openings) == length(closings))

# Create data frame to hold opening / closing pairs
holder <- data.frame(
opening = numeric(length(openings)),
closing = numeric(length(closings))
)

for (i in seq_along(closings)) {
# Find the matching opening brace.
# The matching opening brace is the one most recently preceding a closing brace.
match_opening <- max(openings[openings < closings[i]])
# Record the opening brace index to be removed
match_opening_index <- which(openings == match_opening)
# Add the pair of opening and closing braces to the holder data frame
holder[i, ] <- c(match_opening, closings[i])
# Remove the matching opening brace from the vector of opening braces
openings <- openings[-match_opening_index]
}

holder
}

#' Check that braces are correctly matched
#'
#' Braces must be matched appropriately. First brace should be an opening brace.
#' Every brace should be closed appropriately.
#'
#' @param input Plain text containing matched curly braces with tags.
#'
#' @noRd
check_braces <- function(input) {
input_parse <- gsub(x = input, pattern = "[^{}]", replacement = "")
input_split <- unlist(strsplit(input_parse, ""))
checker <- ifelse(input_split == "{", 1, -1)
if (grepl(x = input, pattern = "(\\\\{)|(\\\\})", perl = TRUE)) {
warning(c(
"It seems that you have some escaped brackets in your input, ",
"this might not work as expected."
))
}
if (sum(checker) != 0) stop("Number of opening { and closing } must match.", call. = FALSE)
if (any(cumsum(checker) < 0)) stop("Input has at least one unpaired '{'.", call. = FALSE)
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ articles:
reference:
- title: RTF Table
contents:
- "rtf_rich_text"
- "rtf_page"
- "rtf_page_header"
- "rtf_page_footer"
Expand Down
51 changes: 51 additions & 0 deletions man/rtf_rich_text.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions tests/testthat/test-developer-testing-rtf_rich_text.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
test_that("rtf_rich_text fundamentally works.", {
output <- rtf_rich_text(
text = "This is {.emph important}. This is {.blah relevant}.",
theme = list(
.emph = list(color = "blue", `format` = "b"),
.blah = list(color = "red")
)
)

expectation <- rtf_text(paste0(
"This is ",
rtf_text("important", color = "blue", `format` = "b"),
". This is ",
rtf_text("relevant", color = "red"),
"."
))

expect_equal(output, expectation)
})

test_that("rtf_rich_text works with example from check_args.", {
output <- r2rtf:::rtf_paragraph(
r2rtf:::rtf_rich_text(
text = "3.5{^\\dagger}\n{.red red} {.hl highlight}",
theme = list(
.red = list(color = "red"),
.hl = list(background_color = "yellow")
)
)
)

expectation <- r2rtf:::rtf_paragraph(
r2rtf:::rtf_text(paste0(
"3.5{^\\dagger}\\line ",
r2rtf:::rtf_text("red", color = "red"),
" ",
r2rtf:::rtf_text("highlight", background_color = "yellow")
))
)

expect_equal(output, expectation)
})