Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Always read and write in UTF-8 #649

Merged
merged 3 commits into from
Aug 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Suggests:
LinkingTo:
Rcpp
VignetteBuilder: knitr
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.0.1.9000
Remotes:
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@
* If a package logo exists (`man/figures/logo.png`) it will be automatically
included in generated package docs (#609).

* roxygen2 now always reads and writes using UTF-8 encoding. If used with a
package that does not have `Encoding: UTF-8` in the DESCRIPTION, you'll
now get a warning (#564, #592).

* Usage for data objects now correctly generated, avoiding double escaping
other components of usage (#562).

Expand Down
8 changes: 0 additions & 8 deletions R/enc.R

This file was deleted.

2 changes: 1 addition & 1 deletion R/object-usage.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ usage_args <- function(args) {
}
arg_to_text <- function(arg) {
if (is.missing.arg(arg)) return("")
text <- deparse(arg, backtick = TRUE, width.cutoff = 500L)
text <- enc2utf8(deparse(arg, backtick = TRUE, width.cutoff = 500L))
text <- paste0(text, collapse = "\n")
Encoding(text) <- "UTF-8"

Expand Down
6 changes: 2 additions & 4 deletions R/parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,11 @@ parse_package <- function(path = ".",
registry = default_tags(),
global_options = list()
) {
desc <- read_pkg_description(path)

files <- package_files(path)
list_of_blocks <- lapply(files, tokenize_file,
registry = registry,
global_options = global_options,
file_encoding = desc$Encoding %||% "UTF-8"
global_options = global_options
)

blocks <- purrr::flatten(list_of_blocks)
Expand Down Expand Up @@ -83,7 +81,7 @@ parse_text <- function(text,
global_options = list()) {

file <- tempfile()
writeLines(text, file)
write_lines(text, file)
on.exit(unlink(file))

parse_file(
Expand Down
2 changes: 1 addition & 1 deletion R/rd.R
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ topic_add_examples <- function(topic, block, base_path) {
next
}

code <- readLines(path)
code <- read_lines(path)
examples <- escape_examples(code)

topic$add_simple_field("examples", examples)
Expand Down
2 changes: 1 addition & 1 deletion R/roclet.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ roc_proc_text <- function(roclet, input, registry = default_tags(),
stopifnot(is.roclet(roclet))

file <- tempfile()
writeLines(input, file)
write_lines(input, file)
on.exit(unlink(file))

env <- env_file(file)
Expand Down
5 changes: 5 additions & 0 deletions R/roxygenize.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ roxygenize <- function(package.dir = ".",
base_path <- normalizePath(package.dir)
is_first <- roxygen_setup(base_path)

encoding <- desc::desc_get("Encoding", file = base_path)[[1]]
if (!identical(encoding, "UTF-8")) {
warning("roxygen2 requires Encoding: UTF-8", call. = FALSE)
}

options <- load_options(base_path)
roclets <- roclets %||% options$roclets

Expand Down
6 changes: 3 additions & 3 deletions R/safety.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ first_time <- function(path) {
# Returns TRUE when `path` does not exist; otherwise returns whatever
# check_made_by() reports for the first line of the file.
made_by_roxygen <- function(path) {
if (!file.exists(path)) return(TRUE)

# NOTE(review): both a readLines() call and a read_lines() call appear here.
# This looks like the before/after pair of a diff hunk, so presumably only the
# read_lines() line exists in the merged file -- confirm against the repo.
first <- readLines(path, n = 1)
first <- read_lines(path, n = 1)
check_made_by(first)
}

# Prepends the header produced by made_by(comment) to the file at `path`.
# Stops with an error when the file is missing, and returns early (NULL)
# when check_made_by() already accepts the file's current first line.
add_made_by_roxygen <- function(path, comment) {
if (!file.exists(path)) stop("Can't find ", path, call. = FALSE)

# NOTE(review): the readLines()/read_lines() pair below looks like the
# before/after lines of a diff hunk; presumably only read_lines() remains in
# the merged file -- confirm against the repo.
lines <- readLines(path, warn = FALSE)
lines <- read_lines(path)
if (check_made_by(lines[1])) return()

# NOTE(review): same for the writeLines()/write_lines() pair -- presumably
# only write_lines() remains. Either way the header goes first, followed by
# the original lines.
writeLines(c(made_by(comment), lines), path)
write_lines(c(made_by(comment), lines), path)
}

check_made_by <- function(first) {
Expand Down
5 changes: 2 additions & 3 deletions R/tokenize.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# Returns list of roxy_blocks
tokenize_file <- function(file,
registry = list(),
global_options = list(),
file_encoding = "UTF-8"
global_options = list()
) {
lines <- read_lines_enc(file, file_encoding = file_encoding)
lines <- read_lines(file)

parsed <- parse(
text = lines,
Expand Down
10 changes: 10 additions & 0 deletions R/utils-io.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Package-level mask for base::readLines(): any call that is not explicitly
# namespaced fails immediately and points the developer at read_lines().
readLines <- function(...) {
  stop("Use read_lines!")
}
# Package-level mask for base::writeLines(): any call that is not explicitly
# namespaced fails immediately and points the developer at write_lines().
writeLines <- function(...) {
  stop("Use write_lines!")
}

# Read lines from `path`, declaring the result as UTF-8.
#
# path: file to read.
# n:    maximum number of lines to read; the default -1L is passed straight
#       through to base::readLines().
#
# Returns a character vector with one element per line. Warnings from
# base::readLines() are switched off via `warn = FALSE`.
read_lines <- function(path, n = -1L) {
  base::readLines(
    con = path,
    n = n,
    warn = FALSE,
    encoding = "UTF-8"
  )
}

# Write `text` to `path` as UTF-8.
#
# text: character vector, one element per output line.
# path: destination file.
#
# The text is converted with enc2utf8() first and then written with
# `useBytes = TRUE`, so base::writeLines() emits those bytes as they are.
write_lines <- function(text, path) {
  utf8_text <- enc2utf8(text)
  base::writeLines(utf8_text, con = path, useBytes = TRUE)
}
4 changes: 2 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ write_if_different <- function(path, contents, check = TRUE) {
FALSE
} else {
cat(sprintf('Writing %s\n', name))
writeLines(contents, path, useBytes = TRUE)
write_lines(contents, path)
TRUE
}
}
Expand Down Expand Up @@ -113,7 +113,7 @@ ignore_files <- function(rfiles, path) {
rfiles_relative <- sub("^[/]*", "", rfiles_relative)

# Remove any files that match any perl-compatible regexp
patterns <- readLines(rbuildignore, warn = FALSE)
patterns <- read_lines(rbuildignore)
patterns <- patterns[patterns != ""]
if (length(patterns) == 0L) {
return(rfiles)
Expand Down
5 changes: 2 additions & 3 deletions tests/testthat/test-Rbuildignore.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@ test_that("roxygen ignores files with matching pattern in .Rbuildignore", {

expect_equal(basename(package_files(test_pkg)), c("a.R", "ignore_me.R"))

#writeLines("^R/ignore_me.R$", file.path(test_pkg, ".Rbuildignore"))
writeChar("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
write_lines("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"))
expect_equal(basename(package_files(test_pkg)), "a.R")
})

# Writes a .Rbuildignore containing a pattern, an empty line and a second
# pattern, then expects package_files() to list only a.R.
test_that("roxygen works with empty lines in .Rbuildignore", {
test_pkg <- temp_copy_pkg(test_path("testRbuildignore"))
on.exit(unlink(test_pkg, recursive = TRUE))

# NOTE(review): the writeChar() and write_lines() calls below look like the
# before/after lines of a diff hunk; presumably only write_lines() remains in
# the merged file -- confirm against the repo.
writeChar("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
write_lines("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"))
expect_equal(basename(package_files(test_pkg)), "a.R")
})
19 changes: 0 additions & 19 deletions tests/testthat/test-nonASCII.R

This file was deleted.

35 changes: 35 additions & 0 deletions tests/testthat/test-utf8.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
context("nonASCII")

# Runs the rd roclet over a copy of the testNonASCII fixture package and
# checks that the Chinese title and note appear in the generated Rd file.
test_that("can generate nonASCII document", {
  pkg_dir <- temp_copy_pkg(test_path("testNonASCII"))
  on.exit(unlink(pkg_dir, recursive = TRUE), add = TRUE)

  # The first run is expected to mention the generated topic file.
  expect_output(roxygenise(pkg_dir, roclets = "rd"), "printChineseMsg[.]Rd")

  rd_file <- file.path(pkg_dir, "man", "printChineseMsg.Rd")
  expect_true(file.exists(rd_file))

  rd_lines <- read_lines(rd_file)
  note_found <- any(grepl("\u6211\u7231\u4e2d\u6587", rd_lines))
  title_found <- any(grepl("\u4e2d\u6587\u6ce8\u91ca", rd_lines))
  expect_true(note_found)
  expect_true(title_found)

  # A second run must be silent: the file on disk should not change again.
  expect_output(roxygenise(pkg_dir, roclets = "rd"), NA)
})


# Runs the rd roclet over a copy of the testUtf8Escape fixture package and
# checks that the degree sign from the default argument appears in a.Rd.
test_that("unicode escapes are ok", {
  pkg_dir <- temp_copy_pkg(test_path("testUtf8Escape"))
  on.exit(unlink(pkg_dir, recursive = TRUE), add = TRUE)

  # The first run is expected to mention the generated topic file.
  expect_output(roxygenise(pkg_dir, roclets = "rd"), "a[.]Rd")

  rd_file <- file.path(pkg_dir, "man", "a.Rd")
  expect_true(file.exists(rd_file))

  rd_lines <- read_lines(rd_file)
  degree_found <- any(grepl("7\u00b0C", rd_lines))
  expect_true(degree_found)

  # A second run must be silent: the file on disk should not change again.
  expect_output(roxygenise(pkg_dir, roclets = "rd"), NA)
})
1 change: 1 addition & 0 deletions tests/testthat/testEagerData/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ Description:
Author: Hadley <[email protected]>
Maintainer: Hadley <[email protected]>
Version: 0.1
Encoding: UTF-8
1 change: 1 addition & 0 deletions tests/testthat/testLazyData/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ Author: Hadley <[email protected]>
Maintainer: Hadley <[email protected]>
Version: 0.1
LazyData: TRUE
Encoding: UTF-8
2 changes: 1 addition & 1 deletion tests/testthat/testNonASCII/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ License: GPL-2
Description:
Author: Shrektan <[email protected]>
Maintainer: Shrektan <[email protected]>
Encoding: GB2312
Encoding: UTF-8
Version: 0.1
9 changes: 3 additions & 6 deletions tests/testthat/testNonASCII/R/a.r
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
# This script is intended to be saved in GB2312 to test if non UTF-8 encoding is
# supported.

#' ����ע��
#' 中文注释
#'
#' @note �Ұ����ġ�
#' @note 我爱中文。
printChineseMsg <- function() {
message("����GB2312�������ַ���")
message("我是UTF8的中文字符。")
}
8 changes: 8 additions & 0 deletions tests/testthat/testUtf8Escape/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Package: testUtf8Escape
Title: Check that utf8 escapes are round tripped ok
License: GPL-2
Description:
Author: Hadley <[email protected]>
Maintainer: Hadley <[email protected]>
Encoding: UTF-8
Version: 0.1
4 changes: 4 additions & 0 deletions tests/testthat/testUtf8Escape/R/a.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test fixture: the default value of `b` contains a degree sign, i.e. a
# character outside ASCII. Keep the literal exactly as written.
#' Title
#'
#' @param b Some label
a <- function(b = '7°C') 1