r-lib · hadley · Aug 23, 2017 · Aug 17, 2017 · Aug 18, 2017 · Aug 22, 2017
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -39,6 +39,7 @@ Suggests:
 LinkingTo: 
     Rcpp
 VignetteBuilder: knitr
+Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.0.1.9000
 Remotes: 

diff --git a/NEWS.md b/NEWS.md
@@ -81,6 +81,10 @@
 * If a package logo exists (`man/figures/logo.png`) it will be automatically
   included in generated package docs (#609).
 
+* roxygen2 now always reads and writes using UTF-8 encoding. If used with a
+  package that does not have `Encoding: UTF-8` in the DESCRIPTION, you'll
+  now get a warning (#564, #592).
+
 * Usage for data objects now correctly generated, avoiding double escaping
   other components of usage (#562).
 

diff --git a/R/enc.R b/R/enc.R
diff --git a/R/object-usage.R b/R/object-usage.R
@@ -84,7 +84,7 @@ usage_args <- function(args) {
   }
   arg_to_text <- function(arg) {
     if (is.missing.arg(arg)) return("")
-    text <- deparse(arg, backtick = TRUE, width.cutoff = 500L)
+    text <- enc2utf8(deparse(arg, backtick = TRUE, width.cutoff = 500L))
     text <- paste0(text, collapse = "\n")
     Encoding(text) <- "UTF-8"
 

diff --git a/R/parse.R b/R/parse.R
@@ -30,13 +30,11 @@ parse_package <- function(path = ".",
                           registry = default_tags(),
                           global_options = list()
                           ) {
-  desc <- read_pkg_description(path)
 
   files <- package_files(path)
   list_of_blocks <- lapply(files, tokenize_file,
     registry = registry,
-    global_options = global_options,
-    file_encoding = desc$Encoding %||% "UTF-8"
+    global_options = global_options
   )
 
   blocks <- purrr::flatten(list_of_blocks)
@@ -83,7 +81,7 @@ parse_text <- function(text,
                        global_options = list()) {
 
   file <- tempfile()
-  writeLines(text, file)
+  write_lines(text, file)
   on.exit(unlink(file))
 
   parse_file(

diff --git a/R/rd.R b/R/rd.R
@@ -366,7 +366,7 @@ topic_add_examples <- function(topic, block, base_path) {
       next
     }
 
-    code <- readLines(path)
+    code <- read_lines(path)
     examples <- escape_examples(code)
 
     topic$add_simple_field("examples", examples)

diff --git a/R/roclet.R b/R/roclet.R
@@ -125,7 +125,7 @@ roc_proc_text <- function(roclet, input, registry = default_tags(),
   stopifnot(is.roclet(roclet))
 
   file <- tempfile()
-  writeLines(input, file)
+  write_lines(input, file)
   on.exit(unlink(file))
 
   env <- env_file(file)

diff --git a/R/roxygenize.R b/R/roxygenize.R
@@ -32,6 +32,11 @@ roxygenize <- function(package.dir = ".",
   base_path <- normalizePath(package.dir)
   is_first <- roxygen_setup(base_path)
 
+  encoding <- desc::desc_get("Encoding", file = base_path)[[1]]
+  if (!identical(encoding, "UTF-8")) {
+    warning("roxygen2 requires Encoding: UTF-8", call. = FALSE)
+  }
+
   options <- load_options(base_path)
   roclets <- roclets %||% options$roclets
 

diff --git a/R/safety.R b/R/safety.R
@@ -14,17 +14,17 @@ first_time <- function(path) {
 made_by_roxygen <- function(path) {
   if (!file.exists(path)) return(TRUE)
 
-  first <- readLines(path, n = 1)
+  first <- read_lines(path, n = 1)
   check_made_by(first)
 }
 
 add_made_by_roxygen <- function(path, comment) {
   if (!file.exists(path)) stop("Can't find ", path, call. = FALSE)
 
-  lines <- readLines(path, warn = FALSE)
+  lines <- read_lines(path)
   if (check_made_by(lines[1])) return()
 
-  writeLines(c(made_by(comment), lines), path)
+  write_lines(c(made_by(comment), lines), path)
 }
 
 check_made_by <- function(first) {

diff --git a/R/tokenize.R b/R/tokenize.R
@@ -1,10 +1,9 @@
 # Returns list of roxy_blocks
 tokenize_file <- function(file,
                           registry = list(),
-                          global_options = list(),
-                          file_encoding = "UTF-8"
+                          global_options = list()
                           ) {
-  lines <- read_lines_enc(file, file_encoding = file_encoding)
+  lines <- read_lines(file)
 
   parsed <- parse(
     text = lines,

diff --git a/R/utils-io.R b/R/utils-io.R
@@ -0,0 +1,10 @@
+readLines <- function(...) stop("Use read_lines!")  # shadows base::readLines; unqualified calls fail
+writeLines <- function(...) stop("Use write_lines!")  # shadows base::writeLines; unqualified calls fail
+
+read_lines <- function(path, n = -1L) {  # read `n` lines from `path`; default n is passed on unchanged
+  base::readLines(path, n = n, encoding = "UTF-8", warn = FALSE)  # explicit base:: bypasses the shim above
+}
+
+write_lines <- function(text, path) {  # write character vector `text` to `path`
+  base::writeLines(enc2utf8(text), path, useBytes = TRUE)  # convert to UTF-8, then write without re-encoding
+}
diff --git a/R/utils.R b/R/utils.R
@@ -80,7 +80,7 @@ write_if_different <- function(path, contents, check = TRUE) {
     FALSE
   } else {
     cat(sprintf('Writing %s\n', name))
-    writeLines(contents, path, useBytes = TRUE)
+    write_lines(contents, path)
     TRUE
   }
 }
@@ -113,7 +113,7 @@ ignore_files <- function(rfiles, path) {
   rfiles_relative <- sub("^[/]*", "", rfiles_relative)
 
   # Remove any files that match any perl-compatible regexp
-  patterns <- readLines(rbuildignore, warn = FALSE)
+  patterns <- read_lines(rbuildignore)
   patterns <- patterns[patterns != ""]
   if (length(patterns) == 0L) {
     return(rfiles)

diff --git a/tests/testthat/test-Rbuildignore.R b/tests/testthat/test-Rbuildignore.R
@@ -6,15 +6,14 @@ test_that("roxygen ignores files with matching pattern in .Rbuildignore", {
 
   expect_equal(basename(package_files(test_pkg)), c("a.R", "ignore_me.R"))
 
-  #writeLines("^R/ignore_me.R$", file.path(test_pkg, ".Rbuildignore"))
-  writeChar("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
+  write_lines("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"))
   expect_equal(basename(package_files(test_pkg)), "a.R")
 })
 
 test_that("roxygen works with empty lines in .Rbuildignore", {
   test_pkg <- temp_copy_pkg(test_path("testRbuildignore"))
   on.exit(unlink(test_pkg, recursive = TRUE))
 
-  writeChar("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
+  write_lines("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"))
   expect_equal(basename(package_files(test_pkg)), "a.R")
 })
diff --git a/tests/testthat/test-nonASCII.R b/tests/testthat/test-nonASCII.R
diff --git a/tests/testthat/test-utf8.R b/tests/testthat/test-utf8.R
@@ -0,0 +1,35 @@
+context("nonASCII")
+
+test_that("can generate nonASCII document", {
+  test_pkg <- temp_copy_pkg(test_path('testNonASCII'))
+  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)
+
+  expect_output(roxygenise(test_pkg, roclets = "rd"), "printChineseMsg[.]Rd")
+
+  rd_path <- file.path(test_pkg, "man", "printChineseMsg.Rd")
+  expect_true(file.exists(rd_path))
+  rd <- read_lines(rd_path)
+
+  expect_true(any(grepl("\u6211\u7231\u4e2d\u6587", rd)))  # escaped form of the fixture's @note text
+  expect_true(any(grepl("\u4e2d\u6587\u6ce8\u91ca", rd)))  # escaped form of the fixture's title line
+
+  # A second run on the unchanged package should print nothing (regexp NA)
+  expect_output(roxygenise(test_pkg, roclets = "rd"), NA)
+})
+
+
+test_that("unicode escapes are ok", {
+  test_pkg <- temp_copy_pkg(test_path('testUtf8Escape'))
+  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)
+
+  expect_output(roxygenise(test_pkg, roclets = "rd"), "a[.]Rd")
+
+  rd_path <- file.path(test_pkg, "man", "a.Rd")
+  expect_true(file.exists(rd_path))
+  rd <- read_lines(rd_path)
+
+  expect_true(any(grepl("7\u00b0C", rd)))  # the fixture's default argument value, with the degree sign escaped
+
+  # A second run on the unchanged package should print nothing (regexp NA)
+  expect_output(roxygenise(test_pkg, roclets = "rd"), NA)
+})
diff --git a/tests/testthat/testEagerData/DESCRIPTION b/tests/testthat/testEagerData/DESCRIPTION
@@ -5,3 +5,4 @@ Description:
 Author: Hadley <[email protected]>
 Maintainer: Hadley <[email protected]>
 Version: 0.1
+Encoding: UTF-8
diff --git a/tests/testthat/testLazyData/DESCRIPTION b/tests/testthat/testLazyData/DESCRIPTION
@@ -6,3 +6,4 @@ Author: Hadley <[email protected]>
 Maintainer: Hadley <[email protected]>
 Version: 0.1
 LazyData: TRUE
+Encoding: UTF-8
diff --git a/tests/testthat/testNonASCII/DESCRIPTION b/tests/testthat/testNonASCII/DESCRIPTION
@@ -4,5 +4,5 @@ License: GPL-2
 Description:
 Author: Shrektan <[email protected]>
 Maintainer: Shrektan <[email protected]>
-Encoding: GB2312
+Encoding: UTF-8
 Version: 0.1
diff --git a/tests/testthat/testNonASCII/R/a.r b/tests/testthat/testNonASCII/R/a.r
@@ -1,9 +1,6 @@
-# This script is intended to be saved in GB2312 to test if non UTF-8 encoding is
-# supported.
-
-#' 中文注释
+#' 中文注释
 #'
-#' @note 我爱中文。
+#' @note 我爱中文。
 printChineseMsg <- function() {
-  message("我是GB2312的中文字符。")
+  message("我是UTF8的中文字符。")
 }
diff --git a/tests/testthat/testUtf8Escape/DESCRIPTION b/tests/testthat/testUtf8Escape/DESCRIPTION
@@ -0,0 +1,8 @@
+Package: testUtf8Escape
+Title: Check that utf8 escapes are round tripped ok
+License: GPL-2
+Description:
+Author: Hadley <[email protected]>
+Maintainer: Hadley <[email protected]>
+Encoding: UTF-8
+Version: 0.1
diff --git a/tests/testthat/testUtf8Escape/R/a.r b/tests/testthat/testUtf8Escape/R/a.r
@@ -0,0 +1,4 @@
+#' Title
+#'
+#' @param b Some label
+a <- function(b = '7°C') 1