From b9cde49ddefacae5be421831836eec4de99bea1c Mon Sep 17 00:00:00 2001
From: hadley <h.wickham@gmail.com>
Date: Thu, 17 Aug 2017 11:10:45 -0500
Subject: [PATCH] Always read and write in UTF-8

* Helpers read_lines and write_lines do the right thing
* readLines() and writeLines() through errors to prevent accidental re-use in the future
* Warn if package encoding is not utf-8

Fixes #564. Fixes #592
---
 DESCRIPTION                               |  1 +
 NEWS.md                                   |  4 +++
 R/enc.R                                   |  8 ------
 R/parse.R                                 |  8 +++---
 R/rd.R                                    |  2 +-
 R/roxygenize.R                            |  5 ++++
 R/safety.R                                |  6 ++--
 R/utils-io.R                              | 16 +++++++++++
 R/utils.R                                 |  4 +--
 tests/testthat/test-Rbuildignore.R        |  5 ++--
 tests/testthat/test-nonASCII.R            | 19 ------------
 tests/testthat/test-utf8.R                | 35 +++++++++++++++++++++++
 tests/testthat/testEagerData/DESCRIPTION  |  1 +
 tests/testthat/testLazyData/DESCRIPTION   |  1 +
 tests/testthat/testNonASCII/DESCRIPTION   |  2 +-
 tests/testthat/testNonASCII/R/a.r         |  9 ++----
 tests/testthat/testUtf8Escape/DESCRIPTION |  8 ++++++
 tests/testthat/testUtf8Escape/R/a.r       |  4 +++
 18 files changed, 91 insertions(+), 47 deletions(-)
 delete mode 100644 R/enc.R
 create mode 100644 R/utils-io.R
 delete mode 100644 tests/testthat/test-nonASCII.R
 create mode 100644 tests/testthat/test-utf8.R
 create mode 100644 tests/testthat/testUtf8Escape/DESCRIPTION
 create mode 100644 tests/testthat/testUtf8Escape/R/a.r

diff --git a/DESCRIPTION b/DESCRIPTION
index 6edaae114..546ad2d1c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -38,6 +38,7 @@ Suggests:
 LinkingTo: 
     Rcpp
 VignetteBuilder: knitr
+Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.0.1.9000
 Remotes: 
diff --git a/NEWS.md b/NEWS.md
index 3cb7493f6..ed8b93514 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -25,6 +25,10 @@
 * If a package logo exists (`man/figures/logo.png`) it will be automatically
   included in generated package docs (#609).
 
+* roxygen2 now always reads and writes using UTF-8 encoding. If used with a
+  package that does not have `Encoding: UTF-8` in the DESCRIPTION, you'll
+  now get a warning (#564, #592).
+
 * Usage for data objects now correctly generated, avoiding double escaping
   other components of usage (#562).
 
diff --git a/R/enc.R b/R/enc.R
deleted file mode 100644
index b7811ec79..000000000
--- a/R/enc.R
+++ /dev/null
@@ -1,8 +0,0 @@
-read_lines_enc <- function(path, file_encoding = "UTF-8", n = -1L, ok = TRUE, skipNul = FALSE) {
-  con <- file(path, encoding = file_encoding)
-  on.exit(close(con), add = TRUE)
-
-  lines <- readLines(con, warn = FALSE, n = n, ok = ok, skipNul = skipNul)
-  Encoding(lines) <- "UTF-8"
-  lines
-}
diff --git a/R/parse.R b/R/parse.R
index 73eb82dba..02533c48b 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -4,7 +4,7 @@ parse_package <- function(base_path, load_code, registry, global_options = list(
 
   files <- package_files(base_path)
   parsed <- lapply(files, parse_blocks, env = env, registry = registry,
-                   global_options = global_options, fileEncoding = desc$Encoding %||% "UTF-8")
+                   global_options = global_options)
   blocks <- unlist(parsed, recursive = FALSE)
 
   list(env = env, blocks = blocks)
@@ -54,7 +54,7 @@ parse_code <- function(file,
 
 parse_text <- function(text, registry = default_tags(), global_options = list()) {
   file <- tempfile()
-  writeLines(text, file)
+  write_lines(text, file)
   on.exit(unlink(file))
 
   env <- new.env(parent = parent.env(globalenv()))
@@ -67,9 +67,9 @@ parse_text <- function(text, registry = default_tags(), global_options = list())
   list(env = env, blocks = blocks)
 }
 
-parse_blocks <- function(file, env, registry, global_options = list(), fileEncoding = "UTF-8") {
+parse_blocks <- function(file, env, registry, global_options = list()) {
 
-  lines <- read_lines_enc(file, file_encoding = fileEncoding)
+  lines <- read_lines(file)
   parsed <- parse(text = lines, keep.source = TRUE, srcfile = srcfilecopy(file, lines, isFile = TRUE))
   if (length(parsed) == 0) return()
 
diff --git a/R/rd.R b/R/rd.R
index a5acc59b9..0b57f200e 100644
--- a/R/rd.R
+++ b/R/rd.R
@@ -356,7 +356,7 @@ topic_add_examples <- function(topic, block, base_path) {
       next
     }
 
-    code <- readLines(path)
+    code <- read_lines(path)
     examples <- escape_examples(code)
 
     topic$add_simple_field("examples", examples)
diff --git a/R/roxygenize.R b/R/roxygenize.R
index 51df40a39..da84671c9 100644
--- a/R/roxygenize.R
+++ b/R/roxygenize.R
@@ -43,6 +43,11 @@ roxygenize <- function(package.dir = ".",
   dir.create(man_path, recursive = TRUE, showWarnings = FALSE)
   update_roxygen_version(base_path)
 
+  encoding <- desc::desc_get("Encoding", file = base_path)[[1]]
+  if (!identical(encoding, "UTF-8")) {
+    warning("roxygen2 requires Encoding: UTF-8", call. = FALSE)
+  }
+
   options <- load_options(base_path)
   roclets <- roclets %||% options$roclets
 
diff --git a/R/safety.R b/R/safety.R
index bf6cdf67f..7d11d7e27 100644
--- a/R/safety.R
+++ b/R/safety.R
@@ -14,17 +14,17 @@ first_time <- function(path) {
 made_by_roxygen <- function(path) {
   if (!file.exists(path)) return(TRUE)
 
-  first <- readLines(path, n = 1)
+  first <- read_lines(path, n = 1)
   check_made_by(first)
 }
 
 add_made_by_roxygen <- function(path, comment) {
   if (!file.exists(path)) stop("Can't find ", path, call. = FALSE)
 
-  lines <- readLines(path, warn = FALSE)
+  lines <- read_lines(path)
   if (check_made_by(lines[1])) return()
 
-  writeLines(c(made_by(comment), lines), path)
+  write_lines(c(made_by(comment), lines), path)
 }
 
 check_made_by <- function(first) {
diff --git a/R/utils-io.R b/R/utils-io.R
new file mode 100644
index 000000000..fb305ded2
--- /dev/null
+++ b/R/utils-io.R
@@ -0,0 +1,16 @@
+readLines <- function(...) stop("Use read_lines!")
+writeLines <- function(...) stop("Use write_lines!")
+
+read_lines <- function(path, n = -1L) {
+  con <- file(path, open = "r", encoding = "utf-8")
+  on.exit(close(con))
+
+  base::readLines(con, n = n, warn = FALSE)
+}
+
+write_lines <- function(text, path) {
+  con <- file(path, open = "w", encoding = "utf-8")
+  on.exit(close(con))
+
+  base::writeLines(text, con)
+}
diff --git a/R/utils.R b/R/utils.R
index 86955dec9..1c186a531 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -80,7 +80,7 @@ write_if_different <- function(path, contents, check = TRUE) {
     FALSE
   } else {
     cat(sprintf('Writing %s\n', name))
-    writeLines(contents, path, useBytes = TRUE)
+    write_lines(contents, path)
     TRUE
   }
 }
@@ -113,7 +113,7 @@ ignore_files <- function(rfiles, path) {
   rfiles_relative <- sub("^[/]*", "", rfiles_relative)
 
   # Remove any files that match any perl-compatible regexp
-  patterns <- readLines(rbuildignore, warn = FALSE)
+  patterns <- read_lines(rbuildignore)
   patterns <- patterns[patterns != ""]
   matches <- lapply(patterns, grepl, rfiles_relative, perl = TRUE)
   matches <- Reduce("|", matches)
diff --git a/tests/testthat/test-Rbuildignore.R b/tests/testthat/test-Rbuildignore.R
index 68b310d0b..071b0218f 100644
--- a/tests/testthat/test-Rbuildignore.R
+++ b/tests/testthat/test-Rbuildignore.R
@@ -6,8 +6,7 @@ test_that("roxygen ignores files with matching pattern in .Rbuildignore", {
 
   expect_equal(basename(package_files(test_pkg)), c("a.R", "ignore_me.R"))
 
-  #writeLines("^R/ignore_me.R$", file.path(test_pkg, ".Rbuildignore"))
-  writeChar("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
+  write_lines("^R/ignore_me.R$\n", file.path(test_pkg, ".Rbuildignore"))
   expect_equal(basename(package_files(test_pkg)), "a.R")
 })
 
@@ -15,6 +14,6 @@ test_that("roxygen works with empty lines in .Rbuildignore", {
   test_pkg <- temp_copy_pkg(test_path("testRbuildignore"))
   on.exit(unlink(test_pkg, recursive = TRUE))
 
-  writeChar("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"), eos = NULL)
+  write_lines("^R/ignore_me.R$\n\n.nonexistentfile", file.path(test_pkg, ".Rbuildignore"))
   expect_equal(basename(package_files(test_pkg)), "a.R")
 })
diff --git a/tests/testthat/test-nonASCII.R b/tests/testthat/test-nonASCII.R
deleted file mode 100644
index 26202ceae..000000000
--- a/tests/testthat/test-nonASCII.R
+++ /dev/null
@@ -1,19 +0,0 @@
-context("nonASCII")
-
-test_that("can generate nonASCII document", {
-  test_pkg <- temp_copy_pkg('testNonASCII')
-  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)
-
-  expect_output(roxygenise(test_pkg, roclets = "rd"), "printChineseMsg[.]Rd")
-  expect_true(file.exists(file.path(test_pkg, "man", "printChineseMsg.Rd")))
-
-  cnChar <- read_lines_enc(file.path(test_pkg, "man", "printChineseMsg.Rd"))
-
-  # Because the parse in testthat::test don't specify encoding to UTF-8 as well,
-  # so we have to use unicode escapes.
-  expect_true(any(grepl("\u6211\u7231\u4e2d\u6587", cnChar)))
-  expect_true(any(grepl("\u4e2d\u6587\u6ce8\u91ca", cnChar)))
-
-  # No output on second run
-  expect_output(roxygenise(test_pkg, roclets = "rd"), NA)
-})
diff --git a/tests/testthat/test-utf8.R b/tests/testthat/test-utf8.R
new file mode 100644
index 000000000..53b72d683
--- /dev/null
+++ b/tests/testthat/test-utf8.R
@@ -0,0 +1,35 @@
+context("nonASCII")
+
+test_that("can generate nonASCII document", {
+  test_pkg <- temp_copy_pkg(test_path('testNonASCII'))
+  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)
+
+  expect_output(roxygenise(test_pkg, roclets = "rd"), "printChineseMsg[.]Rd")
+
+  rd_path <- file.path(test_pkg, "man", "printChineseMsg.Rd")
+  expect_true(file.exists(rd_path))
+  rd <- read_lines(rd_path)
+
+  expect_true(any(grepl("\u6211\u7231\u4e2d\u6587", rd)))
+  expect_true(any(grepl("\u4e2d\u6587\u6ce8\u91ca", rd)))
+
+  # Shouldn't change again
+  expect_output(roxygenise(test_pkg, roclets = "rd"), NA)
+})
+
+
+test_that("unicode escapes are ok", {
+  test_pkg <- temp_copy_pkg(test_path('testUtf8Escape'))
+  on.exit(unlink(test_pkg, recursive = TRUE), add = TRUE)
+
+  expect_output(roxygenise(test_pkg, roclets = "rd"), "a[.]Rd")
+
+  rd_path <- file.path(test_pkg, "man", "a.Rd")
+  expect_true(file.exists(rd_path))
+  rd <- read_lines(rd_path)
+
+  expect_true(any(grepl("7\u00b0C", rd)))
+
+  # Shouldn't change again
+  expect_output(roxygenise(test_pkg, roclets = "rd"), NA)
+})
diff --git a/tests/testthat/testEagerData/DESCRIPTION b/tests/testthat/testEagerData/DESCRIPTION
index 90100c7e6..6aab7f12c 100644
--- a/tests/testthat/testEagerData/DESCRIPTION
+++ b/tests/testthat/testEagerData/DESCRIPTION
@@ -5,3 +5,4 @@ Description:
 Author: Hadley <h.wickham@gmail.com>
 Maintainer: Hadley <h.wickham@gmail.com>
 Version: 0.1
+Encoding: UTF-8
diff --git a/tests/testthat/testLazyData/DESCRIPTION b/tests/testthat/testLazyData/DESCRIPTION
index 25165991d..383391bee 100644
--- a/tests/testthat/testLazyData/DESCRIPTION
+++ b/tests/testthat/testLazyData/DESCRIPTION
@@ -6,3 +6,4 @@ Author: Hadley <h.wickham@gmail.com>
 Maintainer: Hadley <h.wickham@gmail.com>
 Version: 0.1
 LazyData: TRUE
+Encoding: UTF-8
diff --git a/tests/testthat/testNonASCII/DESCRIPTION b/tests/testthat/testNonASCII/DESCRIPTION
index 9e5ca820e..ae54a67cb 100644
--- a/tests/testthat/testNonASCII/DESCRIPTION
+++ b/tests/testthat/testNonASCII/DESCRIPTION
@@ -4,5 +4,5 @@ License: GPL-2
 Description:
 Author: Shrektan <shrektan@126.com>
 Maintainer: Shrektan <shrektan@126.com>
-Encoding: GB2312
+Encoding: UTF-8
 Version: 0.1
diff --git a/tests/testthat/testNonASCII/R/a.r b/tests/testthat/testNonASCII/R/a.r
index 770d735a9..c76b2d89d 100644
--- a/tests/testthat/testNonASCII/R/a.r
+++ b/tests/testthat/testNonASCII/R/a.r
@@ -1,9 +1,6 @@
-# This script is intended to be saved in GB2312 to test if non UTF-8 encoding is
-# supported.
-
-#' ÖÐÎÄ×¢ÊÍ
+#' ä¸­æ–‡æ³¨é‡Š
 #'
-#' @note ÎÒ°®ÖÐÎÄ¡£
+#' @note æˆ‘çˆ±ä¸­æ–‡ã€‚
 printChineseMsg <- function() {
-  message("ÎÒÊÇGB2312µÄÖÐÎÄ×Ö·û¡£")
+  message("æˆ‘æ˜¯UTF8çš„ä¸­æ–‡å­—ç¬¦ã€‚")
 }
diff --git a/tests/testthat/testUtf8Escape/DESCRIPTION b/tests/testthat/testUtf8Escape/DESCRIPTION
new file mode 100644
index 000000000..c0c5348b7
--- /dev/null
+++ b/tests/testthat/testUtf8Escape/DESCRIPTION
@@ -0,0 +1,8 @@
+Package: testUtf8Escape
+Title: Check that utf8 escapes are round tripped ok
+License: GPL-2
+Description:
+Author: Hadley <hadley@rstudio.com>
+Maintainer: Hadley <hadley@rstudio.com>
+Encoding: UTF-8
+Version: 0.1
diff --git a/tests/testthat/testUtf8Escape/R/a.r b/tests/testthat/testUtf8Escape/R/a.r
new file mode 100644
index 000000000..4d88cd70c
--- /dev/null
+++ b/tests/testthat/testUtf8Escape/R/a.r
@@ -0,0 +1,4 @@
+#' Title
+#'
+#' @param b Some label
+a <- function(b = '7Â°C') 1