Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Capture parser warnings as lints #2792

Merged
merged 13 commits into from
Mar 4, 2025
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* `undesirable_operator_linter()` lints operators in prefix form, e.g. `` `%%`(x, 2)`` (#1910, @MichaelChirico). Disable this by setting `call_is_undesirable=FALSE`.
* `indentation_linter()` handles `for` un-braced for loops correctly (#2564, @MichaelChirico).
* Setting `exclusions` supports globs like `knitr*` to exclude files/directories with a pattern (#1554, @MichaelChirico).
* `get_source_expression()` captures warnings emitted by the R parser (currently always for mis-specified literal integers like `1.1L`) and `lint()` returns them as lints (#2065, @MichaelChirico).
* `object_name_linter()` and `object_length_linter()` apply to objects assigned with `assign()` or generics created with `setGeneric()` (#1665, @MichaelChirico).
* `object_usage_linter()` gains argument `interpret_extensions` to govern which false positive-prone common syntaxes should be checked for used objects (#1472, @MichaelChirico). Currently `"glue"` (renamed from earlier argument `interpret_glue`) and `"rlang"` are supported. The latter newly covers usage of the `.env` pronoun like `.env$key`, where `key` was previously missed as being a used variable.

Expand Down
156 changes: 114 additions & 42 deletions R/get_source_expressions.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#' }
#' }
#' \item{error}{A `Lint` object describing any parsing error.}
#' \item{warning}{A `lints` object describing any parsing warning.}
#' \item{lines}{The [readLines()] output for this file.}
#' }
#'
Expand All @@ -72,7 +73,7 @@ get_source_expressions <- function(filename, lines = NULL) {
# Only regard explicit attribute terminal_newline=FALSE as FALSE and all other cases (e.g. NULL or TRUE) as TRUE.
terminal_newline <- !isFALSE(attr(source_expression$lines, "terminal_newline", exact = TRUE))

e <- NULL
e <- w <- NULL
source_expression$lines <- extract_r_source(
filename = source_expression$filename,
lines = source_expression$lines,
Expand All @@ -82,46 +83,44 @@ get_source_expressions <- function(filename, lines = NULL) {
source_expression$content <- get_content(source_expression$lines)
parsed_content <- get_source_expression(source_expression, error = function(e) lint_parse_error(e, source_expression))

# Currently no way to distinguish the source of the warning
# from the message itself, so we just grep the source for the
# exact string generating the warning; de-dupe in case of
# multiple exact matches like '1e-3L; 1e-3L'.
# See https://bugs.r-project.org/show_bug.cgi?id=18863.
w <- lint_parse_warnings(w, parsed_content, source_expression)

if (is_lint(e) && (is.na(e$line) || !nzchar(e$line) || e$message == "unexpected end of input")) {
# Don't create expression list if it's unreliable (invalid encoding or unhandled parse error)
expressions <- list()
} else {
top_level_map <- generate_top_level_map(parsed_content)
xml_parsed_content <- safe_parse_to_xml(parsed_content)

expressions <- lapply(
X = top_level_expressions(parsed_content),
FUN = get_single_source_expression,
parsed_content,
source_expression,
filename,
top_level_map
)
return(list(expressions = list(), error = e, warning = w, lines = source_expression$lines))
}

if (!is.null(xml_parsed_content) && !is.na(xml_parsed_content)) {
expression_xmls <- lapply(
xml_find_all(xml_parsed_content, "/exprlist/*"),
function(top_level_expr) xml2::xml_add_parent(xml2::xml_new_root(top_level_expr), "exprlist")
)
for (i in seq_along(expressions)) {
expressions[[i]]$xml_parsed_content <- expression_xmls[[i]]
expressions[[i]]$xml_find_function_calls <- build_xml_find_function_calls(expression_xmls[[i]])
}
}
top_level_map <- generate_top_level_map(parsed_content)
xml_parsed_content <- safe_parse_to_xml(parsed_content)

# add global expression
expressions[[length(expressions) + 1L]] <- list(
filename = filename,
file_lines = source_expression$lines,
content = source_expression$lines,
full_parsed_content = parsed_content,
full_xml_parsed_content = xml_parsed_content,
xml_find_function_calls = build_xml_find_function_calls(xml_parsed_content),
terminal_newline = terminal_newline
)
}
expressions <- lapply(
X = top_level_expressions(parsed_content),
FUN = get_single_source_expression,
parsed_content,
source_expression,
filename,
top_level_map
)

expressions <- maybe_append_expression_xml(expressions, xml_parsed_content)

# add global expression
expressions[[length(expressions) + 1L]] <- list(
filename = filename,
file_lines = source_expression$lines,
content = source_expression$lines,
full_parsed_content = parsed_content,
full_xml_parsed_content = xml_parsed_content,
xml_find_function_calls = build_xml_find_function_calls(xml_parsed_content),
terminal_newline = terminal_newline
)

list(expressions = expressions, error = e, lines = source_expression$lines)
list(expressions = expressions, error = e, warning = w, lines = source_expression$lines)
}

lint_parse_error <- function(e, source_expression) {
Expand Down Expand Up @@ -153,6 +152,56 @@ lint_parse_error <- function(e, source_expression) {
lint_parse_error_nonstandard(e, source_expression)
}

#' Currently no way to distinguish the source of the warning
#' from the message itself, so we just grep the source for the
#' exact string generating the warning; de-dupe in case of
#' multiple exact matches like '1e-3L; 1e-3L'
#' @noRd
lint_parse_warnings <- function(w, parsed_content, source_expression) {
if (!length(w)) {
return(w)
}
flatten_lints(lapply(unique(w), lint_parse_warning, parsed_content, source_expression))
}

#' The set of parser warnings seems pretty stable, but as
#' long as they don't generate sourceref hints, we're
#' stuck with this somewhat-manual approach.
#' @noRd
lint_parse_warning <- function(w, parsed_content, source_expression) {
for (lint_re in parser_warning_regexes) {
bad_txt <- re_matches(w, lint_re)$txt[1L]
# at most one regex matches the warning
if (is.na(bad_txt)) {
next
} else {
break
}
}
# use the parse tree to avoid baroque matches to comments, strings
hits <- parsed_content[with(parsed_content, token == "NUM_CONST" & text == bad_txt), ]
lapply(seq_len(nrow(hits)), function(ii) {
Lint(
filename = source_expression$filename,
line_number = hits$line1[ii],
column_number = hits$col1[ii],
type = "warning",
message = w,
line = source_expression$lines[[as.character(hits$line1[ii])]],
ranges = list(c(hits$col1[ii], hits$col2[ii]))
)
})
}

parser_warning_regexes <- list(
int_with_decimal =
rex("integer literal ", capture(anything, name = "txt"), " contains decimal; using numeric value"),
nonint_with_l =
rex("non-integer value ", capture(anything, name = "txt"), " qualified with L; using numeric value"),
unneeded_decimal =
rex("integer literal ", capture(anything, name = "txt"), " contains unnecessary decimal point")
)

#' Ensure a string is valid for printing
#'
#' Helper to ensure a valid string is provided as line where necessary.
Expand Down Expand Up @@ -491,13 +540,21 @@ get_single_source_expression <- function(loc,
get_source_expression <- function(source_expression, error = identity) {
parse_error <- FALSE

parsed_content <- tryCatch(
parse(
text = source_expression$content,
srcfile = source_expression,
keep.source = TRUE
env <- parent.frame() # nolint: object_usage_linter. Used below.
# https://adv-r.hadley.nz/conditions.html
parsed_content <- withCallingHandlers(
tryCatch(
parse(
text = source_expression$content,
srcfile = source_expression,
keep.source = TRUE
),
error = error
),
error = error
warning = function(w) {
env$w <- c(env$w, conditionMessage(w))
invokeRestart("muffleWarning")
}
)

if (is_error(parsed_content) || is_lint(parsed_content)) {
Expand All @@ -521,6 +578,21 @@ get_source_expression <- function(source_expression, error = identity) {
fix_octal_escapes(fix_eq_assigns(fix_tab_indentations(source_expression)), source_expression$lines)
}

maybe_append_expression_xml <- function(expressions, xml_parsed_content) {
if (is.null(xml_parsed_content) || is.na(xml_parsed_content)) {
return(expressions)
}
expression_xmls <- lapply(
xml_find_all(xml_parsed_content, "/exprlist/*"),
function(top_level_expr) xml2::xml_add_parent(xml2::xml_new_root(top_level_expr), "exprlist")
)
for (i in seq_along(expressions)) {
expressions[[i]]$xml_parsed_content <- expression_xmls[[i]]
expressions[[i]]$xml_find_function_calls <- build_xml_find_function_calls(expression_xmls[[i]])
}
expressions
}

get_newline_locs <- function(x) {
newline_search <- re_matches(x, rex("\n"), locations = TRUE, global = TRUE)[[1L]]$start
c(
Expand Down
13 changes: 11 additions & 2 deletions R/lint.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
}
}

lints <- maybe_append_error_lint(lints, source_expressions$error, lint_cache, filename)
lints <- maybe_append_condition_lints(lints, source_expressions, lint_cache, filename)
lints <- reorder_lints(flatten_lints(lints))
class(lints) <- c("lints", "list")

Expand Down Expand Up @@ -670,7 +670,8 @@
paste(collapse = "", rep.int(character, length))
}

maybe_append_error_lint <- function(lints, error, lint_cache, filename) {
maybe_append_condition_lints <- function(lints, source_expression, lint_cache, filename) {
error <- source_expression$error
if (is_lint(error)) {
error$linter <- "error"
lints[[length(lints) + 1L]] <- error
Expand All @@ -679,6 +680,14 @@
cache_lint(lint_cache, list(filename = filename, content = ""), "error", error)
}
}
for (l in source_expression$warning) {
l$linter <- "parser_warning_linter"
lints[[length(lints) + 1L]] <- l

if (!is.null(lint_cache)) {
cache_lint(lint_cache, list(filename = filename, content = ""), "parser_warning_linter", l)

Check warning on line 688 in R/lint.R

View check run for this annotation

Codecov / codecov/patch

R/lint.R#L688

Added line #L688 was not covered by tests
}
}
lints
}

Expand Down
2 changes: 2 additions & 0 deletions R/settings.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ read_config_file <- function(config_file, call = parent.frame()) {
)
}
)
# https://adv-r.hadley.nz/conditions.html
setting_value <- withCallingHandlers(
tryCatch(
eval(parsed_setting),
Expand Down Expand Up @@ -151,6 +152,7 @@ read_config_file <- function(config_file, call = parent.frame()) {
)
}
}
# https://adv-r.hadley.nz/conditions.html
withCallingHandlers(
tryCatch(
load_config(config_file),
Expand Down
1 change: 1 addition & 0 deletions man/get_source_expressions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 89 additions & 1 deletion tests/testthat/test-get_source_expressions.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ with_content_to_parse <- function(content, code) {
content_env <- new.env()
content_env$pc <- lapply(source_expressions[["expressions"]], `[[`, "parsed_content")
content_env$error <- source_expressions$error
content_env$warning <- source_expressions$warning
eval(substitute(code), envir = content_env)
}

Expand Down Expand Up @@ -212,7 +213,7 @@ test_that("returned data structure is complete", {
attr(lines_with_attr, "terminal_newline") <- TRUE

exprs <- get_source_expressions(temp_file)
expect_named(exprs, c("expressions", "error", "lines"))
expect_named(exprs, c("expressions", "error", "warning", "lines"))
expect_length(exprs$expressions, length(lines) + 1L)

for (i in seq_along(lines)) {
Expand Down Expand Up @@ -452,3 +453,90 @@ test_that("Disallowed embedded null gives parser failure lint", {
linters = list()
)
})

test_that("parser warnings are captured in output", {
with_content_to_parse("1e-3L", {
expect_length(warning, 1L)
expect_s3_class(warning, "lints")
})
with_content_to_parse("1e-3L; 1e-3L", {
expect_length(warning, 2L)
})
with_content_to_parse("1e-3L; 1.0L; 1.1L", {
expect_length(warning, 3L)
})
with_content_to_parse("1e-3L\n1.0L\n1.1L", {
expect_length(warning, 3L)
})
with_content_to_parse("1e-3L\n1+1\n1.0L\n2+2\n1.1L", {
expect_length(warning, 3L)
})
with_content_to_parse("1e-3L\nc(", {
expect_length(warning, 1L)
expect_length(error, 8L)
})
})

test_that("parser warnings generate lints", {
expect_lint(
"1e-3L",
"non-integer value 1e-3L",
linters = list()
)
expect_lint(
"1e-3L; 1e-3L",
list(
list("non-integer value 1e-3L", column_number = 1L),
list("non-integer value 1e-3L", column_number = 8L)
),
linters = list()
)
expect_lint(
"1e-3L; 1.0L",
list(
list("non-integer value 1e-3L", column_number = 1L),
list("integer literal 1\\.0L", column_number = 8L)
),
linters = list()
)
expect_lint(
trim_some("
1e-3L
1 + 1
1.0L
2 + 2
1.1L
3 + 3
2.2L
4 + 4
2.0L
5 + 5
2e-3L
# don't match strictly on regex, use parse tree
# 3.0L
# 3e-3L
# 3.3L
'4.0L'
'4e-3L'
'4.4L'
"),
list(
list("non-integer value 1e-3L", line_number = 1L),
list("integer literal 1\\.0L", line_number = 3L),
list("integer literal 1.1L contains decimal", line_number = 5L),
list("integer literal 2\\.2L contains decimal", line_number = 7L),
list("integer literal 2\\.0L", line_number = 9L),
list("non-integer value 2e-3L", line_number = 11L)
),
linters = list()
)
# parser catches warning before erroring
expect_lint(
"1e-3L; c(",
list(
list("non-integer value 1e-3L", type = "warning"),
list("unexpected end of input", type = "error")
),
linters = list()
)
})
Loading