From 0fd92f89a011abce2754790e6f9bdad81c9f4089 Mon Sep 17 00:00:00 2001 From: Jim Hester Date: Thu, 24 Oct 2019 11:29:55 -0400 Subject: [PATCH] Display the full field in the problems result Some parsers, notably the numeric parser modify the SourceIterator pointers when parsing, so using the same object in the problem display only shows part of the original field. We now create a new object when adding warnings to ensure the full field is shown. Fixes https://github.com/tidyverse/readr/issues/444#issuecomment-545920644 --- NEWS.md | 3 +++ src/Collector.cpp | 15 ++++++++++----- tests/testthat/test-problems.R | 7 +++++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index c8f1cb28..bfc8adb6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # readr (development version) +* The full problem field is now displayed in the problems tibble, as intended + (#444). + * `guess_parser()` gains a `na` argument and removes NA values before guessing. `parse_guess()` now passes the `na` argument to `guess_parser()` (#1041). diff --git a/src/Collector.cpp b/src/Collector.cpp index 2ce71ebf..6783b9e7 100644 --- a/src/Collector.cpp +++ b/src/Collector.cpp @@ -176,13 +176,15 @@ void CollectorDouble::setValue(int i, const Token& t) { parseDouble(decimalMark_, str.first, str.second, REAL(column_)[i]); if (!ok) { REAL(column_)[i] = NA_REAL; - warn(t.row(), t.col(), "a double", str); + SourceIterators org_str = t.getString(&buffer); + warn(t.row(), t.col(), "a double", org_str); return; } if (str.first != str.second) { REAL(column_)[i] = NA_REAL; - warn(t.row(), t.col(), "no trailing characters", str); + SourceIterators org_str = t.getString(&buffer); + warn(t.row(), t.col(), "no trailing characters", org_str); return; } @@ -251,12 +253,14 @@ void CollectorInteger::setValue(int i, const Token& t) { bool ok = parseInt(str.first, str.second, INTEGER(column_)[i]); if (!ok) { INTEGER(column_)[i] = NA_INTEGER; - warn(t.row(), t.col(), "an integer", str); + SourceIterators org_str = t.getString(&buffer); + warn(t.row(), t.col(), "an integer", org_str); return; } if (str.first != str.second) { - warn(t.row(), t.col(), "no trailing characters", str); + SourceIterators org_str = t.getString(&buffer); + warn(t.row(), t.col(), "no trailing characters", org_str); INTEGER(column_)[i] = NA_INTEGER; return; } @@ -316,8 +320,9 @@ void CollectorNumeric::setValue(int i, const Token& t) { decimalMark_, groupingMark_, str.first, str.second, REAL(column_)[i]); if (!ok) { + SourceIterators org_str = t.getString(&buffer); REAL(column_)[i] = NA_REAL; - warn(t.row(), t.col(), "a number", str); + warn(t.row(), t.col(), "a number", org_str); return; } diff --git a/tests/testthat/test-problems.R b/tests/testthat/test-problems.R index db3111b2..c6ead430 100644 --- a/tests/testthat/test-problems.R +++ b/tests/testthat/test-problems.R @@ -38,3 +38,10 @@ test_that("problems returns the filename (#581)", { expect_equal(length(files), 28L) expect_equal("mtcars.csv'", basename(files)[[1L]]) }) + +test_that("problems returns full original field (#444)", { + probs <- problems(read_tsv("X\n-$12,500\n$2,000\n-$5,000\n$1,000\n-$3,000\n", col_types = list(.default = col_number()))) + + expect_equal(NROW(probs), 3) + expect_equal(probs$actual, c("-$12,500", "-$5,000", "-$3,000")) +})