Skip to content

Commit

Permalink
Add a trim_ws argument to read_log
Browse files Browse the repository at this point in the history
Fixes #738
  • Loading branch information
jimhester committed May 7, 2021
1 parent 948d2ce commit b2c52c7
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 10 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

## Additional features and fixes

* `read_log()` gains a `trim_ws` argument (#738)

* `read_rds()` can now read .Rds files from URLs (#1186)

* `read_*()` functions gain a `show_col_types` argument; if set to `FALSE`, this turns off showing the column types unconditionally.
Expand Down
3 changes: 2 additions & 1 deletion R/read_log.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
#' @examples
#' read_log(readr_example("example.log"))
read_log <- function(file, col_names = FALSE, col_types = NULL,
trim_ws = TRUE,
skip = 0, n_max = Inf, progress = show_progress()) {
tokenizer <- tokenizer_log()
tokenizer <- tokenizer_log(trim_ws = trim_ws)
read_delimited(file, tokenizer,
col_names = col_names, col_types = col_types,
skip = skip, n_max = n_max, progress = progress
Expand Down
4 changes: 2 additions & 2 deletions R/tokenizer.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ tokenizer_line <- function(na = character(), skip_empty_rows = TRUE) {

#' @export
#' @rdname Tokenizers
tokenizer_log <- function(trim_ws = TRUE) {
  # `trim_ws` is stored on the spec so the C++ side can read it via
  # spec["trim_ws"]. It defaults to TRUE, matching the default used by
  # `read_log()`, so that existing zero-argument calls to this exported
  # constructor keep working.
  structure(list(trim_ws = trim_ws), class = "tokenizer_log")
}


Expand Down
2 changes: 1 addition & 1 deletion man/Tokenizers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/read_log.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion src/Tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ TokenizerPtr Tokenizer::create(cpp11::list spec) {
bool skipEmptyRows = cpp11::as_cpp<bool>(spec["skip_empty_rows"]);
return TokenizerPtr(new TokenizerLine(na, skipEmptyRows));
} else if (subclass == "tokenizer_log") {
return TokenizerPtr(new TokenizerLog());
bool trimWs = cpp11::as_cpp<bool>(spec["trim_ws"]);
return TokenizerPtr(new TokenizerLog(trimWs));
} else if (subclass == "tokenizer_ws") {
std::vector<std::string> na =
cpp11::as_cpp<std::vector<std::string>>(spec["na"]);
Expand Down
16 changes: 11 additions & 5 deletions src/TokenizerLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ class TokenizerLog : public Tokenizer {
LogState state_;
int row_, col_;
bool moreTokens_;
bool trimWS_;

public:
TokenizerLog() {}
TokenizerLog(bool trimWS) : trimWS_(trimWS) {}

void tokenize(SourceIterator begin, SourceIterator end) {
cur_ = begin;
Expand Down Expand Up @@ -63,8 +64,7 @@ class TokenizerLog : public Tokenizer {
advanceForLF(&cur_, end_);
return Token(TOKEN_EMPTY, row, col);
} else if (*cur_ == ' ') {
newField();
return Token(TOKEN_EMPTY, row, col);
break;
} else if (*cur_ == '"') {
state_ = LOG_STRING;
} else if (*cur_ == '[') {
Expand Down Expand Up @@ -165,8 +165,14 @@ class TokenizerLog : public Tokenizer {
}

// Builds the token for a completed field spanning [begin, end) at the given
// row/column position.
//
// When trimWS_ is set the token is trimmed first (presumably stripping
// surrounding whitespace -- see Token::trim), and only then checked against
// the NA marker "-", so the NA check operates on the trimmed token.
Token fieldToken(SourceIterator begin, SourceIterator end, int row, int col) {
  Token t(begin, end, row, col, false);
  if (trimWS_) {
    t.trim();
  }

  // "-" is the only string flagged as NA for log fields.
  t.flagNA(std::vector<std::string>(1, "-"));

  return t;
}
};

Expand Down
12 changes: 12 additions & 0 deletions tests/testthat/test-read_log.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
test_that("read_log trims whitespace", {
  # Temporary log file; removed again when the test block exits.
  tf <- tempfile()
  on.exit(unlink(tf))

  # Two syslog-style lines. The first has a single-digit day that is padded
  # with an extra space ("Nov  4"), so the line contains two consecutive
  # spaces between the first and second fields. Without that padding the
  # fixture would not exercise the consecutive-space handling at all.
  writeLines('Nov  4 00:00:55 vrpweb1 httpd: 131.161.8.219 - - [04/Nov/2017:00:00:55 -0400] "GET /wp-includes/js/jquery/jquery-migrate.min.js?ver=1.4.1 HTTP/1.1" 200 10056 "http://www.colby.edu/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36"
Nov 14 00:00:55 vrpweb1 httpd: 131.161.8.216 - - [04/Nov/2017:00:00:55 -0400] "GET /wp-content/plugins/wooslider-AxZp6o/assets/js/jquery.flexslider.min.js?ver=2.4.1-20170608 HTTP/1.1" 200 22414 "http://www.colby.edu/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36"',
    tf)

  res <- read_log(tf)

  # The second column must be the day of month on both rows, i.e. the padding
  # space must not produce an extra empty field on the first row.
  expect_equal(res[[2]], c(4, 14))
})

0 comments on commit b2c52c7

Please sign in to comment.