Skip to content

Commit

Permalink
[Fix #560] New argument cutoff_2000 for parsing functions
Browse files Browse the repository at this point in the history
  • Loading branch information
vspinu committed Oct 3, 2017
1 parent 87044f9 commit aefc121
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 24 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ Version 1.6.0.9000
==================

### NEW FEATURES

* [#560](https://github.com/tidyverse/lubridate/issues/560) New argument `cutoff_2000` for parsing functions to indicate 20th century cutoff for `y` format.
* [#257](https://github.com/tidyverse/lubridate/issues/257) New `week_start` parameter in `wday` and `wday<-` to set week start.
* [#401](https://github.com/tidyverse/lubridate/issues/401) New parameter `locale` in `wday`. Labels of the returned factors (when `label=TRUE`) now respect current locale.
* [#485](https://github.com/tidyverse/lubridate/pull/485) `quarter` gained a new argument `fiscal_start` to address the issue of different fiscal conventions.
Expand Down
28 changes: 17 additions & 11 deletions R/parse.r
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,10 @@ parse_date_time <- function(x, orders, tz = "UTC", truncated = 0, quiet = FALSE,
out
}

parse_dt <- function(x, orders, is_format = FALSE, return_lt = FALSE, cutoff_2000 = 68L) {
.Call(C_parse_dt, x, orders, as.logical(is_format), as.logical(return_lt), as.integer(cutoff_2000))
}

##' @description
##' `parse_date_time2()` is a fast C parser of numeric
##' orders.
Expand All @@ -634,18 +638,21 @@ parse_date_time <- function(x, orders, tz = "UTC", truncated = 0, quiet = FALSE,
##' @param lt logical. If TRUE returned object is of class POSIXlt, and POSIXct
##' otherwise. For compatibility with base `strptime` function default is TRUE
##' for `fast_strptime` and FALSE for `parse_date_time2`.
parse_date_time2 <- function(x, orders, tz = "UTC", exact = FALSE, lt = FALSE){
##' @param cutoff_2000 integer. For `y` format, two-digit numbers smaller or equal to
##' `cutoff_2000` are parsed as 20th's century, 19th's otherwise. Available only
##' for functions relying on `lubridate`s internal parser.
parse_date_time2 <- function(x, orders, tz = "UTC", exact = FALSE, lt = FALSE, cutoff_2000 = 68L){
if(length(orders) > 1)
warning("Multiple orders supplied. Only first order is used.")
if(!exact)
orders <- gsub("[^[:alpha:]]+", "", as.character(orders[[1]])) ## remove all separators
if(lt){
.mklt(.Call(C_parse_dt, x, orders, FALSE, TRUE), tz)
.mklt(parse_dt(x, orders, FALSE, TRUE, cutoff_2000), tz)
} else {
if (tz == "UTC"){
.POSIXct(.Call(C_parse_dt, x, orders, FALSE, FALSE), tz = "UTC")
.POSIXct(parse_dt(x, orders, FALSE, FALSE, cutoff_2000), tz = "UTC")
} else {
as.POSIXct(.mklt(.Call(C_parse_dt, x, orders, FALSE, TRUE), tz))
as.POSIXct(.mklt(parse_dt(x, orders, FALSE, TRUE, cutoff_2000), tz))
}
}
}
Expand All @@ -659,23 +666,22 @@ parse_date_time2 <- function(x, orders, tz = "UTC", exact = FALSE, lt = FALSE){
##' @param format a character string of formats. It should include all the
##' separators and each format must be prefixed with %, just as in the format
##' argument of `strptime()`.
fast_strptime <- function(x, format, tz = "UTC", lt = TRUE){
fast_strptime <- function(x, format, tz = "UTC", lt = TRUE, cutoff_2000 = 68L) {
if(length(format) > 1)
warning("Multiple formats supplied. Only first format is used.")
format <- as.character(format[[1]])
if(lt){
.mklt(.Call(C_parse_dt, x, format, TRUE, TRUE), tz)
if(lt) {
.mklt(parse_dt(x, format, TRUE, TRUE, cutoff_2000), tz)
} else{
if(tz == "UTC"){
.POSIXct(.Call(C_parse_dt, x, format, TRUE, FALSE), "UTC")
if (tz == "UTC") {
.POSIXct(parse_dt(x, format, TRUE, FALSE, cutoff_2000), "UTC")
} else {
as.POSIXct(.mklt(.Call(C_parse_dt, x, format, TRUE, TRUE), tz))
as.POSIXct(.mklt(parse_dt(x, format, TRUE, TRUE, cutoff_2000), tz))
}
}
}




### INTERNAL
.mklt <- function(dtlist, tz){
Expand Down
9 changes: 7 additions & 2 deletions man/parse_date_time.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ END_RCPP
}

RcppExport SEXP C_make_d(SEXP, SEXP, SEXP);
RcppExport SEXP C_parse_dt(SEXP, SEXP, SEXP, SEXP);
RcppExport SEXP C_parse_dt(SEXP, SEXP, SEXP, SEXP, SEXP);
RcppExport SEXP C_parse_hms(SEXP, SEXP);
RcppExport SEXP C_parse_period(SEXP);

static const R_CallMethodDef CallEntries[] = {
{"_lubridate_C_update_dt", (DL_FUNC) &_lubridate_C_update_dt, 12},
{"_lubridate_C_force_tz", (DL_FUNC) &_lubridate_C_force_tz, 3},
{"C_make_d", (DL_FUNC) &C_make_d, 3},
{"C_parse_dt", (DL_FUNC) &C_parse_dt, 4},
{"C_parse_dt", (DL_FUNC) &C_parse_dt, 5},
{"C_parse_hms", (DL_FUNC) &C_parse_hms, 2},
{"C_parse_period", (DL_FUNC) &C_parse_period, 1},
{NULL, NULL, 0}
Expand Down
21 changes: 12 additions & 9 deletions src/tparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,22 @@ int parse_alpha_month(const char **c){
return (parse_alphanum(c, en_months, 12, TRUE) + 1);
}

SEXP C_parse_dt(SEXP str, SEXP ord, SEXP formats, SEXP lt) {
// STR: character vector of date-times.
// ORD: formats (as in strptime) or orders (as in parse_date_time)
// FORMATS: TRUE if ord is a string of formats (as in strptime)
// LT: TRUE - return POSIXlt type list, FALSE - return POSIXct seconds

if ( !isString(str) ) error("Date-time must be a character vector");
SEXP C_parse_dt(SEXP str, SEXP ord, SEXP formats, SEXP lt, SEXP cutoff_2000) {
// str: character vector of date-times.
// ord: formats (as in strptime) or orders (as in parse_date_time)
// formats: TRUE if ord is a string of formats (as in strptime)
// lt: TRUE - return POSIXlt type list, FALSE - return POSIXct seconds
// cutoff_2000: for `y` format years smaller or equal are read as 20th
// sentry's, otherwise 19ths. R's default is 68.

if ( !isString(str) ) error("Argument to parsing functions must be a character vector.");
if ( !isString(ord) || (LENGTH(ord) > 1))
error("Format argument must be a character vector of length 1");
error("Format/orders argument must be a character vector of length 1");

R_len_t n = LENGTH(str);
int is_fmt = *LOGICAL(formats);
int out_lt = *LOGICAL(lt);
int cut2000 = *INTEGER(cutoff_2000);

SEXP oYEAR, oMONTH, oDAY, oHOUR, oMIN, oSEC;

Expand Down Expand Up @@ -122,7 +125,7 @@ SEXP C_parse_dt(SEXP str, SEXP ord, SEXP formats, SEXP lt) {
y = parse_int(&c, 2, FALSE);
if (y < 0)
succeed = 0;
else if (y <= 68)
else if (y <= cut2000)
y += 2000;
else
y += 1900;
Expand Down
21 changes: 21 additions & 0 deletions tests/testthat/test-parsers.R
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,27 @@ test_that("ymd functions give warning when parsing absurd formats", {
expect_warning(ymd(c(201001024, 20100103)))
})

test_that("cutoff_2000 works as expected", {

dates <- c("20-02-03", "67-02-03", "68-02-03", "69-02-03", "99-02-03", "00-02-03")

expect_equal(parse_date_time2(dates, "ymd"),
ymd(c("2020-02-03", "2067-02-03", "2068-02-03", "1969-02-03", "1999-02-03", "2000-02-03"), tz = "UTC"))

expect_equal(parse_date_time2(dates, "ymd", cutoff_2000 = 67),
ymd(c("2020-02-03", "2067-02-03", "1968-02-03", "1969-02-03", "1999-02-03", "2000-02-03"), tz = "UTC"))

expect_equal(parse_date_time2(dates, "ymd", cutoff_2000 = 20),
ymd(c("2020-02-03", "1967-02-03", "1968-02-03", "1969-02-03", "1999-02-03", "2000-02-03"), tz = "UTC"))

expect_equal(parse_date_time2(dates, "ymd", cutoff_2000 = 0),
ymd(c("1920-02-03", "1967-02-03", "1968-02-03", "1969-02-03", "1999-02-03", "2000-02-03"), tz = "UTC"))

expect_equal(parse_date_time2(dates, "ymd", cutoff_2000 = -1),
ymd(c("1920-02-03", "1967-02-03", "1968-02-03", "1969-02-03", "1999-02-03", "1900-02-03"), tz = "UTC"))

})

test_that("0 month and 0 day in date produces NA",
{
expect_equal(ymd(c("2013-1-1", "2013-0-1"), quiet = TRUE),
Expand Down

0 comments on commit aefc121

Please sign in to comment.