diff --git a/NAMESPACE b/NAMESPACE index c2c095a1d8..2a25a9d577 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,8 @@ export(chmatch, "%chin%", chorder, chgroup) export(rbindlist) export(fifelse) export(fcase) +export(psum) +export(pprod) export(fread) export(fwrite) export(foverlaps) diff --git a/NEWS.md b/NEWS.md index 71fd76aa65..5032634b18 100644 --- a/NEWS.md +++ b/NEWS.md @@ -81,6 +81,8 @@ unit = "s") 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. +15. `psum(..., na.rm=FALSE)` and `pprod(..., na.rm=FALSE)` implemented in C by Morgan Jacob, [#3467](https://github.com/Rdatatable/data.table/issues/3467), are inspired by `base::pmin` and `base::pmax`. These new functions work only for integer and double type and do not recycle vectors. Please see `?psum` for more details. + ## BUG FIXES 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085). diff --git a/R/wrappers.R b/R/wrappers.R index 5fec33a92f..f442cfd851 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -7,6 +7,8 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) +psum = function(..., na.rm=FALSE) .Call(CpsumR, na.rm, list(...)) +pprod = function(..., na.rm=FALSE) .Call(CpprodR, na.rm, list(...)) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) coerceFill = function(x) .Call(CcoerceFillR, x) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7cc6819e8f..0ef2423227 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16846,3 +16846,40 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# psum / pprod, #3467 +x = c(1, 3, NA, 5) +y = c(2, NA, 4, 1) +z = c(3, 4, 4, 1) +# psum +test(2139.001, psum(x, y, z, na.rm = FALSE), c(6, NA, NA, 7)) +test(2139.002, psum(x, y, z, na.rm = TRUE), c(6, 7, 8, 7)) +test(2139.003, psum(as.integer(x), as.integer(y), as.integer(z), na.rm = FALSE), c(6L, NA_integer_, NA_integer_, 7L)) +test(2139.004, psum(as.integer(x), as.integer(y), as.integer(z), na.rm = TRUE), c(6L, 7L, 8L, 7L)) +test(2139.005, psum(as.raw(z), y, na.rm = TRUE), error = "Argument 1 is of type raw. Only integer and double types are supported.") +test(2139.006, psum(x, y, 1:2, na.rm = FALSE), error = "Argument 3 is of length 2 but argument 1 is of length 4. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.") +test(2139.007, psum(1:10, 1:5, na.rm = FALSE), error = "Argument 2 is of length 5 but argument 1 is of length 10. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.") +test(2139.008, psum(x, as.raw(z), y, na.rm = TRUE), error = "Argument 2 is of type raw. Only integer and double types are supported.") +test(2139.009, psum(1:10, 1:10, 21:30), 1:10 + 1:10 + 21:30) +test(2139.010, psum(x, y, z, na.rm = NA), error = "Argument 'na.rm' must be TRUE or FALSE and length 1.") +test(2139.011, psum(x, na.rm = FALSE), x) +test(2139.012, psum(as.integer(x), y, z, na.rm = TRUE), c(6, 7, 8, 7)) +test(2139.013, psum(c(1,3,NA,5,NA), c(2,NA,4,1,NA), na.rm = TRUE), c(3, 3, 4, 6, NA)) +test(2139.014, psum(x, y, as.integer(z), na.rm = FALSE), c(6, NA, NA, 7)) +test(2139.015, psum(na.rm = FALSE), error = "Please supply at least 1 argument. (0 argument supplied)") +# pprod +test(2139.016, pprod(x, y, z, na.rm = FALSE), c(6, NA, NA, 5)) +test(2139.017, pprod(x, y, z, na.rm = TRUE), c(6, 12, 16, 5)) +test(2139.018, pprod(as.integer(x), as.integer(y), as.integer(z), na.rm = FALSE), c(6L, NA_integer_, NA_integer_, 5L)) +test(2139.019, pprod(as.integer(x), as.integer(y), as.integer(z), na.rm = TRUE), c(6L, 12L, 16L, 5L)) +test(2139.020, pprod(as.raw(z), y, na.rm = TRUE), error = "Argument 1 is of type raw. Only integer and double types are supported.") +test(2139.021, pprod(x, y, 1:2, na.rm = FALSE), error = "Argument 3 is of length 2 but argument 1 is of length 4. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.") +test(2139.022, pprod(1:10, 1:5, na.rm = FALSE), error = "Argument 2 is of length 5 but argument 1 is of length 10. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.") +test(2139.023, pprod(x, as.raw(z), y, na.rm = TRUE), error = "Argument 2 is of type raw. Only integer and double types are supported.") +test(2139.024, pprod(1:10, 1:10, 21:30), 1:10 * 1:10 * 21:30) +test(2139.025, pprod(x, y, z, na.rm = NA), error = "Argument 'na.rm' must be TRUE or FALSE and length 1.") +test(2139.026, pprod(x, na.rm = FALSE), x) +test(2139.027, pprod(as.integer(x), y, z, na.rm = TRUE), c(6, 12, 16, 5)) +test(2139.028, pprod(c(1,3,NA,5,NA), c(2,NA,4,1,NA), na.rm = TRUE), c(2, 3, 4, 5, NA)) +test(2139.029, pprod(x, y, as.integer(z), na.rm = FALSE), c(6, NA, NA, 5)) +test(2139.030, pprod(na.rm = FALSE), error = "Please supply at least 1 argument. (0 argument supplied)") \ No newline at end of file diff --git a/man/psum.Rd b/man/psum.Rd new file mode 100644 index 0000000000..fda6688784 --- /dev/null +++ b/man/psum.Rd @@ -0,0 +1,33 @@ +\name{psum} +\alias{psum} +\alias{pprod} +\title{ Sum and Product} +\description{ +Similar to \code{pmin} and \code{pmax} but for sum and product. Only works for integer and double. These functions do not recycle vectors. +} +\usage{ + psum(..., na.rm=FALSE) + pprod(..., na.rm=FALSE) +} +\arguments{ + \item{...}{ Numeric arguments of type integer or double.} + \item{na.rm}{ A logical indicating whether missing values should be removed. Default value is \code{FALSE}.} +} +\value{ +Return the sum or product of all numeric argument. The value returned will be of the type of the highest argument types (integer < double) +} +\examples{ +x = c(1, 3, NA, 5) +y = c(2, NA, 4, 1) +z = c(3, 4, 4, 1) + +# Example 1: psum +psum(x, y, z, na.rm = FALSE) +psum(x, y, z, na.rm = TRUE) + +# Example 2: pprod +pprod(x, y, z, na.rm = FALSE) +pprod(x, y, z, na.rm = TRUE) + +} +\keyword{ data } diff --git a/src/data.table.h b/src/data.table.h index 90ff7fb6fc..94a5b36f7d 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -245,3 +245,5 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k); //fifelse.c SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na); SEXP fcaseR(SEXP na, SEXP rho, SEXP args); +SEXP psumR(SEXP na, SEXP args); +SEXP pprodR(SEXP na, SEXP args); diff --git a/src/fifelse.c b/src/fifelse.c index 3a05fce6d3..b4dd18b90f 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -344,3 +344,175 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { UNPROTECT(nprotect); return ans; } + +SEXP psumR(SEXP na, SEXP args) { + if (!IS_TRUE_OR_FALSE(na)) { + error(_("Argument 'na.rm' must be TRUE or FALSE and length 1.")); + } + const int n=length(args); + if (n < 1) { + error(_("Please supply at least 1 argument. (%d argument supplied)"), n); + } + if (n == 1) { + return SEXPPTR_RO(args)[0]; + } + SEXPTYPE anstype = TYPEOF(SEXPPTR_RO(args)[0]); + const int64_t len0 = xlength(SEXPPTR_RO(args)[0]); + if (anstype != INTSXP && anstype != REALSXP) { + error(_("Argument %d is of type %s. Only integer and double types are supported."), 1, type2char(anstype)); + } + for (int i = 1; i < n; ++i) { + SEXPTYPE type = TYPEOF(SEXPPTR_RO(args)[i]); + int64_t len1 = xlength(SEXPPTR_RO(args)[i]); + if (type != INTSXP && type != REALSXP) { + error(_("Argument %d is of type %s. Only integer and double types are supported."), + i+1, type2char(type)); + } + if (len1 != len0) { + error(_("Argument %d is of length %"PRId64" but argument %d is of length %"PRId64". " + "If you wish to 'recycle' your argument, please use rep() to make this intent " + "clear to the readers of your code."), i+1, len1, 1, len0); + } + if (typeorder[type] > typeorder[anstype]) { + anstype = type; + } + } + int nprotect = 0; + SEXP ans = PROTECT(duplicate(SEXPPTR_RO(args)[0])); nprotect++; + if (anstype != TYPEOF(ans)) { + ans = coerceVector(ans, anstype); + } + const bool narm = LOGICAL(na)[0]; + switch(anstype) { + case INTSXP: { + int *restrict pans =INTEGER(ans); + SEXP int_a = R_NilValue; + PROTECT_INDEX Iaint; + PROTECT_WITH_INDEX(int_a, &Iaint); nprotect++; + for (int i = 1; i < n; ++i) { + REPROTECT(int_a = duplicate(SEXPPTR_RO(args)[i]), Iaint); + int *pa = INTEGER(int_a); + if (narm) { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = pans[j] == NA_INTEGER ? pa[j] : (pa[j]==NA_INTEGER ? pans[j] : (pans[j] + pa[j])); + } + } else { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = (pans[j] == NA_INTEGER || pa[j] == NA_INTEGER) ? NA_INTEGER : (pans[j] + pa[j]); + } + } + } + } break; + case REALSXP: { + double *restrict pans = REAL(ans); + SEXP dbl_a = R_NilValue; + PROTECT_INDEX Iadbl; + PROTECT_WITH_INDEX(dbl_a, &Iadbl); nprotect++; + for (int i = 1; i < n; ++i) { + if (TYPEOF(SEXPPTR_RO(args)[i]) != anstype) { + REPROTECT(dbl_a = coerceVector(duplicate(SEXPPTR_RO(args)[i]), anstype), Iadbl); + } else { + REPROTECT(dbl_a = duplicate(SEXPPTR_RO(args)[i]), Iadbl); + } + double *pa = REAL(dbl_a); + if (narm) { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = ISNAN(pans[j]) ? pa[j] : (ISNAN(pa[j]) ? pans[j] : (pans[j] + pa[j])); + } + } else { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = (ISNAN(pans[j]) || ISNAN(pa[j])) ? NA_REAL : (pans[j] + pa[j]); + } + } + } + } break; + } + UNPROTECT(nprotect); + return ans; +} + +SEXP pprodR(SEXP na, SEXP args) { + if (!IS_TRUE_OR_FALSE(na)) { + error(_("Argument 'na.rm' must be TRUE or FALSE and length 1.")); + } + const int n=length(args); + if (n < 1) { + error(_("Please supply at least 1 argument. (%d argument supplied)"), n); + } + if (n == 1) { + return SEXPPTR_RO(args)[0]; + } + SEXPTYPE anstype = TYPEOF(SEXPPTR_RO(args)[0]); + const int64_t len0 = xlength(SEXPPTR_RO(args)[0]); + if (anstype != INTSXP && anstype != REALSXP) { + error(_("Argument %d is of type %s. Only integer and double types are supported."), 1, type2char(anstype)); + } + for (int i = 1; i < n; ++i) { + SEXPTYPE type = TYPEOF(SEXPPTR_RO(args)[i]); + int64_t len1 = xlength(SEXPPTR_RO(args)[i]); + if (type != INTSXP && type != REALSXP) { + error(_("Argument %d is of type %s. Only integer and double types are supported."), + i+1, type2char(type)); + } + if (len1 != len0) { + error(_("Argument %d is of length %"PRId64" but argument %d is of length %"PRId64". " + "If you wish to 'recycle' your argument, please use rep() to make this intent " + "clear to the readers of your code."), i+1, len1, 1, len0); + } + if (typeorder[type] > typeorder[anstype]) { + anstype = type; + } + } + int nprotect = 0; + SEXP ans = PROTECT(duplicate(SEXPPTR_RO(args)[0])); nprotect++; + if (anstype != TYPEOF(ans)) { + ans = coerceVector(ans, anstype); + } + const bool narm = LOGICAL(na)[0]; + switch(anstype) { + case INTSXP: { + int *restrict pans =INTEGER(ans); + SEXP int_a = R_NilValue; + PROTECT_INDEX Iaint; + PROTECT_WITH_INDEX(int_a, &Iaint); nprotect++; + for (int i = 1; i < n; ++i) { + REPROTECT(int_a = duplicate(SEXPPTR_RO(args)[i]), Iaint); + int *pa = INTEGER(int_a); + if (narm) { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = pans[j] == NA_INTEGER ? pa[j] : (pa[j]==NA_INTEGER ? pans[j] : (pans[j] * pa[j])); + } + } else { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = (pans[j] == NA_INTEGER || pa[j] == NA_INTEGER) ? NA_INTEGER : (pans[j] * pa[j]); + } + } + } + } break; + case REALSXP: { + double *restrict pans = REAL(ans); + SEXP dbl_a = R_NilValue; + PROTECT_INDEX Iadbl; + PROTECT_WITH_INDEX(dbl_a, &Iadbl); nprotect++; + for (int i = 1; i < n; ++i) { + if (TYPEOF(SEXPPTR_RO(args)[i]) != anstype) { + REPROTECT(dbl_a = coerceVector(duplicate(SEXPPTR_RO(args)[i]), anstype), Iadbl); + } else { + REPROTECT(dbl_a = duplicate(SEXPPTR_RO(args)[i]), Iadbl); + } + double *pa = REAL(dbl_a); + if (narm) { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = ISNAN(pans[j]) ? pa[j] : (ISNAN(pa[j]) ? pans[j] : (pans[j] * pa[j])); + } + } else { + for (int64_t j = 0; j < len0; ++j) { + pans[j] = (ISNAN(pans[j]) || ISNAN(pa[j])) ? NA_REAL : (pans[j] * pa[j]); + } + } + } + } break; + } + UNPROTECT(nprotect); + return ans; +} diff --git a/src/init.c b/src/init.c index aed2da3dbd..559b7620b5 100644 --- a/src/init.c +++ b/src/init.c @@ -53,6 +53,8 @@ SEXP chmatchdup_R(); SEXP chin_R(); SEXP fifelseR(); SEXP fcaseR(); +SEXP psumR(); +SEXP pprodR(); SEXP freadR(); SEXP fwriteR(); SEXP reorder(); @@ -205,6 +207,8 @@ R_CallMethodDef callMethods[] = { {"Ccoalesce", (DL_FUNC) &coalesce, -1}, {"CfifelseR", (DL_FUNC) &fifelseR, -1}, {"CfcaseR", (DL_FUNC) &fcaseR, -1}, +{"CpsumR", (DL_FUNC) &psumR, -1}, +{"CpprodR", (DL_FUNC) &pprodR, -1}, {"C_lock", (DL_FUNC) &lock, -1}, // _ for these 3 to avoid Clock as in time {"C_unlock", (DL_FUNC) &unlock, -1}, {"C_islocked", (DL_FUNC) &islockedR, -1},