Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New function psum/pprod #4188

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ export(chmatch, "%chin%", chorder, chgroup)
export(rbindlist)
export(fifelse)
export(fcase)
export(psum)
export(pprod)
export(fread)
export(fwrite)
export(foverlaps)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ unit = "s")

14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR.

15. `psum(..., na.rm=FALSE)` and `pprod(..., na.rm=FALSE)` implemented in C by Morgan Jacob, [#3467](https://github.com/Rdatatable/data.table/issues/3467), are inspired by `base::pmin` and `base::pmax`. These new functions work only for integer and double type and do not recycle vectors. Please see `?psum` for more details.

## BUG FIXES

1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085).
Expand Down
2 changes: 2 additions & 0 deletions R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)

fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na)
fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L])
psum = function(..., na.rm=FALSE) .Call(CpsumR, na.rm, list(...))
pprod = function(..., na.rm=FALSE) .Call(CpprodR, na.rm, list(...))

colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups)
coerceFill = function(x) .Call(CcoerceFillR, x)
Expand Down
37 changes: 37 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -16846,3 +16846,40 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN))
test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
A = data.table(A=as.complex(rep(NA, 5)))
test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))

# psum / pprod, #3467
x = c(1, 3, NA, 5)
y = c(2, NA, 4, 1)
z = c(3, 4, 4, 1)
# psum
test(2139.001, psum(x, y, z, na.rm = FALSE), c(6, NA, NA, 7))
test(2139.002, psum(x, y, z, na.rm = TRUE), c(6, 7, 8, 7))
test(2139.003, psum(as.integer(x), as.integer(y), as.integer(z), na.rm = FALSE), c(6L, NA_integer_, NA_integer_, 7L))
test(2139.004, psum(as.integer(x), as.integer(y), as.integer(z), na.rm = TRUE), c(6L, 7L, 8L, 7L))
test(2139.005, psum(as.raw(z), y, na.rm = TRUE), error = "Argument 1 is of type raw. Only integer and double types are supported.")
test(2139.006, psum(x, y, 1:2, na.rm = FALSE), error = "Argument 3 is of length 2 but argument 1 is of length 4. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.007, psum(1:10, 1:5, na.rm = FALSE), error = "Argument 2 is of length 5 but argument 1 is of length 10. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.008, psum(x, as.raw(z), y, na.rm = TRUE), error = "Argument 2 is of type raw. Only integer and double types are supported.")
test(2139.009, psum(1:10, 1:10, 21:30), 1:10 + 1:10 + 21:30)
test(2139.010, psum(x, y, z, na.rm = NA), error = "Argument 'na.rm' must be TRUE or FALSE and length 1.")
test(2139.011, psum(x, na.rm = FALSE), x)
test(2139.012, psum(as.integer(x), y, z, na.rm = TRUE), c(6, 7, 8, 7))
test(2139.013, psum(c(1,3,NA,5,NA), c(2,NA,4,1,NA), na.rm = TRUE), c(3, 3, 4, 6, NA))
test(2139.014, psum(x, y, as.integer(z), na.rm = FALSE), c(6, NA, NA, 7))
test(2139.015, psum(na.rm = FALSE), error = "Please supply at least 1 argument. (0 argument supplied)")
# pprod
test(2139.016, pprod(x, y, z, na.rm = FALSE), c(6, NA, NA, 5))
test(2139.017, pprod(x, y, z, na.rm = TRUE), c(6, 12, 16, 5))
test(2139.018, pprod(as.integer(x), as.integer(y), as.integer(z), na.rm = FALSE), c(6L, NA_integer_, NA_integer_, 5L))
test(2139.019, pprod(as.integer(x), as.integer(y), as.integer(z), na.rm = TRUE), c(6L, 12L, 16L, 5L))
test(2139.020, pprod(as.raw(z), y, na.rm = TRUE), error = "Argument 1 is of type raw. Only integer and double types are supported.")
test(2139.021, pprod(x, y, 1:2, na.rm = FALSE), error = "Argument 3 is of length 2 but argument 1 is of length 4. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.022, pprod(1:10, 1:5, na.rm = FALSE), error = "Argument 2 is of length 5 but argument 1 is of length 10. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.023, pprod(x, as.raw(z), y, na.rm = TRUE), error = "Argument 2 is of type raw. Only integer and double types are supported.")
test(2139.024, pprod(1:10, 1:10, 21:30), 1:10 * 1:10 * 21:30)
test(2139.025, pprod(x, y, z, na.rm = NA), error = "Argument 'na.rm' must be TRUE or FALSE and length 1.")
test(2139.026, pprod(x, na.rm = FALSE), x)
test(2139.027, pprod(as.integer(x), y, z, na.rm = TRUE), c(6, 12, 16, 5))
test(2139.028, pprod(c(1,3,NA,5,NA), c(2,NA,4,1,NA), na.rm = TRUE), c(2, 3, 4, 5, NA))
test(2139.029, pprod(x, y, as.integer(z), na.rm = FALSE), c(6, NA, NA, 5))
test(2139.030, pprod(na.rm = FALSE), error = "Please supply at least 1 argument. (0 argument supplied)")
33 changes: 33 additions & 0 deletions man/psum.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
\name{psum}
\alias{psum}
\alias{pprod}
\title{ Sum and Product}
\description{
Similar to \code{pmin} and \code{pmax} but for sum and product. Only works for integer and double. These functions do not recycle vectors.
}
\usage{
psum(..., na.rm=FALSE)
pprod(..., na.rm=FALSE)
}
\arguments{
\item{...}{ Numeric arguments of type integer or double.}
\item{na.rm}{ A logical indicating whether missing values should be removed. Default value is \code{FALSE}.}
}
\value{
Return the sum or product of all numeric argument. The value returned will be of the type of the highest argument types (integer < double)
}
\examples{
x = c(1, 3, NA, 5)
y = c(2, NA, 4, 1)
z = c(3, 4, 4, 1)

# Example 1: psum
psum(x, y, z, na.rm = FALSE)
psum(x, y, z, na.rm = TRUE)

# Example 2: pprod
pprod(x, y, z, na.rm = FALSE)
pprod(x, y, z, na.rm = TRUE)

}
\keyword{ data }
2 changes: 2 additions & 0 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,5 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k);
//fifelse.c
SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na);
SEXP fcaseR(SEXP na, SEXP rho, SEXP args);
SEXP psumR(SEXP na, SEXP args);
SEXP pprodR(SEXP na, SEXP args);
172 changes: 172 additions & 0 deletions src/fifelse.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,175 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) {
UNPROTECT(nprotect);
return ans;
}

SEXP psumR(SEXP na, SEXP args) {
if (!IS_TRUE_OR_FALSE(na)) {
error(_("Argument 'na.rm' must be TRUE or FALSE and length 1."));
}
const int n=length(args);
if (n < 1) {
error(_("Please supply at least 1 argument. (%d argument supplied)"), n);
}
if (n == 1) {
return SEXPPTR_RO(args)[0];
}
SEXPTYPE anstype = TYPEOF(SEXPPTR_RO(args)[0]);
const int64_t len0 = xlength(SEXPPTR_RO(args)[0]);
if (anstype != INTSXP && anstype != REALSXP) {
error(_("Argument %d is of type %s. Only integer and double types are supported."), 1, type2char(anstype));
}
for (int i = 1; i < n; ++i) {
SEXPTYPE type = TYPEOF(SEXPPTR_RO(args)[i]);
int64_t len1 = xlength(SEXPPTR_RO(args)[i]);
if (type != INTSXP && type != REALSXP) {
error(_("Argument %d is of type %s. Only integer and double types are supported."),
i+1, type2char(type));
}
if (len1 != len0) {
error(_("Argument %d is of length %"PRId64" but argument %d is of length %"PRId64". "
"If you wish to 'recycle' your argument, please use rep() to make this intent "
"clear to the readers of your code."), i+1, len1, 1, len0);
}
if (typeorder[type] > typeorder[anstype]) {
anstype = type;
}
}
int nprotect = 0;
SEXP ans = PROTECT(duplicate(SEXPPTR_RO(args)[0])); nprotect++;
if (anstype != TYPEOF(ans)) {
ans = coerceVector(ans, anstype);
}
const bool narm = LOGICAL(na)[0];
switch(anstype) {
case INTSXP: {
int *restrict pans =INTEGER(ans);
SEXP int_a = R_NilValue;
PROTECT_INDEX Iaint;
PROTECT_WITH_INDEX(int_a, &Iaint); nprotect++;
for (int i = 1; i < n; ++i) {
REPROTECT(int_a = duplicate(SEXPPTR_RO(args)[i]), Iaint);
int *pa = INTEGER(int_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = pans[j] == NA_INTEGER ? pa[j] : (pa[j]==NA_INTEGER ? pans[j] : (pans[j] + pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (pans[j] == NA_INTEGER || pa[j] == NA_INTEGER) ? NA_INTEGER : (pans[j] + pa[j]);
}
}
}
} break;
case REALSXP: {
double *restrict pans = REAL(ans);
SEXP dbl_a = R_NilValue;
PROTECT_INDEX Iadbl;
PROTECT_WITH_INDEX(dbl_a, &Iadbl); nprotect++;
for (int i = 1; i < n; ++i) {
if (TYPEOF(SEXPPTR_RO(args)[i]) != anstype) {
REPROTECT(dbl_a = coerceVector(duplicate(SEXPPTR_RO(args)[i]), anstype), Iadbl);
} else {
REPROTECT(dbl_a = duplicate(SEXPPTR_RO(args)[i]), Iadbl);
}
double *pa = REAL(dbl_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = ISNAN(pans[j]) ? pa[j] : (ISNAN(pa[j]) ? pans[j] : (pans[j] + pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (ISNAN(pans[j]) || ISNAN(pa[j])) ? NA_REAL : (pans[j] + pa[j]);
}
}
}
} break;
}
UNPROTECT(nprotect);
return ans;
}

SEXP pprodR(SEXP na, SEXP args) {
if (!IS_TRUE_OR_FALSE(na)) {
error(_("Argument 'na.rm' must be TRUE or FALSE and length 1."));
}
const int n=length(args);
if (n < 1) {
error(_("Please supply at least 1 argument. (%d argument supplied)"), n);
}
if (n == 1) {
return SEXPPTR_RO(args)[0];
}
SEXPTYPE anstype = TYPEOF(SEXPPTR_RO(args)[0]);
const int64_t len0 = xlength(SEXPPTR_RO(args)[0]);
if (anstype != INTSXP && anstype != REALSXP) {
error(_("Argument %d is of type %s. Only integer and double types are supported."), 1, type2char(anstype));
}
for (int i = 1; i < n; ++i) {
SEXPTYPE type = TYPEOF(SEXPPTR_RO(args)[i]);
int64_t len1 = xlength(SEXPPTR_RO(args)[i]);
if (type != INTSXP && type != REALSXP) {
error(_("Argument %d is of type %s. Only integer and double types are supported."),
i+1, type2char(type));
}
if (len1 != len0) {
error(_("Argument %d is of length %"PRId64" but argument %d is of length %"PRId64". "
"If you wish to 'recycle' your argument, please use rep() to make this intent "
"clear to the readers of your code."), i+1, len1, 1, len0);
}
if (typeorder[type] > typeorder[anstype]) {
anstype = type;
}
}
int nprotect = 0;
SEXP ans = PROTECT(duplicate(SEXPPTR_RO(args)[0])); nprotect++;
if (anstype != TYPEOF(ans)) {
ans = coerceVector(ans, anstype);
}
const bool narm = LOGICAL(na)[0];
switch(anstype) {
case INTSXP: {
int *restrict pans =INTEGER(ans);
SEXP int_a = R_NilValue;
PROTECT_INDEX Iaint;
PROTECT_WITH_INDEX(int_a, &Iaint); nprotect++;
for (int i = 1; i < n; ++i) {
REPROTECT(int_a = duplicate(SEXPPTR_RO(args)[i]), Iaint);
int *pa = INTEGER(int_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = pans[j] == NA_INTEGER ? pa[j] : (pa[j]==NA_INTEGER ? pans[j] : (pans[j] * pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (pans[j] == NA_INTEGER || pa[j] == NA_INTEGER) ? NA_INTEGER : (pans[j] * pa[j]);
}
}
}
} break;
case REALSXP: {
double *restrict pans = REAL(ans);
SEXP dbl_a = R_NilValue;
PROTECT_INDEX Iadbl;
PROTECT_WITH_INDEX(dbl_a, &Iadbl); nprotect++;
for (int i = 1; i < n; ++i) {
if (TYPEOF(SEXPPTR_RO(args)[i]) != anstype) {
REPROTECT(dbl_a = coerceVector(duplicate(SEXPPTR_RO(args)[i]), anstype), Iadbl);
} else {
REPROTECT(dbl_a = duplicate(SEXPPTR_RO(args)[i]), Iadbl);
}
double *pa = REAL(dbl_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = ISNAN(pans[j]) ? pa[j] : (ISNAN(pa[j]) ? pans[j] : (pans[j] * pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (ISNAN(pans[j]) || ISNAN(pa[j])) ? NA_REAL : (pans[j] * pa[j]);
}
}
}
} break;
}
UNPROTECT(nprotect);
return ans;
}
4 changes: 4 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ SEXP chmatchdup_R();
SEXP chin_R();
SEXP fifelseR();
SEXP fcaseR();
SEXP psumR();
SEXP pprodR();
SEXP freadR();
SEXP fwriteR();
SEXP reorder();
Expand Down Expand Up @@ -205,6 +207,8 @@ R_CallMethodDef callMethods[] = {
{"Ccoalesce", (DL_FUNC) &coalesce, -1},
{"CfifelseR", (DL_FUNC) &fifelseR, -1},
{"CfcaseR", (DL_FUNC) &fcaseR, -1},
{"CpsumR", (DL_FUNC) &psumR, -1},
{"CpprodR", (DL_FUNC) &pprodR, -1},
{"C_lock", (DL_FUNC) &lock, -1}, // _ for these 3 to avoid Clock as in time
{"C_unlock", (DL_FUNC) &unlock, -1},
{"C_islocked", (DL_FUNC) &islockedR, -1},
Expand Down