diff --git a/NEWS.md b/NEWS.md index 021ddbbb6..a84327c6b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -444,6 +444,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T 28. `rbindlist()` now avoids the crash when working with many non-UTF-8 column names, [#7452](https://github.com/Rdatatable/data.table/issues/7452). Thanks @aitap for the report and the fix. +29. `gsum()` now correctly handles integer64 overflow in data.table aggregations (e.g. `DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]); DT[, sum(x), by = i]`), [#7574](https://github.com/Rdatatable/data.table/issues/7574). Thanks @MichaelChirico for reporting and @Will-78 for the fix. + ### NOTES 1. The following in-progress deprecations have proceeded: diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 443487c6a..cc9f9b135 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21585,3 +21585,8 @@ close(con) file.create(f <- tempfile()) test(2367.6, fread(file(f)), data.table(), warning="Connection has size 0.") unlink(f) + +# test for correct response to integer64 overflow in a grouped data.table sum, #7574 +DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]) +test(2368.1, DT[, sum(x), by = i],data.table(i = 1L, V1 = as.integer64("4895412794951729152")),warning = "The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. 
Consider using 'as.numeric' on the column beforehand to avoid this warning.") +rm(DT) diff --git a/src/gsumm.c b/src/gsumm.c index 5051d72ca..b12da8bb9 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -436,6 +436,7 @@ SEXP gsum(SEXP x, SEXP narmArg) } } break; case REALSXP: { + bool overflow=false; if (!INHERITS(x, char_integer64)) { const double *restrict gx = gather(x, &anyNA); ans = PROTECT(allocVector(REALSXP, ngrp)); @@ -478,7 +479,7 @@ SEXP gsum(SEXP x, SEXP narmArg) int64_t *restrict ansp = (int64_t *)REAL(ans); memset(ansp, 0, ngrp*sizeof(*ansp)); if (!anyNA) { - #pragma omp parallel for num_threads(getDTthreads(highSize, false)) + #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow) for (int h=0; h0 && c>MAX_INTEGER64-a) || (a<0 && c0 && c>MAX_INTEGER64-a) || (a<0 && c0 && elem>MAX_INTEGER64-a) || (a<0 && elem