From c307ed8059d823778841591d2c820a18853eb74c Mon Sep 17 00:00:00 2001 From: William Barnett Date: Mon, 20 Apr 2026 15:26:41 -0700 Subject: [PATCH 1/6] Added logic checks mimicking checks for overflow above REALSXP. Similarly added overflow branch mimicking flow of INTSXP overflow setup --- src/gsumm.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index 5051d72ca..b0dc0afd5 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -436,6 +436,7 @@ SEXP gsum(SEXP x, SEXP narmArg) } } break; case REALSXP: { + bool overflow=false; if (!INHERITS(x, char_integer64)) { const double *restrict gx = gather(x, &anyNA); ans = PROTECT(allocVector(REALSXP, ngrp)); @@ -478,7 +479,7 @@ SEXP gsum(SEXP x, SEXP narmArg) int64_t *restrict ansp = (int64_t *)REAL(ans); memset(ansp, 0, ngrp*sizeof(*ansp)); if (!anyNA) { - #pragma omp parallel for num_threads(getDTthreads(highSize, false)) + #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow) for (int h=0; h0 && b>MAX_INTEGER64-a) || (a<0 && b0 && b>MAX_INTEGER64-a) || (a<0 && b0 && elem>MAX_INTEGER64-a) || (a<0 && elem Date: Mon, 20 Apr 2026 16:55:40 -0700 Subject: [PATCH 2/6] Renamed variables within overflow logic to not confuse with for int b=0 within outer for loop --- src/gsumm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index b0dc0afd5..b12da8bb9 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -489,10 +489,10 @@ SEXP gsum(SEXP x, SEXP narmArg) const uint16_t *my_low = low + b*batchSize + pos; for (int i=0; i0 && b>MAX_INTEGER64-a) || (a<0 && b0 && c>MAX_INTEGER64-a) || (a<0 && c0 && b>MAX_INTEGER64-a) || (a<0 && b0 && c>MAX_INTEGER64-a) || (a<0 && c Date: Wed, 22 Apr 2026 16:04:54 -0700 Subject: [PATCH 3/6] Added test for the int64 overflow, looking for warning message --- inst/tests/tests.Rraw | 5 +++++ 1 file changed, 5 insertions(+) diff 
--git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b73b2767a..e447bbef5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21577,3 +21577,8 @@ close(con) file.create(f <- tempfile()) test(2367.6, fread(file(f)), data.table(), warning="Connection has size 0.") unlink(f) + +# test for correct reponse for Datatable sum int64 overflow +DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]) +test(2368.1, ans <- DT[, sum(x), by=i], data.table(i=1L, lim.integer64()[2L]*2), warning="The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.") +rm(DT) From 5d9e20c4a002b5bb244d4e40b053618a67634965 Mon Sep 17 00:00:00 2001 From: William Barnett Date: Wed, 22 Apr 2026 16:41:30 -0700 Subject: [PATCH 4/6] Added issue to tests.Rraw and addition to NEWS.md --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 021ddbbb6..a84327c6b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -444,6 +444,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T 28. `rbindlist()` now avoids the crash when working with many non-UTF-8 column names, [#7452](https://github.com/Rdatatable/data.table/issues/7452). Thanks @aitap for the report and the fix. +29. `gsum()` now correctly handles integer64 overflow in data.table aggregations (e.g. `DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]); DT[, sum(x), by=i]`), [#7574](https://github.com/Rdatatable/data.table/issues/7574). Thanks @MichaelChirico for reporting and @Will-78 for the fix. + ### NOTES 1. 
The following in-progress deprecations have proceeded: diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ce5680407..2c670beaf 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21586,7 +21586,7 @@ file.create(f <- tempfile()) test(2367.6, fread(file(f)), data.table(), warning="Connection has size 0.") unlink(f) -# test for correct reponse for Datatable sum int64 overflow +# test for correct reponse for Datatable sum int64 overflow #7574 DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]) test(2368.1, ans <- DT[, sum(x), by=i], data.table(i=1L, lim.integer64()[2L]*2), warning="The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.") rm(DT) From 15dbca62e502ab5f9b9ae11c6bf2f7645b01ae64 Mon Sep 17 00:00:00 2001 From: William Barnett Date: Wed, 22 Apr 2026 16:54:10 -0700 Subject: [PATCH 5/6] Changes to fix test made within tests.Rraw --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2c670beaf..aaa24a6d2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21588,5 +21588,5 @@ unlink(f) # test for correct reponse for Datatable sum int64 overflow #7574 DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]) -test(2368.1, ans <- DT[, sum(x), by=i], data.table(i=1L, lim.integer64()[2L]*2), warning="The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. 
Consider using 'as.numeric' on the column beforehand to avoid this warning.") +test(2368.1, DT[, sum(x), by=i], warning="The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.") rm(DT) From 1950bfe1334e87c07cd729bc22fd5aaa7ec9bcdc Mon Sep 17 00:00:00 2001 From: William Barnett Date: Wed, 22 Apr 2026 20:33:39 -0700 Subject: [PATCH 6/6] Added specifications to aggregation within test() --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aaa24a6d2..cc9f9b135 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21588,5 +21588,5 @@ unlink(f) # test for correct reponse for Datatable sum int64 overflow #7574 DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]) -test(2368.1, DT[, sum(x), by=i], warning="The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.") +test(2368.1, DT[, sum(x), by = i],data.table(i = 1L, V1 = as.integer64("4895412794951729152")),warning = "The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.") rm(DT)