Rdatatable · Will-78 · Apr 20, 2026 · Apr 20, 2026 · Apr 21, 2026 · Apr 22, 2026
@@ -444,6 +444,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T
 
 28. `rbindlist()` now avoids the crash when working with many non-UTF-8 column names, [#7452](https://github.com/Rdatatable/data.table/issues/7452). Thanks @aitap for the report and the fix.
 
+29. `gsum()` now handles correctly handles integer64 overflow in data.table aggregations (e.g `DT = data.table(i = c(1L, 1L), x = lim.integer64()`), [#7574](https://github.com/Rdatatable/data.table/issues/7574). Thanks @MichaelChirico for reporting and @Will-78 for the fix.
-29. `gsum()` now handles correctly handles integer64 overflow in data.table aggregations (e.g `DT = data.table(i = c(1L, 1L), x = lim.integer64()`), [#7574](https://github.com/Rdatatable/data.table/issues/7574). Thanks @MichaelChirico for reporting and @Will-78 for the fix.
+29. `gsum()` now correctly handles integer64 overflow in data.table aggregations (e.g `DT = data.table(i = c(1L, 1L), x = lim.integer64()`), [#7574](https://github.com/Rdatatable/data.table/issues/7574). Thanks @MichaelChirico for reporting and @Will-78 for the fix.
-29. `gsum()` now handles correctly handles integer64 overflow in data.table aggregations (e.g `DT = data.table(i = c(1L, 1L), x = lim.integer64()`), [#7574](https://github.com/Rdatatable/data.table/issues/7574). Thanks @MichaelChirico for reporting and @Will-78 for the fix.
+29. `gsum()` now correctly handles integer64 overflow in data.table aggregations (e.g `DT = data.table(i = c(1L, 1L), x = lim.integer64()`), [#7574](https://github.com/Rdatatable/data.table/issues/7574). Thanks @MichaelChirico for reporting and @Will-78 for the fix.
+
 ### NOTES
 
 1. The following in-progress deprecations have proceeded:

@@ -21585,3 +21585,8 @@ close(con)
 file.create(f <- tempfile())
 test(2367.6, fread(file(f)), data.table(), warning="Connection has size 0.")
 unlink(f)
+
+# test for correct reponse for Datatable sum int64 overflow #7574
+DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L])
+test(2368.1, DT[, sum(x), by = i],data.table(i = 1L, V1 = as.integer64("4895412794951729152")),warning = "The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.")
+rm(DT)
@@ -436,6 +436,7 @@ SEXP gsum(SEXP x, SEXP narmArg)
     }
   } break;
   case REALSXP: {
+    bool overflow=false;
     if (!INHERITS(x, char_integer64)) {
       const double *restrict gx = gather(x, &anyNA);
       ans = PROTECT(allocVector(REALSXP, ngrp));
@@ -478,7 +479,7 @@ SEXP gsum(SEXP x, SEXP narmArg)
       int64_t *restrict ansp = (int64_t *)REAL(ans);
       memset(ansp, 0, ngrp*sizeof(*ansp));
       if (!anyNA) {
-        #pragma omp parallel for num_threads(getDTthreads(highSize, false))
+        #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow)
         for (int h=0; h<highSize; h++) {
           int64_t *restrict _ans = ansp + (h<<bitshift);
           for (int b=0; b<nBatch; b++) {
@@ -487,13 +488,17 @@ SEXP gsum(SEXP x, SEXP narmArg)
             const int64_t *my_gx = gx + b*batchSize + pos;
             const uint16_t *my_low = low + b*batchSize + pos;
             for (int i=0; i<howMany; i++) {
-              _ans[my_low[i]] += my_gx[i]; // does not propagate INT64 for !narm
+              const int64_t a = _ans[my_low[i]];
+              const int64_t c = my_gx[i];
+
+              if((a>0 && c>MAX_INTEGER64-a) || (a<0 && c<NA_INTEGER64+1-a)) overflow = true;
+              else _ans[my_low[i]] += c; // does not propagate INT64 for !narm
             }
           }
         }
       } else { // narm==true/false and anyNA==true
         if (!narm) {
-          #pragma omp parallel for num_threads(getDTthreads(highSize, false))
+          #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow)
           for (int h=0; h<highSize; h++) {
             int64_t *restrict _ans = ansp + (h<<bitshift);
             for (int b=0; b<nBatch; b++) {
@@ -502,18 +507,21 @@ SEXP gsum(SEXP x, SEXP narmArg)
               const int64_t *my_gx = gx + b*batchSize + pos;
               const uint16_t *my_low = low + b*batchSize + pos;
               for (int i=0; i<howMany; i++) {
-                if (_ans[my_low[i]] == INT64_MIN) continue;
-                const int64_t b = my_gx[i];
-                if (b == INT64_MIN) {
+                const int64_t a = _ans[my_low[i]];
+                if (a == INT64_MIN) continue;
+                const int64_t c = my_gx[i];
+                if (c == INT64_MIN) {
                   if (!narm) _ans[my_low[i]] = INT64_MIN;
                   continue;
                 }
-                _ans[my_low[i]] += b;
+
+                if((a>0 && c>MAX_INTEGER64-a) || (a<0 && c<NA_INTEGER64+1-a)) overflow = true;
+                else _ans[my_low[i]] += c;
               }
             }
           }
         } else {
-          #pragma omp parallel for num_threads(getDTthreads(highSize, false))
+          #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow)
           for (int h=0; h<highSize; h++) {
             int64_t *restrict _ans = ansp + (h<<bitshift);
             for (int b=0; b<nBatch; b++) {
@@ -523,13 +531,41 @@ SEXP gsum(SEXP x, SEXP narmArg)
               const uint16_t *my_low = low + b*batchSize + pos;
               for (int i=0; i<howMany; i++) {
                 const int64_t elem = my_gx[i];
-                if (elem!=INT64_MIN) _ans[my_low[i]] += elem;
+                const int64_t a = _ans[my_low[i]];
+                if (elem!=INT64_MIN){
+                  if((a>0 && elem>MAX_INTEGER64-a) || (a<0 && elem<NA_INTEGER64+1-a)) overflow = true;
+                  else _ans[my_low[i]] += elem;
+                }
+
               }
             }
           }
         }
       }
     }
+    if (overflow) {
+      UNPROTECT(1); // discard the result with overflow
+      warning(_("The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning."));
+      const int64_t *restrict gx = gather(x, &anyNA);
+      ans = PROTECT(allocVector(REALSXP, ngrp));
+      double *restrict ansp = REAL(ans);
+      memset(ansp, 0, ngrp*sizeof(double));
+      if (!anyNA) {
+        #pragma omp parallel for num_threads(getDTthreads(highSize, false))
+        for (int h=0; h<highSize; h++) {
+          double *restrict _ans = ansp + (h<<bitshift);
+          for (int b=0; b<nBatch; b++) {
+            const int pos = counts[ b*highSize + h ];
+            const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
+            const int64_t *my_gx = gx + b*batchSize + pos;
+            const uint16_t *my_low = low + b*batchSize + pos;
+            for (int i=0; i<howMany; i++) {
+              _ans[my_low[i]] += (double) my_gx[i];
+            }
+          }
+        }
+      }
+    }
   } break;
   case CPLXSXP: {
     const Rcomplex *restrict gx = gather(x, &anyNA);