-
Notifications
You must be signed in to change notification settings - Fork 1k
Int64 overflow fix #7719
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Int64 overflow fix #7719
Changes from all commits
c307ed8
66bebcf
664c52a
c4083b0
c8b0393
5d9e20c
15dbca6
1950bfe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21585,3 +21585,8 @@ close(con) | |
| file.create(f <- tempfile()) | ||
| test(2367.6, fread(file(f)), data.table(), warning="Connection has size 0.") | ||
| unlink(f) | ||
|
|
||
| # test for correct reponse for Datatable sum int64 overflow #7574 | ||
| DT = data.table(i = c(1L, 1L), x = lim.integer64()[2L]) | ||
| test(2368.1, DT[, sum(x), by = i],data.table(i = 1L, V1 = as.integer64("4895412794951729152")),warning = "The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The class of the result is wrong. As the warning says, it must be
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldnt the result be something like |
||
| rm(DT) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Im also missing some test cases e.g.: sum(x, na.rm=TRUE) with NA + overflow |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -436,6 +436,7 @@ SEXP gsum(SEXP x, SEXP narmArg) | |
| } | ||
| } break; | ||
| case REALSXP: { | ||
| bool overflow=false; | ||
| if (!INHERITS(x, char_integer64)) { | ||
| const double *restrict gx = gather(x, &anyNA); | ||
| ans = PROTECT(allocVector(REALSXP, ngrp)); | ||
|
|
@@ -478,7 +479,7 @@ SEXP gsum(SEXP x, SEXP narmArg) | |
| int64_t *restrict ansp = (int64_t *)REAL(ans); | ||
| memset(ansp, 0, ngrp*sizeof(*ansp)); | ||
| if (!anyNA) { | ||
| #pragma omp parallel for num_threads(getDTthreads(highSize, false)) | ||
| #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow) | ||
| for (int h=0; h<highSize; h++) { | ||
| int64_t *restrict _ans = ansp + (h<<bitshift); | ||
| for (int b=0; b<nBatch; b++) { | ||
|
|
@@ -487,13 +488,17 @@ SEXP gsum(SEXP x, SEXP narmArg) | |
| const int64_t *my_gx = gx + b*batchSize + pos; | ||
| const uint16_t *my_low = low + b*batchSize + pos; | ||
| for (int i=0; i<howMany; i++) { | ||
| _ans[my_low[i]] += my_gx[i]; // does not propagate INT64 for !narm | ||
| const int64_t a = _ans[my_low[i]]; | ||
| const int64_t c = my_gx[i]; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any particular reason for using |
||
|
|
||
| if((a>0 && c>MAX_INTEGER64-a) || (a<0 && c<NA_INTEGER64+1-a)) overflow = true; | ||
| else _ans[my_low[i]] += c; // does not propagate INT64 for !narm | ||
| } | ||
| } | ||
| } | ||
| } else { // narm==true/false and anyNA==true | ||
| if (!narm) { | ||
| #pragma omp parallel for num_threads(getDTthreads(highSize, false)) | ||
| #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow) | ||
| for (int h=0; h<highSize; h++) { | ||
| int64_t *restrict _ans = ansp + (h<<bitshift); | ||
| for (int b=0; b<nBatch; b++) { | ||
|
|
@@ -502,18 +507,21 @@ SEXP gsum(SEXP x, SEXP narmArg) | |
| const int64_t *my_gx = gx + b*batchSize + pos; | ||
| const uint16_t *my_low = low + b*batchSize + pos; | ||
| for (int i=0; i<howMany; i++) { | ||
| if (_ans[my_low[i]] == INT64_MIN) continue; | ||
| const int64_t b = my_gx[i]; | ||
| if (b == INT64_MIN) { | ||
| const int64_t a = _ans[my_low[i]]; | ||
| if (a == INT64_MIN) continue; | ||
| const int64_t c = my_gx[i]; | ||
| if (c == INT64_MIN) { | ||
| if (!narm) _ans[my_low[i]] = INT64_MIN; | ||
| continue; | ||
| } | ||
| _ans[my_low[i]] += b; | ||
|
|
||
| if((a>0 && c>MAX_INTEGER64-a) || (a<0 && c<NA_INTEGER64+1-a)) overflow = true; | ||
| else _ans[my_low[i]] += c; | ||
| } | ||
| } | ||
| } | ||
| } else { | ||
| #pragma omp parallel for num_threads(getDTthreads(highSize, false)) | ||
| #pragma omp parallel for num_threads(getDTthreads(highSize, false)) reduction(||:overflow) | ||
| for (int h=0; h<highSize; h++) { | ||
| int64_t *restrict _ans = ansp + (h<<bitshift); | ||
| for (int b=0; b<nBatch; b++) { | ||
|
|
@@ -523,13 +531,41 @@ SEXP gsum(SEXP x, SEXP narmArg) | |
| const uint16_t *my_low = low + b*batchSize + pos; | ||
| for (int i=0; i<howMany; i++) { | ||
| const int64_t elem = my_gx[i]; | ||
| if (elem!=INT64_MIN) _ans[my_low[i]] += elem; | ||
| const int64_t a = _ans[my_low[i]]; | ||
| if (elem!=INT64_MIN){ | ||
| if((a>0 && elem>MAX_INTEGER64-a) || (a<0 && elem<NA_INTEGER64+1-a)) overflow = true; | ||
| else _ans[my_low[i]] += elem; | ||
| } | ||
|
|
||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| if (overflow) { | ||
| UNPROTECT(1); // discard the result with overflow | ||
| warning(_("The sum of an integer_64 column for a group was more than type 'integer_64' can hold so the result has been coerced to 'numeric' automatically for convenience. Precision has been lost in the result. Consider using 'as.numeric' on the column beforehand to avoid this warning.")); | ||
| const int64_t *restrict gx = gather(x, &anyNA); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need a 2nd gather call? |
||
| ans = PROTECT(allocVector(REALSXP, ngrp)); | ||
| double *restrict ansp = REAL(ans); | ||
| memset(ansp, 0, ngrp*sizeof(double)); | ||
| if (!anyNA) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and what are we doing in the case of anyNA? panic? |
||
| #pragma omp parallel for num_threads(getDTthreads(highSize, false)) | ||
| for (int h=0; h<highSize; h++) { | ||
| double *restrict _ans = ansp + (h<<bitshift); | ||
| for (int b=0; b<nBatch; b++) { | ||
| const int pos = counts[ b*highSize + h ]; | ||
| const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos; | ||
| const int64_t *my_gx = gx + b*batchSize + pos; | ||
| const uint16_t *my_low = low + b*batchSize + pos; | ||
| for (int i=0; i<howMany; i++) { | ||
| _ans[my_low[i]] += (double) my_gx[i]; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } break; | ||
| case CPLXSXP: { | ||
| const Rcomplex *restrict gx = gather(x, &anyNA); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.