From 22e123c403ec5448fb79e04a1895017f01a15566 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sun, 5 Apr 2026 16:56:25 +0300 Subject: [PATCH 01/11] atime: backport functions no longer in R-devel At least the following entry points were used by older data.table versions and need to be available to reproduce benchmarks: void SETLENGTH(SEXP x, R_xlen_t n); R_xlen_t TRUELENGTH(SEXP x); void SET_TRUELENGTH(SEXP x, R_xlen_t n); void SET_GROWABLE_BIT(SEXP); int LEVELS(SEXP); int NAMED(SEXP); #define isFrame(x) isDataFrame(x) #define GetOption(x, none) GetOption1(x) --- .ci/atime/tests.R | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 3cd86bb43..15020711d 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -129,6 +129,77 @@ test.list <- atime::atime_test_list( "NAMESPACE", sprintf('useDynLib\\("?%s"?', Package_regex), paste0('useDynLib(', new.Package_)) + backports = c( + "src/data.table.h" = ' + #if R_VERSION >= R_Version(4, 6, 0) + // backports.c + void SETLENGTH(SEXP x, R_xlen_t n); + R_xlen_t TRUELENGTH(SEXP x); + void SET_TRUELENGTH(SEXP x, R_xlen_t n); + void SET_GROWABLE_BIT(SEXP); + int LEVELS(SEXP); + int NAMED(SEXP); + #define isFrame(x) isDataFrame(x) + #define GetOption(x, none) GetOption1(x) + #endif + ', + "src/backports.c" = ' + #include "data.table.h" + #if R_VERSION >= R_Version(4, 6, 0) + #define NAMED_BITS 16 + struct sxpinfo_struct { + SEXPTYPE type : TYPE_BITS; // in Rinternals.h + unsigned int scalar: 1; + unsigned int obj : 1; + unsigned int alt : 1; + unsigned int gp : 16; + unsigned int mark : 1; + unsigned int debug : 1; + unsigned int trace : 1; + unsigned int spare : 1; + unsigned int gcgen : 1; + unsigned int gccls : 3; + unsigned int named : NAMED_BITS; + unsigned int extra : 32 - NAMED_BITS; + }; + + struct vecsxp_struct { + R_xlen_t length; + R_xlen_t truelength; + }; + + typedef struct VECTOR_SEXPREC { + struct sxpinfo_struct sxpinfo; + SEXP 
attrib; + SEXP gengc_next_node, gengc_prev_node; + struct vecsxp_struct vecsxp; + } *VECSEXP; + + void SETLENGTH(SEXP x, R_xlen_t n) { + ((VECSEXP)x)->vecsxp.length = n; + } + R_xlen_t TRUELENGTH(SEXP x) { + return ((VECSEXP)x)->vecsxp.truelength; + } + void SET_TRUELENGTH(SEXP x, R_xlen_t n) { + ((VECSEXP)x)->vecsxp.truelength = n; + } + void SET_GROWABLE_BIT(SEXP x) { + ((VECSEXP)x)->sxpinfo.gp |= 0x20; + } + int LEVELS(SEXP x) { + return ((VECSEXP)x)->sxpinfo.gp; + } + int NAMED(SEXP x) { + return ((VECSEXP)x)->sxpinfo.named; + } + #endif + ') + for (n in names(backports)) { + f = file(file.path(new.pkg.path, n), "a") + writeLines(backports[[n]], f) + close(f) + } }, # Constant overhead improvement https://github.com/Rdatatable/data.table/pull/6925 From e3c66f5c707f1e5cab73e881a85c7925e1e57a9c Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 12:50:12 +0300 Subject: [PATCH 02/11] Undo the #ifdef STRING_PTR_RO check Since there's no more USE_RINTERNALS, STRING_PTR_RO is no longer a macro. 
--- .ci/atime/tests.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 15020711d..df0b32fcf 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -129,6 +129,9 @@ test.list <- atime::atime_test_list( "NAMESPACE", sprintf('useDynLib\\("?%s"?', Package_regex), paste0('useDynLib(', new.Package_)) + pkg_find_replace( + file.path("src", "Makevars.*in"), + "@PKG_CFLAGS@", "@PKG_CFLAGS@ -DSTRING_PTR_RO=STRING_PTR_RO") backports = c( "src/data.table.h" = ' #if R_VERSION >= R_Version(4, 6, 0) From 6254aabeab37542dbce8042b56c4c3b1870a7f11 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 12:56:32 +0300 Subject: [PATCH 03/11] Backport ATTRIB, SET_ATTRIB --- .ci/atime/tests.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index df0b32fcf..ebe71c0e0 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -142,6 +142,8 @@ test.list <- atime::atime_test_list( void SET_GROWABLE_BIT(SEXP); int LEVELS(SEXP); int NAMED(SEXP); + SEXP ATTRIB(SEXP); + void SET_ATTRIB(SEXP, SEXP); #define isFrame(x) isDataFrame(x) #define GetOption(x, none) GetOption1(x) #endif @@ -196,6 +198,12 @@ test.list <- atime::atime_test_list( int NAMED(SEXP x) { return ((VECSEXP)x)->sxpinfo.named; } + SEXP ATTRIB(SEXP x) { + return ((VECSEXP)x)->attrib; + } + void SET_ATTRIB(SEXP x, SEXP att) { + ((VECSEXP)x)->attrib = att; + } #endif ') for (n in names(backports)) { From 4e7564b612c9436f474d246acc4cb523e164f4f7 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:02:38 +0300 Subject: [PATCH 04/11] Backport OBJECT, SET_OBJECT --- .ci/atime/tests.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index ebe71c0e0..f10383a4e 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -144,6 +144,8 @@ test.list <- atime::atime_test_list( int NAMED(SEXP); SEXP ATTRIB(SEXP); void SET_ATTRIB(SEXP, SEXP); + int OBJECT(SEXP); + void SET_OBJECT(SEXP, 
int); #define isFrame(x) isDataFrame(x) #define GetOption(x, none) GetOption1(x) #endif @@ -198,6 +200,12 @@ test.list <- atime::atime_test_list( int NAMED(SEXP x) { return ((VECSEXP)x)->sxpinfo.named; } + int OBJECT(SEXP x) { + return ((VECSEXP)x)->sxpinfo.obj; + } + void SET_OBJECT(SEXP x, int o) { + ((VECSEXP)x)->sxpinfo.obj = o; + } SEXP ATTRIB(SEXP x) { return ((VECSEXP)x)->attrib; } From 09f077f2bf1346d9e50aa6daf9080989f9c2c4a3 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:09:36 +0300 Subject: [PATCH 05/11] Backport findVar --- .ci/atime/tests.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index f10383a4e..3346c880e 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -148,6 +148,8 @@ test.list <- atime::atime_test_list( void SET_OBJECT(SEXP, int); #define isFrame(x) isDataFrame(x) #define GetOption(x, none) GetOption1(x) + #undef findVar // Rf_ mapping remains + #define findVar(sym, env) R_getVar(sym, env, FALSE) #endif ', "src/backports.c" = ' From 866e8215f7f8aec38e99f7deec31060888447691 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:16:54 +0300 Subject: [PATCH 06/11] Backport {IS,SET,UNSET}_S4_OBJECT --- .ci/atime/tests.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 3346c880e..577b5a58b 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -150,6 +150,10 @@ test.list <- atime::atime_test_list( #define GetOption(x, none) GetOption1(x) #undef findVar // Rf_ mapping remains #define findVar(sym, env) R_getVar(sym, env, FALSE) + #define STRING_PTR(x) ((SEXP *)STRING_PTR_RO(x)) + int IS_S4_OBJECT(SEXP); + void SET_S4_OBJECT(SEXP); + void UNSET_S4_OBJECT(SEXP); #endif ', "src/backports.c" = ' @@ -214,6 +218,16 @@ test.list <- atime::atime_test_list( void SET_ATTRIB(SEXP x, SEXP att) { ((VECSEXP)x)->attrib = att; } + #define S4_OBJECT (1<<4) + int IS_S4_OBJECT(SEXP x) { + return ((VECSEXP)x)->sxpinfo.gp & 
S4_OBJECT; + } + void SET_S4_OBJECT(SEXP x) { + ((VECSEXP)x)->sxpinfo.gp |= S4_OBJECT; + } + void UNSET_S4_OBJECT(SEXP x) { + ((VECSEXP)x)->sxpinfo.gp &= ~S4_OBJECT; + } #endif ') for (n in names(backports)) { From 206640e83333277a6ff1efb31f53e1f95fd05439 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:19:59 +0300 Subject: [PATCH 07/11] Backport SET_TYPEOF --- .ci/atime/tests.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 577b5a58b..b5e131e57 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -154,6 +154,7 @@ test.list <- atime::atime_test_list( int IS_S4_OBJECT(SEXP); void SET_S4_OBJECT(SEXP); void UNSET_S4_OBJECT(SEXP); + void SET_TYPEOF(SEXP, int); #endif ', "src/backports.c" = ' @@ -228,6 +229,9 @@ test.list <- atime::atime_test_list( void UNSET_S4_OBJECT(SEXP x) { ((VECSEXP)x)->sxpinfo.gp &= ~S4_OBJECT; } + void SET_TYPEOF(SEXP x, int type) { + ((VECSEXP)x)->sxpinfo.type = type; + } #endif ') for (n in names(backports)) { From 91a0e57632f95bc0fbc2f8a4be985a9ea494614b Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:29:24 +0300 Subject: [PATCH 08/11] Replace VECTOR_ELT Older versions of data.table access elements in [LENGTH(x), max(LENGTH(x), TRUELENGTH(x))), which is not a read overflow but forbidden nowadays. Skip the length checks. --- .ci/atime/tests.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index b5e131e57..05214b56d 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -155,6 +155,8 @@ test.list <- atime::atime_test_list( void SET_S4_OBJECT(SEXP); void UNSET_S4_OBJECT(SEXP); void SET_TYPEOF(SEXP, int); + #define VECTOR_ELT(x, i) VECTOR_ELT_(x, i) + SEXP VECTOR_ELT_(SEXP, R_xlen_t); #endif ', "src/backports.c" = ' @@ -232,6 +234,9 @@ test.list <- atime::atime_test_list( void SET_TYPEOF(SEXP x, int type) { ((VECSEXP)x)->sxpinfo.type = type; } + SEXP VECTOR_ELT_(SEXP x, R_xlen_t i) { + return ALTREP(x) ? 
(VECTOR_ELT)(x, i) : ((SEXP*)DATAPTR_RO(x))[i]; + } #endif ') for (n in names(backports)) { From aa7785038f3904c54fe6bd3d73466f94302a9ce9 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:36:14 +0300 Subject: [PATCH 09/11] Make sure to #include <Rversion.h> --- .ci/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index 05214b56d..d8ae7a1cb 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -134,6 +134,7 @@ test.list <- atime::atime_test_list( "@PKG_CFLAGS@", "@PKG_CFLAGS@ -DSTRING_PTR_RO=STRING_PTR_RO") backports = c( "src/data.table.h" = ' + #include <Rversion.h> #if R_VERSION >= R_Version(4, 6, 0) // backports.c void SETLENGTH(SEXP x, R_xlen_t n); From d647f4e4cccf98732adac6ce15421cf5b0a14780 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:48:20 +0300 Subject: [PATCH 10/11] Backport VECTOR_PTR, DATAPTR --- .ci/atime/tests.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index d8ae7a1cb..a3ccddd16 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -158,6 +158,8 @@ test.list <- atime::atime_test_list( void SET_TYPEOF(SEXP, int); #define VECTOR_ELT(x, i) VECTOR_ELT_(x, i) SEXP VECTOR_ELT_(SEXP, R_xlen_t); + #define VECTOR_PTR(x) ((SEXP*)DATAPTR_RO(x)) + #define DATAPTR(x) ((void*)DATAPTR_RO(x)) #endif ', "src/backports.c" = ' From 6cde87ead73ed6e0e22f72094748612f5f2165e4 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Wed, 29 Apr 2026 13:48:37 +0300 Subject: [PATCH 11/11] Wrap REFCNT() to use the NAMED() backport Since we USE_RINTERNALS, we get old macros for MAYBE_SHARED(), NO_REFERENCES() from Rinternals.h that rely on REFCNT() existing. 
--- .ci/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index a3ccddd16..b564a919c 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -143,6 +143,7 @@ test.list <- atime::atime_test_list( void SET_GROWABLE_BIT(SEXP); int LEVELS(SEXP); int NAMED(SEXP); + #define REFCNT(x) NAMED(x) SEXP ATTRIB(SEXP); void SET_ATTRIB(SEXP, SEXP); int OBJECT(SEXP);