From c2f5e5c9c1e807ff8739e3bc8b5d1a3526865965 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 8 May 2026 14:27:03 +0300 Subject: [PATCH 01/16] CMakeLists.txt: add ENABLE_GCC_ANALYZER option Run GCC -fanalyzer on the project's internal sources, scoped via two new OBJECT libraries so external/ third-party code is not analyzed. Requires GCC >= 13 for both C and C++ to cover the C++ frontend; configuration fails fast otherwise. Enabling the option surfaces -fanalyzer diagnostics across the internal sources; builds are not warning-clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 30 +++++++++++++++++++++++++++--- README.md | 15 +++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fe352b..73485d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,19 @@ set(CMAKE_CXX_STANDARD 11) include(CheckIncludeFile) project(sortstring) + +option(ENABLE_GCC_ANALYZER "Enable GCC -fanalyzer static analysis on internal sources" OFF) +if(ENABLE_GCC_ANALYZER) + if(NOT CMAKE_C_COMPILER_ID STREQUAL "GNU" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + message(FATAL_ERROR "ENABLE_GCC_ANALYZER requires GCC for both C and C++ " + "(got C=${CMAKE_C_COMPILER_ID}, CXX=${CMAKE_CXX_COMPILER_ID}).") + endif() + if(CMAKE_C_COMPILER_VERSION VERSION_LESS 13 OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13) + message(FATAL_ERROR "ENABLE_GCC_ANALYZER requires GCC >= 13 (got " + "C=${CMAKE_C_COMPILER_VERSION}, CXX=${CMAKE_CXX_COMPILER_VERSION}).") + endif() +endif() + include_directories(src src/util) link_libraries(rt) @@ -58,10 +71,21 @@ endif() set_source_files_properties(external/adaptive.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare) set_source_files_properties(external/quicksort.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare) -add_executable(sortstring src/sortstring.c ${INTERNAL_SRCS} ${EXTERNAL_SRCS}) +add_library(sortstring_internal OBJECT ${INTERNAL_SRCS}) +add_library(sortstring_internal_unittest 
OBJECT ${INTERNAL_SRCS}) +target_compile_definitions(sortstring_internal_unittest PRIVATE UNIT_TEST) + +if(ENABLE_GCC_ANALYZER) + target_compile_options(sortstring_internal PRIVATE -fanalyzer) + target_compile_options(sortstring_internal_unittest PRIVATE -fanalyzer) +endif() + +add_executable(sortstring src/sortstring.c + $ ${EXTERNAL_SRCS}) -add_executable(unit-test unit-test/main.cpp ${INTERNAL_SRCS} ${EXTERNAL_SRCS}) -target_compile_definitions(unit-test PUBLIC UNIT_TEST) +add_executable(unit-test unit-test/main.cpp + $ ${EXTERNAL_SRCS}) +target_compile_definitions(unit-test PRIVATE UNIT_TEST) add_definitions(-Drestrict=__restrict__) set(CMAKE_CXX_FLAGS_RELEASE "-fopenmp -g -DNDEBUG -march=native ${CMAKE_CXX_FLAGS_RELEASE}") diff --git a/README.md b/README.md index f60cbc8..49351ef 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,21 @@ Use a separate debug build for easier debugging: $ cmake -DCMAKE_BUILD_TYPE=Debug ../string-sorting +GCC static analyzer +------------------- + +The build can be configured to run the GCC static analyzer (`-fanalyzer`) on +the project's own sources. Third-party code under `external/` is excluded. + + $ cmake -DENABLE_GCC_ANALYZER=ON ../string-sorting + $ make + +The option requires GCC >= 13 for both C and C++; configuration fails fast +with any other compiler or older version. Analyzer diagnostics are emitted as +build warnings; the build still succeeds. Expect significantly longer compile +times when the option is enabled. + + Huge pages ---------- From 09fb443aef2d58c40f8a7f457c7eaa7fd8e4ffc8 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 8 May 2026 14:26:48 +0300 Subject: [PATCH 02/16] CMakeLists.txt: add ENABLE_CLANG_ANALYZER option Run clang-tidy with the clang-analyzer-* check group on the project's internal sources via CMake's <LANG>_CLANG_TIDY target property; scope matches ENABLE_GCC_ANALYZER (external/ third-party code excluded). The configured C/C++ compiler does not need to be Clang. 
Configuration fails fast if clang-tidy is not on PATH. Enabling the option surfaces clang-analyzer-* diagnostics across the internal sources; builds are not warning-clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 18 ++++++++++++++++++ README.md | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 73485d1..e83dbeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,18 @@ if(ENABLE_GCC_ANALYZER) endif() endif() +option(ENABLE_CLANG_ANALYZER "Enable clang-tidy clang-analyzer-* checks on internal sources" OFF) +if(ENABLE_CLANG_ANALYZER) + find_program(CLANG_TIDY_EXECUTABLE NAMES clang-tidy) + if(NOT CLANG_TIDY_EXECUTABLE) + message(FATAL_ERROR "ENABLE_CLANG_ANALYZER requires clang-tidy on PATH.") + endif() + set(CLANG_TIDY_COMMAND + ${CLANG_TIDY_EXECUTABLE} + -checks=-*,clang-analyzer-* + -header-filter=^${CMAKE_SOURCE_DIR}/src/.*) +endif() + include_directories(src src/util) link_libraries(rt) @@ -80,6 +92,12 @@ if(ENABLE_GCC_ANALYZER) target_compile_options(sortstring_internal_unittest PRIVATE -fanalyzer) endif() +if(ENABLE_CLANG_ANALYZER) + set_target_properties(sortstring_internal sortstring_internal_unittest PROPERTIES + C_CLANG_TIDY "${CLANG_TIDY_COMMAND}" + CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND}") +endif() + add_executable(sortstring src/sortstring.c $ ${EXTERNAL_SRCS}) diff --git a/README.md b/README.md index 49351ef..ca7f689 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,23 @@ build warnings; the build still succeeds. Expect significantly longer compile times when the option is enabled. +Clang static analyzer +--------------------- + +The build can also be configured to run the Clang static analyzer via +`clang-tidy`, scoped to the `clang-analyzer-*` check group. Third-party code +under `external/` is excluded. 
+ + $ cmake -DENABLE_CLANG_ANALYZER=ON ../string-sorting + $ make + +The option requires `clang-tidy` on `PATH` (Ubuntu: `apt install clang-tidy`); +configuration fails fast if it is not found. Diagnostics appear inline like +compiler warnings and the build still succeeds. The configured C/C++ compiler +does not need to be Clang. `ENABLE_GCC_ANALYZER` and `ENABLE_CLANG_ANALYZER` +can be combined when building with GCC >= 13. + + Huge pages ---------- From 10685ba23b2b359f159e632441ab9b6f24826321 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 8 May 2026 14:34:04 +0300 Subject: [PATCH 03/16] CMakeLists.txt: modernize and require cmake 3.16 Bump cmake_minimum_required to 3.16. Move directory-level globals (include_directories, link_libraries, add_definitions) to target-level properties on the OBJECT libraries, propagating to the executables via target_link_libraries. Replace hard-coded -fopenmp with find_package(OpenMP) and OpenMP::OpenMP_C/CXX. Use C_STANDARD/CXX_STANDARD per target instead of mutating CMAKE_*_FLAGS. Pass -Wall -Wextra -march=native to every build type via add_compile_options(); -march=native is unconditional in this diff, and the BUILD_NATIVE option (default ON) meant to gate it is not added by this patch. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 1 + CMakeLists.txt | 61 +++++++++++++++++++++++++++++--------------------- README.md | 24 +++++++++----------- 3 files changed, 46 insertions(+), 40 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0ec9e7f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/build* diff --git a/CMakeLists.txt b/CMakeLists.txt index e83dbeb..95490b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,7 @@ -cmake_minimum_required(VERSION 3.1) -set(CMAKE_CXX_STANDARD 11) -include(CheckIncludeFile) +cmake_minimum_required(VERSION 3.16) +project(sortstring C CXX) -project(sortstring) +include(CheckIncludeFile) option(ENABLE_GCC_ANALYZER "Enable GCC -fanalyzer static analysis on internal sources" OFF) if(ENABLE_GCC_ANALYZER) @@ -28,9 +27,17 @@ if(ENABLE_CLANG_ANALYZER) -header-filter=^${CMAKE_SOURCE_DIR}/src/.*) endif() -include_directories(src src/util) +check_include_file(sys/sdt.h HAVE_SYS_SDT_H) + +find_package(OpenMP REQUIRED) + +add_compile_options(-Wall -Wextra -march=native) + +string(APPEND CMAKE_C_FLAGS_RELEASE " -g") +string(APPEND CMAKE_CXX_FLAGS_RELEASE " -g") -link_libraries(rt) +string(APPEND CMAKE_C_FLAGS_DEBUG " -O1 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2") +string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O1 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2") set(INTERNAL_SRCS src/funnelsort.cpp @@ -75,18 +82,27 @@ set(EXTERNAL_SRCS external/forward16.c external/parallel_string_radix_sort.cpp) -check_include_file(sys/sdt.h HAVE_SYS_SDT_H) -if(HAVE_SYS_SDT_H) - add_definitions(-DHAVE_SYS_SDT_H=1) -endif() - -set_source_files_properties(external/adaptive.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare) +set_source_files_properties(external/adaptive.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare) set_source_files_properties(external/quicksort.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare) add_library(sortstring_internal OBJECT ${INTERNAL_SRCS}) 
add_library(sortstring_internal_unittest OBJECT ${INTERNAL_SRCS}) target_compile_definitions(sortstring_internal_unittest PRIVATE UNIT_TEST) +foreach(tgt sortstring_internal sortstring_internal_unittest) + target_include_directories(${tgt} PUBLIC + ${CMAKE_SOURCE_DIR}/src + ${CMAKE_SOURCE_DIR}/src/util) + target_compile_definitions(${tgt} PUBLIC restrict=__restrict__) + if(HAVE_SYS_SDT_H) + target_compile_definitions(${tgt} PUBLIC HAVE_SYS_SDT_H=1) + endif() + target_link_libraries(${tgt} PUBLIC OpenMP::OpenMP_C OpenMP::OpenMP_CXX) + set_target_properties(${tgt} PROPERTIES + C_STANDARD 99 C_STANDARD_REQUIRED YES + CXX_STANDARD 11 CXX_STANDARD_REQUIRED YES) +endforeach() + if(ENABLE_GCC_ANALYZER) target_compile_options(sortstring_internal PRIVATE -fanalyzer) target_compile_options(sortstring_internal_unittest PRIVATE -fanalyzer) @@ -98,20 +114,13 @@ if(ENABLE_CLANG_ANALYZER) CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND}") endif() -add_executable(sortstring src/sortstring.c - $ ${EXTERNAL_SRCS}) +add_executable(sortstring src/sortstring.c ${EXTERNAL_SRCS}) +target_link_libraries(sortstring PRIVATE sortstring_internal rt) -add_executable(unit-test unit-test/main.cpp - $ ${EXTERNAL_SRCS}) +add_executable(unit-test unit-test/main.cpp ${EXTERNAL_SRCS}) +target_link_libraries(unit-test PRIVATE sortstring_internal_unittest rt) target_compile_definitions(unit-test PRIVATE UNIT_TEST) -add_definitions(-Drestrict=__restrict__) -set(CMAKE_CXX_FLAGS_RELEASE "-fopenmp -g -DNDEBUG -march=native ${CMAKE_CXX_FLAGS_RELEASE}") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-fopenmp -g -DNDEBUG -march=native ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") -set(CMAKE_C_FLAGS_RELEASE "-fopenmp -g -DNDEBUG -march=native ${CMAKE_C_FLAGS_RELEASE}") -set(CMAKE_C_FLAGS_RELWITHDEBINFO "-fopenmp -g -DNDEBUG -march=native ${CMAKE_C_FLAGS_RELWITHDEBINFO}") -set(CMAKE_CXX_FLAGS "-Wall -Wextra ${CMAKE_CXX_FLAGS}") -set(CMAKE_C_FLAGS "-Wall -Wextra -std=c99 ${CMAKE_C_FLAGS}") - -set(CMAKE_CXX_FLAGS_DEBUG 
"${CMAKE_CXX_FLAGS_DEBUG} -O1 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2") -set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O1 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2") +set_target_properties(sortstring unit-test PROPERTIES + C_STANDARD 99 C_STANDARD_REQUIRED YES + CXX_STANDARD 11 CXX_STANDARD_REQUIRED YES) diff --git a/README.md b/README.md index ca7f689..25c82f5 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,9 @@ Requirements ------------ * C++11 - * CMake + * CMake >= 3.16 + * OpenMP + * Ninja (optional; the default Make generator also works) Compilation @@ -49,18 +51,14 @@ Compilation Default compilation with GCC: - $ git clone git://github.com/rantala/string-sorting.git - $ mkdir string-sorting-build - $ cd string-sorting-build - $ cmake -DCMAKE_BUILD_TYPE=Release ../string-sorting - $ make - $ ./sortstring + $ git clone https://github.com/rantala/string-sorting.git + $ cd string-sorting + $ cmake -B build -G Ninja && ninja -C build + $ ./build/sortstring Use a separate debug build for easier debugging: - $ mkdir debug-build - $ cd debug-build - $ cmake -DCMAKE_BUILD_TYPE=Debug ../string-sorting + $ cmake -B build-debug -G Ninja -DCMAKE_BUILD_TYPE=Debug && ninja -C build-debug GCC static analyzer @@ -69,8 +67,7 @@ GCC static analyzer The build can be configured to run the GCC static analyzer (`-fanalyzer`) on the project's own sources. Third-party code under `external/` is excluded. - $ cmake -DENABLE_GCC_ANALYZER=ON ../string-sorting - $ make + $ cmake -B build-gcc-analyzer -G Ninja -DENABLE_GCC_ANALYZER=ON && ninja -C build-gcc-analyzer The option requires GCC >= 13 for both C and C++; configuration fails fast with any other compiler or older version. Analyzer diagnostics are emitted as @@ -85,8 +82,7 @@ The build can also be configured to run the Clang static analyzer via `clang-tidy`, scoped to the `clang-analyzer-*` check group. Third-party code under `external/` is excluded. 
- $ cmake -DENABLE_CLANG_ANALYZER=ON ../string-sorting - $ make + $ cmake -B build-clang-analyzer -G Ninja -DENABLE_CLANG_ANALYZER=ON && ninja -C build-clang-analyzer The option requires `clang-tidy` on `PATH` (Ubuntu: `apt install clang-tidy`); configuration fails fast if it is not found. Diagnostics appear inline like From 5390c0387f59d0003fea2b983758cb309262ed32 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 8 May 2026 14:38:34 +0300 Subject: [PATCH 04/16] build.yml: refresh runners, dependencies, and actions Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 622c678..3d3b5c4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,11 +10,11 @@ jobs: fail-fast: false matrix: build_type: [Release, Debug] - os: [ubuntu-20.04, ubuntu-18.04] + os: [ubuntu-22.04, ubuntu-24.04] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Install dependencies - run: sudo apt-get install cmake + run: sudo apt-get install -y cmake systemtap-sdt-dev - name: cmake run: cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - name: make @@ -28,11 +28,11 @@ jobs: fail-fast: false matrix: build_type: [Release, Debug] - os: [ubuntu-20.04] + os: [ubuntu-22.04, ubuntu-24.04] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v6 - name: Install dependencies - run: sudo apt-get install cmake clang systemtap-sdt-dev + run: sudo apt-get install -y cmake clang libomp-dev systemtap-sdt-dev - name: cmake run: CC=clang CXX=clang++ cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - name: make From 07d75ee3855ecfdb2cf908390ecf369b8da53f19 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Sun, 10 May 2026 11:31:50 +0300 Subject: [PATCH 05/16] Explicit abort() on malloc failures Check each dynamic memory allocation and for simplicity abort() on 
failure. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/burstsort_mkq.cpp | 1 + src/funnelsort.cpp | 1 + src/losertree.h | 1 + src/mergesort.cpp | 6 ++++++ src/mergesort_lcp.cpp | 28 ++++++++++++++++++++++++++++ src/mergesort_losertree.cpp | 10 ++++++++++ src/mergesort_unstable.cpp | 3 +++ src/msd_a.cpp | 5 +++++ src/msd_a2.cpp | 4 ++++ src/msd_ce.cpp | 25 +++++++++++++++++++++++++ src/msd_ci.cpp | 3 +++ src/msd_dyn_vector.cpp | 1 + src/msd_lsd.cpp | 4 ++++ src/multikey_cache.cpp | 1 + src/multikey_multipivot.cpp | 2 ++ src/multikey_simd.cpp | 10 ++++++++++ src/vector_bagwell.h | 1 + src/vector_block.h | 1 + src/vector_brodnik.h | 1 + src/vector_malloc.h | 4 ++++ src/vector_realloc.h | 4 ++++ 21 files changed, 116 insertions(+) diff --git a/src/burstsort_mkq.cpp b/src/burstsort_mkq.cpp index 53d7be0..c141c44 100644 --- a/src/burstsort_mkq.cpp +++ b/src/burstsort_mkq.cpp @@ -254,6 +254,7 @@ burst_insert(TSTNode* root, unsigned char** strings, size_t N) } CharT* oracle = static_cast( malloc(buck->size()*sizeof(CharT))); + if (!oracle) abort(); for (unsigned j=0; j < buck->size(); ++j) { oracle[j] = get_char((*buck)[j], depth); } diff --git a/src/funnelsort.cpp b/src/funnelsort.cpp index 673b2e8..109018d 100644 --- a/src/funnelsort.cpp +++ b/src/funnelsort.cpp @@ -697,6 +697,7 @@ void funnelsort_Kway(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); funnelsort(strings, n, tmp); free(tmp); } diff --git a/src/losertree.h b/src/losertree.h index 315ad8f..cb16bb1 100644 --- a/src/losertree.h +++ b/src/losertree.h @@ -83,6 +83,7 @@ struct loser_tree assert(_nonempty_streams>1); void* raw = malloc(_stream_offset*sizeof(unsigned) + _stream_offset*sizeof(Stream)); + if (!raw) abort(); _nodes = static_cast(raw); _streams = reinterpret_cast( static_cast(raw) + diff --git a/src/mergesort.cpp b/src/mergesort.cpp index d38f1d6..854a5f6 100644 --- a/src/mergesort.cpp +++ b/src/mergesort.cpp @@ 
-115,6 +115,7 @@ void mergesort_2way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_2way(strings, n, tmp); free(tmp); } @@ -144,6 +145,7 @@ void mergesort_2way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_2way_parallel(strings, n, tmp); free(tmp); } @@ -317,6 +319,7 @@ void mergesort_3way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_3way(strings, n, tmp); free(tmp); } @@ -350,6 +353,7 @@ void mergesort_3way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_3way_parallel(strings, n, tmp); free(tmp); } @@ -480,6 +484,7 @@ void mergesort_4way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_4way(strings, n, tmp); free(tmp); } @@ -518,6 +523,7 @@ void mergesort_4way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_4way_parallel(strings, n, tmp); free(tmp); } diff --git a/src/mergesort_lcp.cpp b/src/mergesort_lcp.cpp index 352e86a..906aef4 100644 --- a/src/mergesort_lcp.cpp +++ b/src/mergesort_lcp.cpp @@ -264,9 +264,12 @@ void mergesort_lcp_2way(unsigned char** strings, size_t n) { lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_output) abort(); unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); const MergeResult m = mergesort_lcp_2way(strings, tmp, lcp_input, lcp_output, n); if (m == SortedInTemp) { @@ -335,9 +338,12 @@ void mergesort_lcp_2way_parallel(unsigned char** 
strings, size_t n) { lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_output) abort(); unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); #pragma omp parallel { #pragma omp single @@ -820,9 +826,12 @@ mergesort_lcp_3way(unsigned char** strings, size_t n) { debug() << __func__ << '\n'; lcp_t* lcp_input = (lcp_t*) malloc(n*sizeof(lcp_t)); + if (!lcp_input) abort(); lcp_t* lcp_tmp = (lcp_t*) malloc(n*sizeof(lcp_t)); + if (!lcp_tmp) abort(); unsigned char** input_tmp = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!input_tmp) abort(); const MergeResult m = mergesort_lcp_3way(strings, input_tmp, lcp_input, lcp_tmp, n); if (m == SortedInTemp) { @@ -942,9 +951,12 @@ mergesort_lcp_3way_parallel(unsigned char** strings, size_t n) { debug() << __func__ << '\n'; lcp_t* lcp_input = (lcp_t*) malloc(n*sizeof(lcp_t)); + if (!lcp_input) abort(); lcp_t* lcp_tmp = (lcp_t*) malloc(n*sizeof(lcp_t)); + if (!lcp_tmp) abort(); unsigned char** input_tmp = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!input_tmp) abort(); #pragma omp parallel { #pragma omp single @@ -1408,10 +1420,15 @@ static void mergesort_cache_lcp_2way(unsigned char** strings, size_t n) { lcp_t* lcp_input = (lcp_t*) malloc(n*sizeof(lcp_t)); + if (!lcp_input) abort(); lcp_t* lcp_tmp = (lcp_t*) malloc(n*sizeof(lcp_t)); + if (!lcp_tmp) abort(); unsigned char** input_tmp = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!input_tmp) abort(); CharT* cache = (CharT*) malloc(n*sizeof(CharT)); + if (!cache) abort(); CharT* cache_tmp = (CharT*) malloc(n*sizeof(CharT)); + if (!cache_tmp) abort(); MergeResult m = mergesort_cache_lcp_2way(strings, input_tmp, lcp_input, lcp_tmp, cache, cache_tmp, n); if (m == SortedInTemp) { @@ -1524,10 +1541,15 @@ static void mergesort_cache_lcp_2way_parallel(unsigned char** strings, size_t n) { lcp_t* lcp_input = (lcp_t*) 
malloc(n*sizeof(lcp_t)); + if (!lcp_input) abort(); lcp_t* lcp_tmp = (lcp_t*) malloc(n*sizeof(lcp_t)); + if (!lcp_tmp) abort(); unsigned char** input_tmp = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!input_tmp) abort(); CharT* cache = (CharT*) malloc(n*sizeof(CharT)); + if (!cache) abort(); CharT* cache_tmp = (CharT*) malloc(n*sizeof(CharT)); + if (!cache_tmp) abort(); MergeResult m = mergesort_cache_lcp_2way_parallel(strings, input_tmp, lcp_input, lcp_tmp, cache, cache_tmp, n); if (m == SortedInTemp) { @@ -1738,9 +1760,12 @@ void mergesort_lcp_2way_unstable(unsigned char** strings, size_t n) { lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_output) abort(); unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); const MergeResult m = mergesort_lcp_2way_unstable(strings, tmp, lcp_input, lcp_output, n); if (m == SortedInTemp) { @@ -1811,9 +1836,12 @@ void mergesort_lcp_2way_unstable_parallel(unsigned char** strings, size_t n) { lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); + if (!lcp_output) abort(); unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); #pragma omp parallel { #pragma omp single diff --git a/src/mergesort_losertree.cpp b/src/mergesort_losertree.cpp index 8a2a473..d226b5e 100644 --- a/src/mergesort_losertree.cpp +++ b/src/mergesort_losertree.cpp @@ -71,6 +71,7 @@ void mergesort_losertree_64way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree<64>(strings, n, tmp); free(tmp); } @@ -78,6 +79,7 @@ void mergesort_losertree_128way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); 
mergesort_losertree<128>(strings, n, tmp); free(tmp); } @@ -85,6 +87,7 @@ void mergesort_losertree_256way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree<256>(strings, n, tmp); free(tmp); } @@ -92,6 +95,7 @@ void mergesort_losertree_512way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree<512>(strings, n, tmp); free(tmp); } @@ -99,6 +103,7 @@ void mergesort_losertree_1024way(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree<1024>(strings, n, tmp); free(tmp); } @@ -146,6 +151,7 @@ void mergesort_losertree_64way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree_parallel<64>(strings, n, tmp); free(tmp); } @@ -153,6 +159,7 @@ void mergesort_losertree_128way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree_parallel<128>(strings, n, tmp); free(tmp); } @@ -160,6 +167,7 @@ void mergesort_losertree_256way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree_parallel<256>(strings, n, tmp); free(tmp); } @@ -167,6 +175,7 @@ void mergesort_losertree_512way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_losertree_parallel<512>(strings, n, tmp); free(tmp); } @@ -174,6 +183,7 @@ void mergesort_losertree_1024way_parallel(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); 
mergesort_losertree_parallel<1024>(strings, n, tmp); free(tmp); } diff --git a/src/mergesort_unstable.cpp b/src/mergesort_unstable.cpp index e8bb30d..0a1115a 100644 --- a/src/mergesort_unstable.cpp +++ b/src/mergesort_unstable.cpp @@ -149,6 +149,7 @@ void mergesort_2way_unstable(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_2way_unstable(strings, n, tmp); free(tmp); } @@ -330,6 +331,7 @@ void mergesort_3way_unstable(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_3way_unstable(strings, n, tmp); free(tmp); } @@ -1873,6 +1875,7 @@ void mergesort_4way_unstable(unsigned char** strings, size_t n) { unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); + if (!tmp) abort(); mergesort_4way_unstable(strings, n, tmp); free(tmp); } diff --git a/src/msd_a.cpp b/src/msd_a.cpp index edadb87..cc0b51b 100644 --- a/src/msd_a.cpp +++ b/src/msd_a.cpp @@ -94,6 +94,7 @@ msd_A(cacheblock_t* cache, size_t N, size_t cache_depth, size_t true_depth) ++bucketsize[cache[i].bytes[cache_depth]]; cacheblock_t* sorted = (cacheblock_t*) malloc(N*sizeof(cacheblock_t)); + if (!sorted) abort(); static size_t bucketindex[256]; bucketindex[0] = 0; for (unsigned i=1; i < 256; ++i) @@ -127,6 +128,7 @@ msd_A_adaptive(cacheblock_t* cache, cache_depth = 0; } size_t* bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); for (size_t i=0; i < N; ++i) { uint16_t bucket = (cache[i].bytes[cache_depth] << 8) | @@ -135,6 +137,7 @@ msd_A_adaptive(cacheblock_t* cache, } cacheblock_t* sorted = (cacheblock_t*) malloc(N*sizeof(cacheblock_t)); + if (!sorted) abort(); static size_t bucketindex[0x10000]; bucketindex[0] = 0; for (unsigned i=1; i < 0x10000; ++i) @@ -161,6 +164,7 @@ void msd_A(unsigned char** strings, size_t N) { cacheblock_t* cache = (cacheblock_t*) malloc(N*sizeof(cacheblock_t)); + 
if (!cache) abort(); for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i]; fill_cache(cache, N, 0); msd_A(cache, N, 0, 0); @@ -173,6 +177,7 @@ void msd_A_adaptive(unsigned char** strings, size_t N) { cacheblock_t* cache = (cacheblock_t*) malloc(N*sizeof(cacheblock_t)); + if (!cache) abort(); for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i]; fill_cache(cache, N, 0); msd_A_adaptive(cache, N, 0, 0); diff --git a/src/msd_a2.cpp b/src/msd_a2.cpp index 5c80eac..70f3798 100644 --- a/src/msd_a2.cpp +++ b/src/msd_a2.cpp @@ -117,6 +117,7 @@ struct TempSpace allocated = static_cast( malloc((elems-elements_in_strings) * sizeof(cacheblock_t))); + if (!allocated) abort(); } } void deallocate() @@ -196,6 +197,7 @@ msd_A2_adaptive(cacheblock_t* cache, tmp.allocate(N); size_t* bucketsize = static_cast(calloc(0x10000, sizeof(size_t))); + if (!bucketsize) abort(); for (size_t i=0; i < N; ++i) { uint16_t bucket = (cache[i].bytes[cache_depth] << 8) | @@ -228,6 +230,7 @@ msd_A2(unsigned char** strings, size_t N) { cacheblock_t* cache = static_cast(malloc(N*sizeof(cacheblock_t))); + if (!cache) abort(); for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i]; TempSpace tmp(strings, N); fill_cache(cache, N, 0); @@ -242,6 +245,7 @@ msd_A2_adaptive(unsigned char** strings, size_t N) { cacheblock_t* cache = static_cast(malloc(N*sizeof(cacheblock_t))); + if (!cache) abort(); for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i]; TempSpace tmp(strings, N); fill_cache(cache, N, 0); diff --git a/src/msd_ce.cpp b/src/msd_ce.cpp index 594b5a4..60eba56 100644 --- a/src/msd_ce.cpp +++ b/src/msd_ce.cpp @@ -85,6 +85,7 @@ msd_CE0(unsigned char** strings, size_t n, size_t depth) ++bucketsize[strings[i][depth]]; unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); static size_t bucketindex[256]; bucketindex[0] = 0; for (size_t i=1; i < 256; ++i) @@ -115,10 +116,12 @@ msd_CE1(unsigned char** strings, size_t n, size_t depth) size_t bucketsize[256] = 
{0}; unsigned char* restrict oracle = (unsigned char*) malloc(n); + if (!oracle) abort(); for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i] = strings[i][depth]]; unsigned char** restrict sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); size_t bucketindex[256]; bucketindex[0] = 0; for (size_t i=1; i < 256; ++i) @@ -150,12 +153,14 @@ msd_CE2(unsigned char** strings, size_t n, size_t depth) size_t bucketsize[256] = {0}; unsigned char* restrict oracle = (unsigned char*) malloc(n); + if (!oracle) abort(); for (size_t i=0; i < n; ++i) oracle[i] = strings[i][depth]; for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i]]; unsigned char** restrict sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); size_t bucketindex[256]; bucketindex[0] = 0; for (size_t i=1; i < 256; ++i) @@ -187,12 +192,14 @@ msd_CE2_16bit(unsigned char** strings, size_t n, size_t depth) uint16_t bucketsize[256] = {0}; unsigned char* restrict oracle = (unsigned char*) malloc(n); + if (!oracle) abort(); for (size_t i=0; i < n; ++i) oracle[i] = strings[i][depth]; for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i]]; unsigned char** restrict sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); uint16_t bucketindex[256]; bucketindex[0] = 0; for (size_t i=1; i < 256; ++i) @@ -219,14 +226,17 @@ msd_CE3(unsigned char** strings, size_t n, size_t depth) } uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); + if (!oracle) abort(); for (size_t i=0; i < n; ++i) oracle[i] = get_char(strings[i], depth); size_t* restrict bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i]]; unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); static size_t bucketindex[0x10000]; bucketindex[0] = 0; for (size_t i=1; i < 0x10000; ++i) @@ -259,14 +269,17 @@ msd_CE4(unsigned char** strings, size_t 
n, size_t depth) } uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); + if (!oracle) abort(); for (size_t i=0; i < n; ++i) oracle[i] = get_char(strings[i], depth); size_t* restrict bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i]]; unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); static size_t bucketindex[0x10000]; bucketindex[0] = 0; for (size_t i=1; i < 0x10000; ++i) @@ -332,6 +345,7 @@ msd_CE5(unsigned char** strings, size_t n, size_t depth, oracle[i] = get_char(strings[i], depth); size_t* restrict bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i]]; static size_t bucketindex[0x10000]; @@ -355,8 +369,10 @@ void msd_CE5(unsigned char** strings, size_t n) { uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); + if (!oracle) abort(); unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); msd_CE5(strings, n, 0, oracle, sorted); free(oracle); free(sorted); @@ -387,6 +403,7 @@ msd_CE6(unsigned char** strings, size_t n, size_t depth, } size_t* restrict bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i]]; static size_t bucketindex[0x10000]; @@ -409,8 +426,10 @@ void msd_CE6(unsigned char** strings, size_t n) { uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); + if (!oracle) abort(); unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); msd_CE6(strings, n, 0, oracle, sorted); free(oracle); free(sorted); @@ -441,6 +460,7 @@ msd_CE7_(unsigned char** strings, size_t n, size_t depth, } size_t* restrict bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); int is_sorted = 1; { size_t i; @@ -482,8 
+502,10 @@ void msd_CE7(unsigned char** strings, size_t n) { uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); + if (!oracle) abort(); unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); msd_CE7_(strings, n, 0, oracle, sorted); free(oracle); free(sorted); @@ -517,6 +539,7 @@ msd_CE8_(unsigned char** strings, size_t n, size_t depth, } size_t* restrict bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); int is_sorted = 1; { size_t i; @@ -566,8 +589,10 @@ void msd_CE8(unsigned char** strings, size_t n) { uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); + if (!oracle) abort(); unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); msd_CE8_(strings, n, 0, oracle, sorted); free(oracle); free(sorted); diff --git a/src/msd_ci.cpp b/src/msd_ci.cpp index 69d1643..6bcb9fa 100644 --- a/src/msd_ci.cpp +++ b/src/msd_ci.cpp @@ -56,6 +56,7 @@ msd_ci(unsigned char** strings, size_t n, size_t depth) BucketsizeType bucketsize[256] = {0}; unsigned char* restrict oracle = (unsigned char*) malloc(n); + if (!oracle) abort(); for (size_t i=0; i < n; ++i) oracle[i] = strings[i][depth]; for (size_t i=0; i < n; ++i) @@ -107,10 +108,12 @@ msd_ci_adaptive(unsigned char** strings, size_t n, size_t depth) } uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); + if (!oracle) abort(); for (size_t i=0; i < n; ++i) oracle[i] = get_char(strings[i], depth); size_t* restrict bucketsize = (size_t*) calloc(0x10000, sizeof(size_t)); + if (!bucketsize) abort(); for (size_t i=0; i < n; ++i) ++bucketsize[oracle[i]]; static ssize_t bucketindex[0x10000]; diff --git a/src/msd_dyn_vector.cpp b/src/msd_dyn_vector.cpp index ce8b723..11a805e 100644 --- a/src/msd_dyn_vector.cpp +++ b/src/msd_dyn_vector.cpp @@ -137,6 +137,7 @@ msd_D_adaptive(unsigned char** strings, size_t n, size_t depth, Bucket* buckets) return; } size_t* bucketsize = 
(size_t*) malloc(0x10000 * sizeof(size_t)); + if (!bucketsize) abort(); size_t i=0; for (; i < n-n%16; i+=16) { uint16_t cache[16]; diff --git a/src/msd_lsd.cpp b/src/msd_lsd.cpp index 3bb68bc..5b387da 100644 --- a/src/msd_lsd.cpp +++ b/src/msd_lsd.cpp @@ -93,6 +93,7 @@ msd_lsd(Cacheblock* cache, size_t N, size_t depth) ++bucketsize[cache[i].chars[byte]]; Cacheblock* sorted = (Cacheblock*) malloc(N*sizeof(Cacheblock)); + if (!sorted) abort(); size_t bucketindex[256]; bucketindex[0] = 0; for (size_t i=1; i < 256; ++i) @@ -142,6 +143,7 @@ msd_lsd_adaptive(Cacheblock* cache, size_t N, size_t depth) } Cacheblock* sorted = (Cacheblock*) malloc(N*sizeof(Cacheblock)); + if (!sorted) abort(); static size_t bucketindex[0x10000]; bucketindex[0] = 0; for (size_t i=1; i < 0x10000; ++i) @@ -180,6 +182,7 @@ msd_A_lsd(unsigned char** strings, size_t N) { Cacheblock* cache = static_cast*>( malloc(N*sizeof(Cacheblock))); + if (!cache) abort(); for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i]; fill_cache(cache, N, 0); msd_lsd(cache, N, 0); @@ -193,6 +196,7 @@ msd_A_lsd_adaptive(unsigned char** strings, size_t N) { Cacheblock* cache = static_cast*>( malloc(N*sizeof(Cacheblock))); + if (!cache) abort(); for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i]; fill_cache(cache, N, 0); msd_lsd_adaptive(cache, N, 0); diff --git a/src/multikey_cache.cpp b/src/multikey_cache.cpp index a945a0c..281133c 100644 --- a/src/multikey_cache.cpp +++ b/src/multikey_cache.cpp @@ -218,6 +218,7 @@ multikey_cache(unsigned char** strings, size_t n, size_t depth) Cacheblock* cache = static_cast*>( malloc(n*sizeof(Cacheblock))); + if (!cache) abort(); for (size_t i=0; i < n; ++i) { cache[i].ptr = strings[i]; } diff --git a/src/multikey_multipivot.cpp b/src/multikey_multipivot.cpp index 11d7bcc..fe8df5d 100644 --- a/src/multikey_multipivot.cpp +++ b/src/multikey_multipivot.cpp @@ -403,6 +403,7 @@ multikey_multipivot(unsigned char** strings, size_t n, size_t depth) pivots[i] = sample_array[step*i]; } 
uint8_t* restrict oracle = static_cast(_mm_malloc(n, 16)); + if (!oracle) abort(); fill_oracle(strings, n, oracle, pivots, depth); std::array bucketsize; bucketsize.fill(0); @@ -423,6 +424,7 @@ multikey_multipivot(unsigned char** strings, size_t n, size_t depth) if (not sorted) { unsigned char** sorted = (unsigned char**) malloc(n*sizeof(unsigned char*)); + if (!sorted) abort(); static std::array bucketindex; bucketindex[0] = 0; for (unsigned i=1; i < total_buckets(Pivots); ++i) diff --git a/src/multikey_simd.cpp b/src/multikey_simd.cpp index 336c63f..d37de88 100644 --- a/src/multikey_simd.cpp +++ b/src/multikey_simd.cpp @@ -282,6 +282,7 @@ multikey_simd(unsigned char** strings, size_t N, size_t depth) CharT partval = pseudo_median(strings, N, depth); uint8_t* const restrict oracle = static_cast(_mm_malloc(N, 16)); + if (!oracle) abort(); std::array bucketsize; bucketsize.fill(0); size_t i=N-N%16; @@ -295,6 +296,7 @@ multikey_simd(unsigned char** strings, size_t N, size_t depth) assert(bucketsize[0] + bucketsize[1] + bucketsize[2] == N); unsigned char** sorted = static_cast(malloc(N*sizeof(unsigned char*))); + if (!sorted) abort(); size_t bucketindex[3]; bucketindex[0] = 0; bucketindex[1] = bucketsize[0]; @@ -379,8 +381,10 @@ void multikey_simd_b_1(unsigned char** strings, size_t n) { unsigned char** sorted = static_cast(malloc(n*sizeof(unsigned char*))); + if (!sorted) abort(); uint8_t* const restrict oracle = static_cast(_mm_malloc(n, 16)); + if (!oracle) abort(); multikey_simd_b(strings, n, 0, sorted, oracle); _mm_free(oracle); free(sorted); @@ -390,8 +394,10 @@ void multikey_simd_b_2(unsigned char** strings, size_t n) { unsigned char** sorted = static_cast(malloc(n*sizeof(unsigned char*))); + if (!sorted) abort(); uint8_t* const restrict oracle = static_cast(_mm_malloc(n, 16)); + if (!oracle) abort(); multikey_simd_b(strings, n, 0, sorted, oracle); _mm_free(oracle); free(sorted); @@ -401,8 +407,10 @@ void multikey_simd_b_4(unsigned char** strings, size_t n) { 
unsigned char** sorted = static_cast(malloc(n*sizeof(unsigned char*))); + if (!sorted) abort(); uint8_t* const restrict oracle = static_cast(_mm_malloc(n, 16)); + if (!oracle) abort(); multikey_simd_b(strings, n, 0, sorted, oracle); _mm_free(oracle); free(sorted); @@ -426,6 +434,7 @@ multikey_simd_parallel(unsigned char** strings, size_t N, size_t depth) CharT partval = pseudo_median(strings, N, depth); uint8_t* const restrict oracle = static_cast(_mm_malloc(N, 16)); + if (!oracle) abort(); std::array bucketsize; bucketsize.fill(0); size_t i=N-N%32; @@ -450,6 +459,7 @@ multikey_simd_parallel(unsigned char** strings, size_t N, size_t depth) assert(bucketsize[0] + bucketsize[1] + bucketsize[2] == N); unsigned char** sorted = static_cast(malloc(N*sizeof(unsigned char*))); + if (!sorted) abort(); size_t bucketindex[3]; bucketindex[0] = 0; bucketindex[1] = bucketsize[0]; diff --git a/src/vector_bagwell.h b/src/vector_bagwell.h index e109ae0..3c28a35 100644 --- a/src/vector_bagwell.h +++ b/src/vector_bagwell.h @@ -59,6 +59,7 @@ struct vector_bagwell _left_in_block = Initial << _index_block.size(); _insertpos = static_cast( malloc(_left_in_block*sizeof(T))); + if (!_insertpos) abort(); _index_block.push_back(_insertpos); } *_insertpos++ = t; diff --git a/src/vector_block.h b/src/vector_block.h index 10483df..09939b6 100644 --- a/src/vector_block.h +++ b/src/vector_block.h @@ -45,6 +45,7 @@ struct vector_block { if (__builtin_expect(is_full(), false)) { _insertpos = static_cast(malloc(B*sizeof(T))); + if (!_insertpos) abort(); _index_block.push_back(_insertpos); _left_in_block = B; } diff --git a/src/vector_brodnik.h b/src/vector_brodnik.h index 5f88258..077f18c 100644 --- a/src/vector_brodnik.h +++ b/src/vector_brodnik.h @@ -91,6 +91,7 @@ struct vector_brodnik _left_in_superblock = _superblock_size; } _insertpos = static_cast(malloc(_block_size*sizeof(T))); + if (!_insertpos) abort(); _index_block.push_back(_insertpos); _left_in_block = _block_size; --_left_in_superblock; 
diff --git a/src/vector_malloc.h b/src/vector_malloc.h index b2e76f9..4313f9c 100644 --- a/src/vector_malloc.h +++ b/src/vector_malloc.h @@ -62,9 +62,11 @@ class vector_malloc if (_capacity == 0) { _capacity = InitialSize; _data = static_cast(malloc(_capacity*sizeof(T))); + if (!_data) abort(); } else { _capacity <<= 1; T* t = static_cast(malloc(_capacity*sizeof(T))); + if (!t) abort(); (void) memcpy(t, _data, _size*sizeof(T)); free(_data); _data = t; @@ -103,9 +105,11 @@ class vector_malloc_counter_clear if (_capacity == 0) { _capacity = InitialSize; _data = static_cast(malloc(_capacity*sizeof(T))); + if (!_data) abort(); } else { _capacity <<= 1; T* t = static_cast(malloc(_capacity*sizeof(T))); + if (!t) abort(); (void) memcpy(t, _data, _size*sizeof(T)); free(_data); _data = t; diff --git a/src/vector_realloc.h b/src/vector_realloc.h index 6542b2f..5a7cf78 100644 --- a/src/vector_realloc.h +++ b/src/vector_realloc.h @@ -64,6 +64,7 @@ class vector_realloc _capacity = InitialSize; } _data = static_cast(realloc(_data, _capacity*sizeof(T))); + if (!_data) abort(); } T* _data; size_t _size; @@ -100,6 +101,7 @@ class vector_realloc_counter_clear _capacity = InitialSize; } _data = static_cast(realloc(_data, _capacity*sizeof(T))); + if (!_data) abort(); } T* _data; size_t _size; @@ -140,6 +142,7 @@ class vector_realloc_shrink_clear _capacity = InitialSize; } _data = static_cast(realloc(_data, _capacity*sizeof(T))); + if (!_data) abort(); } void shrink() { @@ -147,6 +150,7 @@ class vector_realloc_shrink_clear _capacity = _capacity / 2; _data = static_cast( realloc(_data, _capacity*sizeof(T))); + if (!_data) abort(); } } T* _data; From a372fa7b89621077bac6de07876a98ba4db2f939 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Sat, 16 May 2026 22:20:17 +0300 Subject: [PATCH 06/16] Handle empty input (n == 0) in every sort routine Commit 07d75ee added "if (!ptr) abort()" guards after every malloc() in the algorithm files, but several of the affected callsites do "malloc(n * 
sizeof(...))" where n is the input size received directly from main(). On a conforming allocator, "malloc(0)" may legally return NULL without indicating OOM, in which case the new guard fires and aborts on an empty input. The exposed sites are all public sort entry points -- the wrapper functions that take (unsigned char** strings, size_t n) and allocate a workspace immediately, before any "n < threshold" early-out can absorb the empty case. Internal recursive helpers are unaffected because they already start with an "if (n < 32) insertion_sort; return;" guard that handles n == 0. Add "if (n == 0) return;" at the very top of every affected entry: mergesort.cpp: 6 entries; mergesort_lcp.cpp: 8 entries; mergesort_losertree.cpp: 10 entries; mergesort_unstable.cpp: 3 entries; funnelsort.cpp: 1 (funnelsort_Kway template); multikey_simd.cpp: 3 (multikey_simd_b_1/_2/_4); msd_a.cpp: 2 (msd_A, msd_A_adaptive); msd_a2.cpp: 2 (msd_A2, msd_A2_adaptive); msd_ce.cpp: 4 (msd_CE5/6/7/8); msd_lsd.cpp: 2 (msd_A_lsd, msd_A_lsd_adaptive templates). Sortstring's main() rejects empty input before reaching any routine, and the existing unit-test loops started at k=1, so this bug was not exercised in practice. Extend test_routines() to start each k-loop at 0, giving every registered routine n=0 coverage and locking in the contract that "sorting an empty array succeeds." Running the extended test exposed a related n == 0 bug in burstsort_mkq_simpleburst and burstsort_mkq_recursiveburst (the templates behind all six burstsort_mkq_* registrations): the first operation is "pseudo_median(strings, N, 0)", which dereferences strings[N/2] -- segfault when N == 0. Same fix: "if (N == 0) return;" at the top of both templates. Also fix a latent underflow in util/debug.h's check_result: the loop "for (size_t i=0; i < n-1; ++i)" wraps to SIZE_MAX iterations when n == 0. Guard with "if (n < 2) return 0;" at the top. With these fixes, ./build/unit-test passes end-to-end against the broadened k=0 coverage.
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/burstsort_mkq.cpp | 2 ++ src/funnelsort.cpp | 1 + src/mergesort.cpp | 6 ++++++ src/mergesort_lcp.cpp | 8 ++++++++ src/mergesort_losertree.cpp | 10 ++++++++++ src/mergesort_unstable.cpp | 3 +++ src/msd_a.cpp | 2 ++ src/msd_a2.cpp | 2 ++ src/msd_ce.cpp | 4 ++++ src/msd_lsd.cpp | 2 ++ src/multikey_simd.cpp | 3 +++ src/util/debug.h | 1 + unit-test/main.cpp | 6 +++--- 13 files changed, 47 insertions(+), 3 deletions(-) diff --git a/src/burstsort_mkq.cpp b/src/burstsort_mkq.cpp index c141c44..c0cd44f 100644 --- a/src/burstsort_mkq.cpp +++ b/src/burstsort_mkq.cpp @@ -328,6 +328,7 @@ template static inline void burstsort_mkq_simpleburst(unsigned char** strings, size_t N) { + if (N == 0) return; typedef std::vector BucketT; typedef BurstSimple BurstImpl; TSTNode root; @@ -356,6 +357,7 @@ template static inline void burstsort_mkq_recursiveburst(unsigned char** strings, size_t N) { + if (N == 0) return; typedef std::vector BucketT; typedef BurstRecursive BurstImpl; TSTNode root; diff --git a/src/funnelsort.cpp b/src/funnelsort.cpp index 109018d..fdbfc51 100644 --- a/src/funnelsort.cpp +++ b/src/funnelsort.cpp @@ -695,6 +695,7 @@ template <> void funnelsort<4,buffer_layout_dfs>(unsigned char** strings, template class BufferLayout> void funnelsort_Kway(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); diff --git a/src/mergesort.cpp b/src/mergesort.cpp index 854a5f6..ab12df0 100644 --- a/src/mergesort.cpp +++ b/src/mergesort.cpp @@ -113,6 +113,7 @@ mergesort_2way(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_2way(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -143,6 +144,7 @@ mergesort_2way_parallel(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_2way_parallel(unsigned char** 
strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -317,6 +319,7 @@ mergesort_3way(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_3way(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -351,6 +354,7 @@ mergesort_3way_parallel(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_3way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -482,6 +486,7 @@ mergesort_4way(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_4way(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -521,6 +526,7 @@ mergesort_4way_parallel(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_4way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); diff --git a/src/mergesort_lcp.cpp b/src/mergesort_lcp.cpp index 906aef4..dd06ff3 100644 --- a/src/mergesort_lcp.cpp +++ b/src/mergesort_lcp.cpp @@ -263,6 +263,7 @@ mergesort_lcp_2way(unsigned char** restrict strings_input, void mergesort_lcp_2way(unsigned char** strings, size_t n) { + if (n == 0) return; lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); @@ -337,6 +338,7 @@ mergesort_lcp_2way_parallel( void mergesort_lcp_2way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); @@ -824,6 +826,7 @@ mergesort_lcp_3way(unsigned char** 
restrict strings_input, void mergesort_lcp_3way(unsigned char** strings, size_t n) { + if (n == 0) return; debug() << __func__ << '\n'; lcp_t* lcp_input = (lcp_t*) malloc(n*sizeof(lcp_t)); if (!lcp_input) abort(); @@ -949,6 +952,7 @@ mergesort_lcp_3way_parallel(unsigned char** restrict strings_input, void mergesort_lcp_3way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; debug() << __func__ << '\n'; lcp_t* lcp_input = (lcp_t*) malloc(n*sizeof(lcp_t)); if (!lcp_input) abort(); @@ -1419,6 +1423,7 @@ template static void mergesort_cache_lcp_2way(unsigned char** strings, size_t n) { + if (n == 0) return; lcp_t* lcp_input = (lcp_t*) malloc(n*sizeof(lcp_t)); if (!lcp_input) abort(); lcp_t* lcp_tmp = (lcp_t*) malloc(n*sizeof(lcp_t)); @@ -1540,6 +1545,7 @@ template static void mergesort_cache_lcp_2way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; lcp_t* lcp_input = (lcp_t*) malloc(n*sizeof(lcp_t)); if (!lcp_input) abort(); lcp_t* lcp_tmp = (lcp_t*) malloc(n*sizeof(lcp_t)); @@ -1759,6 +1765,7 @@ mergesort_lcp_2way_unstable(unsigned char** restrict strings_input, void mergesort_lcp_2way_unstable(unsigned char** strings, size_t n) { + if (n == 0) return; lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); @@ -1835,6 +1842,7 @@ mergesort_lcp_2way_unstable_parallel(unsigned char** restrict strings_input, void mergesort_lcp_2way_unstable_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; lcp_t* lcp_input = static_cast(malloc(n*sizeof(lcp_t))); if (!lcp_input) abort(); lcp_t* lcp_output = static_cast(malloc(n*sizeof(lcp_t))); diff --git a/src/mergesort_losertree.cpp b/src/mergesort_losertree.cpp index d226b5e..b20db3c 100644 --- a/src/mergesort_losertree.cpp +++ b/src/mergesort_losertree.cpp @@ -69,6 +69,7 @@ mergesort_losertree(unsigned char** strings, size_t n, unsigned char** tmp) void mergesort_losertree_64way(unsigned char** 
strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -77,6 +78,7 @@ void mergesort_losertree_64way(unsigned char** strings, size_t n) } void mergesort_losertree_128way(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -85,6 +87,7 @@ void mergesort_losertree_128way(unsigned char** strings, size_t n) } void mergesort_losertree_256way(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -93,6 +96,7 @@ void mergesort_losertree_256way(unsigned char** strings, size_t n) } void mergesort_losertree_512way(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -101,6 +105,7 @@ void mergesort_losertree_512way(unsigned char** strings, size_t n) } void mergesort_losertree_1024way(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -149,6 +154,7 @@ mergesort_losertree_parallel(unsigned char** strings, size_t n, unsigned char** void mergesort_losertree_64way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -157,6 +163,7 @@ void mergesort_losertree_64way_parallel(unsigned char** strings, size_t n) } void mergesort_losertree_128way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -165,6 +172,7 @@ void mergesort_losertree_128way_parallel(unsigned char** strings, size_t n) } void mergesort_losertree_256way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = 
static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -173,6 +181,7 @@ void mergesort_losertree_256way_parallel(unsigned char** strings, size_t n) } void mergesort_losertree_512way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -181,6 +190,7 @@ void mergesort_losertree_512way_parallel(unsigned char** strings, size_t n) } void mergesort_losertree_1024way_parallel(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); diff --git a/src/mergesort_unstable.cpp b/src/mergesort_unstable.cpp index 0a1115a..afa1560 100644 --- a/src/mergesort_unstable.cpp +++ b/src/mergesort_unstable.cpp @@ -147,6 +147,7 @@ mergesort_2way_unstable(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_2way_unstable(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -329,6 +330,7 @@ mergesort_3way_unstable(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_3way_unstable(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); @@ -1873,6 +1875,7 @@ mergesort_4way_unstable(unsigned char** strings, size_t n, unsigned char** tmp) } void mergesort_4way_unstable(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** tmp = static_cast( malloc(n*sizeof(unsigned char*))); if (!tmp) abort(); diff --git a/src/msd_a.cpp b/src/msd_a.cpp index cc0b51b..366197f 100644 --- a/src/msd_a.cpp +++ b/src/msd_a.cpp @@ -163,6 +163,7 @@ msd_A_adaptive(cacheblock_t* cache, void msd_A(unsigned char** strings, size_t N) { + if (N == 0) return; cacheblock_t* cache = (cacheblock_t*) malloc(N*sizeof(cacheblock_t)); if (!cache) abort(); for (size_t i=0; i < N; 
++i) cache[i].ptr = strings[i]; @@ -176,6 +177,7 @@ ROUTINE_REGISTER_SINGLECORE(msd_A, "msd_A") void msd_A_adaptive(unsigned char** strings, size_t N) { + if (N == 0) return; cacheblock_t* cache = (cacheblock_t*) malloc(N*sizeof(cacheblock_t)); if (!cache) abort(); for (size_t i=0; i < N; ++i) cache[i].ptr = strings[i]; diff --git a/src/msd_a2.cpp b/src/msd_a2.cpp index 70f3798..3e07c41 100644 --- a/src/msd_a2.cpp +++ b/src/msd_a2.cpp @@ -228,6 +228,7 @@ msd_A2_adaptive(cacheblock_t* cache, void msd_A2(unsigned char** strings, size_t N) { + if (N == 0) return; cacheblock_t* cache = static_cast(malloc(N*sizeof(cacheblock_t))); if (!cache) abort(); @@ -243,6 +244,7 @@ ROUTINE_REGISTER_SINGLECORE(msd_A2, "msd_A2") void msd_A2_adaptive(unsigned char** strings, size_t N) { + if (N == 0) return; cacheblock_t* cache = static_cast(malloc(N*sizeof(cacheblock_t))); if (!cache) abort(); diff --git a/src/msd_ce.cpp b/src/msd_ce.cpp index 60eba56..0fb623b 100644 --- a/src/msd_ce.cpp +++ b/src/msd_ce.cpp @@ -367,6 +367,7 @@ msd_CE5(unsigned char** strings, size_t n, size_t depth, void msd_CE5(unsigned char** strings, size_t n) { + if (n == 0) return; uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); if (!oracle) abort(); @@ -424,6 +425,7 @@ msd_CE6(unsigned char** strings, size_t n, size_t depth, } void msd_CE6(unsigned char** strings, size_t n) { + if (n == 0) return; uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); if (!oracle) abort(); @@ -500,6 +502,7 @@ msd_CE7_(unsigned char** strings, size_t n, size_t depth, } void msd_CE7(unsigned char** strings, size_t n) { + if (n == 0) return; uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); if (!oracle) abort(); @@ -587,6 +590,7 @@ msd_CE8_(unsigned char** strings, size_t n, size_t depth, } void msd_CE8(unsigned char** strings, size_t n) { + if (n == 0) return; uint16_t* restrict oracle = (uint16_t*) malloc(n*sizeof(uint16_t)); if (!oracle) abort(); diff --git a/src/msd_lsd.cpp 
b/src/msd_lsd.cpp index 5b387da..fb3a402 100644 --- a/src/msd_lsd.cpp +++ b/src/msd_lsd.cpp @@ -180,6 +180,7 @@ template static void msd_A_lsd(unsigned char** strings, size_t N) { + if (N == 0) return; Cacheblock* cache = static_cast*>( malloc(N*sizeof(Cacheblock))); if (!cache) abort(); @@ -194,6 +195,7 @@ template static void msd_A_lsd_adaptive(unsigned char** strings, size_t N) { + if (N == 0) return; Cacheblock* cache = static_cast*>( malloc(N*sizeof(Cacheblock))); if (!cache) abort(); diff --git a/src/multikey_simd.cpp b/src/multikey_simd.cpp index d37de88..a77dec3 100644 --- a/src/multikey_simd.cpp +++ b/src/multikey_simd.cpp @@ -379,6 +379,7 @@ multikey_simd_b(unsigned char** strings, size_t N, size_t depth, void multikey_simd_b_1(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** sorted = static_cast(malloc(n*sizeof(unsigned char*))); if (!sorted) abort(); @@ -392,6 +393,7 @@ void multikey_simd_b_1(unsigned char** strings, size_t n) void multikey_simd_b_2(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** sorted = static_cast(malloc(n*sizeof(unsigned char*))); if (!sorted) abort(); @@ -405,6 +407,7 @@ void multikey_simd_b_2(unsigned char** strings, size_t n) void multikey_simd_b_4(unsigned char** strings, size_t n) { + if (n == 0) return; unsigned char** sorted = static_cast(malloc(n*sizeof(unsigned char*))); if (!sorted) abort(); diff --git a/src/util/debug.h b/src/util/debug.h index 6b3b1eb..6eed602 100644 --- a/src/util/debug.h +++ b/src/util/debug.h @@ -41,6 +41,7 @@ static std::string __debug_indent_str; static inline int check_result(unsigned char **strings, size_t n) { + if (n < 2) return 0; size_t wrong = 0; size_t identical = 0; size_t invalid = 0; diff --git a/unit-test/main.cpp b/unit-test/main.cpp index 4fa731d..b1bea77 100644 --- a/unit-test/main.cpp +++ b/unit-test/main.cpp @@ -217,7 +217,7 @@ test_routines() std::cerr << __PRETTY_FUNCTION__ << " [" << routines[i]->name << ']' << std::endl; - for 
(size_t k=1; k < 2000; k += 200) { + for (size_t k=0; k < 2000; k += 200) { std::vector input; for (size_t i=0; i < k; ++i) input.push_back(strdup("aaa")); @@ -231,7 +231,7 @@ test_routines() for (size_t i=0; i < n; ++i) free(input[i]); } - for (size_t k=1; k < 1000; k += 200) { + for (size_t k=0; k < 1000; k += 200) { std::vector input; for (size_t i=0; i < k; ++i) { input.push_back(strdup("bb")); @@ -252,7 +252,7 @@ test_routines() for (size_t i=0; i < n; ++i) free(input[i]); } - for (size_t k=1; k < 10000; k += 2000) { + for (size_t k=0; k < 10000; k += 2000) { std::vector input; for (size_t i=0; i < k; ++i) { char buf[10]; From a585ec7a6fcd1750bfd19b5887ed89c66db866d8 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Mon, 11 May 2026 09:17:54 +0300 Subject: [PATCH 07/16] funnelsort: split by buffer layout for parallel compilation The idea is to speed up parallel builds: previously funnelsort.cpp was one of the slowest files to compile. Move template machinery to a new header src/funnelsort_impl.h, and split the per-K instantiations and routine registrations into one TU per BufferLayout and K: src/funnelsort_{bfs,dfs}_{8,16,32,64,128}way.cpp. Each TU owns its layout's K=4 specialization too.
Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 11 +++- src/funnelsort_bfs_128way.cpp | 32 ++++++++++ src/funnelsort_bfs_16way.cpp | 32 ++++++++++ src/funnelsort_bfs_32way.cpp | 32 ++++++++++ src/funnelsort_bfs_64way.cpp | 32 ++++++++++ src/funnelsort_bfs_8way.cpp | 35 +++++++++++ src/funnelsort_dfs_128way.cpp | 32 ++++++++++ src/funnelsort_dfs_16way.cpp | 32 ++++++++++ src/funnelsort_dfs_32way.cpp | 32 ++++++++++ src/funnelsort_dfs_64way.cpp | 32 ++++++++++ src/funnelsort_dfs_8way.cpp | 35 +++++++++++ src/{funnelsort.cpp => funnelsort_impl.h} | 77 ++++++++--------------- 12 files changed, 361 insertions(+), 53 deletions(-) create mode 100644 src/funnelsort_bfs_128way.cpp create mode 100644 src/funnelsort_bfs_16way.cpp create mode 100644 src/funnelsort_bfs_32way.cpp create mode 100644 src/funnelsort_bfs_64way.cpp create mode 100644 src/funnelsort_bfs_8way.cpp create mode 100644 src/funnelsort_dfs_128way.cpp create mode 100644 src/funnelsort_dfs_16way.cpp create mode 100644 src/funnelsort_dfs_32way.cpp create mode 100644 src/funnelsort_dfs_64way.cpp create mode 100644 src/funnelsort_dfs_8way.cpp rename src/{funnelsort.cpp => funnelsort_impl.h} (90%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95490b4..cd63223 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,16 @@ string(APPEND CMAKE_C_FLAGS_DEBUG " -O1 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2" string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O1 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2") set(INTERNAL_SRCS - src/funnelsort.cpp + src/funnelsort_bfs_128way.cpp + src/funnelsort_bfs_16way.cpp + src/funnelsort_bfs_32way.cpp + src/funnelsort_bfs_64way.cpp + src/funnelsort_bfs_8way.cpp + src/funnelsort_dfs_128way.cpp + src/funnelsort_dfs_16way.cpp + src/funnelsort_dfs_32way.cpp + src/funnelsort_dfs_64way.cpp + src/funnelsort_dfs_8way.cpp src/msd_a.cpp src/msd_a2.cpp src/msd_lsd.cpp diff --git a/src/funnelsort_bfs_128way.cpp b/src/funnelsort_bfs_128way.cpp new file mode 100644 index 0000000..3226595 --- 
/dev/null +++ b/src/funnelsort_bfs_128way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<128, buffer_layout_bfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_128way_bfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<128, buffer_layout_bfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_128way_bfs, + "funnelsort_128way_bfs") diff --git a/src/funnelsort_bfs_16way.cpp b/src/funnelsort_bfs_16way.cpp new file mode 100644 index 0000000..d1d41d6 --- /dev/null +++ b/src/funnelsort_bfs_16way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<16, buffer_layout_bfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_16way_bfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<16, buffer_layout_bfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_16way_bfs, + "funnelsort_16way_bfs") diff --git a/src/funnelsort_bfs_32way.cpp b/src/funnelsort_bfs_32way.cpp new file mode 100644 index 0000000..01ae016 --- /dev/null +++ b/src/funnelsort_bfs_32way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<32, buffer_layout_bfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_32way_bfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<32, buffer_layout_bfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_32way_bfs, + "funnelsort_32way_bfs") diff --git a/src/funnelsort_bfs_64way.cpp b/src/funnelsort_bfs_64way.cpp new file mode 100644 index 0000000..fd9bd3f --- /dev/null +++ b/src/funnelsort_bfs_64way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<64, buffer_layout_bfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_64way_bfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<64, buffer_layout_bfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_64way_bfs, + "funnelsort_64way_bfs") diff --git a/src/funnelsort_bfs_8way.cpp b/src/funnelsort_bfs_8way.cpp new file mode 100644 index 0000000..725f9d3 --- /dev/null +++ b/src/funnelsort_bfs_8way.cpp @@ -0,0 +1,35 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template <> void funnelsort<4, buffer_layout_bfs>(unsigned char** strings, + size_t n, unsigned char** tmp) { mergesort_4way(strings, n, tmp); } + +template void funnelsort<8, buffer_layout_bfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_8way_bfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<8, buffer_layout_bfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_8way_bfs, + "funnelsort_8way_bfs") diff --git a/src/funnelsort_dfs_128way.cpp b/src/funnelsort_dfs_128way.cpp new file mode 100644 index 0000000..88a3e4a --- /dev/null +++ b/src/funnelsort_dfs_128way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<128, buffer_layout_dfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_128way_dfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<128, buffer_layout_dfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_128way_dfs, + "funnelsort_128way_dfs") diff --git a/src/funnelsort_dfs_16way.cpp b/src/funnelsort_dfs_16way.cpp new file mode 100644 index 0000000..7517b50 --- /dev/null +++ b/src/funnelsort_dfs_16way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<16, buffer_layout_dfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_16way_dfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<16, buffer_layout_dfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_16way_dfs, + "funnelsort_16way_dfs") diff --git a/src/funnelsort_dfs_32way.cpp b/src/funnelsort_dfs_32way.cpp new file mode 100644 index 0000000..575b023 --- /dev/null +++ b/src/funnelsort_dfs_32way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<32, buffer_layout_dfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_32way_dfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<32, buffer_layout_dfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_32way_dfs, + "funnelsort_32way_dfs") diff --git a/src/funnelsort_dfs_64way.cpp b/src/funnelsort_dfs_64way.cpp new file mode 100644 index 0000000..fd8d732 --- /dev/null +++ b/src/funnelsort_dfs_64way.cpp @@ -0,0 +1,32 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template void funnelsort<64, buffer_layout_dfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_64way_dfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<64, buffer_layout_dfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_64way_dfs, + "funnelsort_64way_dfs") diff --git a/src/funnelsort_dfs_8way.cpp b/src/funnelsort_dfs_8way.cpp new file mode 100644 index 0000000..ff804ee --- /dev/null +++ b/src/funnelsort_dfs_8way.cpp @@ -0,0 +1,35 @@ +/* + * Copyright 2008 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "funnelsort_impl.h" + +template <> void funnelsort<4, buffer_layout_dfs>(unsigned char** strings, + size_t n, unsigned char** tmp) { mergesort_4way(strings, n, tmp); } + +template void funnelsort<8, buffer_layout_dfs>( + unsigned char**, size_t, unsigned char**); + +void funnelsort_8way_dfs(unsigned char** strings, size_t n) +{ funnelsort_Kway<8, buffer_layout_dfs>(strings, n); } + +ROUTINE_REGISTER_SINGLECORE(funnelsort_8way_dfs, + "funnelsort_8way_dfs") diff --git a/src/funnelsort.cpp b/src/funnelsort_impl.h similarity index 90% rename from src/funnelsort.cpp rename to src/funnelsort_impl.h index fdbfc51..6f760ea 100644 --- a/src/funnelsort.cpp +++ b/src/funnelsort_impl.h @@ -77,6 +77,9 @@ * } */ +#ifndef FUNNELSORT_IMPL_H +#define FUNNELSORT_IMPL_H + #include "routine.h" #include "util/debug.h" #include "util/insertion_sort.h" @@ -650,7 +653,7 @@ struct fill<16,I,BufferLayout> // splitting the input into K streams, and using a fixed size K-merger. // Then use K/4 or K/2 on the next level of recursion. template class BufferLayout> -static void +void funnelsort(unsigned char** strings, size_t n, unsigned char** restrict tmp) { debug() << __func__ << "(), n=" << n << "\n"; @@ -683,14 +686,26 @@ funnelsort(unsigned char** strings, size_t n, unsigned char** restrict tmp) // Switch to 4-way mergesort on small inputs/lower levels of recursion. 
void mergesort_4way(unsigned char**, size_t, unsigned char**); -//template <> void funnelsort<2,buffer_layout_bfs>(unsigned char** strings, -// size_t n, unsigned char** tmp) { mergesort_4way(strings, n, tmp); } -//template <> void funnelsort<2,buffer_layout_dfs>(unsigned char** strings, -// size_t n, unsigned char** tmp) { mergesort_4way(strings, n, tmp); } -template <> void funnelsort<4,buffer_layout_bfs>(unsigned char** strings, - size_t n, unsigned char** tmp) { mergesort_4way(strings, n, tmp); } -template <> void funnelsort<4,buffer_layout_dfs>(unsigned char** strings, - size_t n, unsigned char** tmp) { mergesort_4way(strings, n, tmp); } + +template <> void funnelsort<4, buffer_layout_bfs>( + unsigned char**, size_t, unsigned char**); +template <> void funnelsort<4, buffer_layout_dfs>( + unsigned char**, size_t, unsigned char**); + +#define FUNNELSORT_EXTERN(K, LAYOUT) \ + extern template void funnelsort<K, LAYOUT>( \ + unsigned char**, size_t, unsigned char**); +FUNNELSORT_EXTERN(8, buffer_layout_bfs) +FUNNELSORT_EXTERN(16, buffer_layout_bfs) +FUNNELSORT_EXTERN(32, buffer_layout_bfs) +FUNNELSORT_EXTERN(64, buffer_layout_bfs) +FUNNELSORT_EXTERN(128, buffer_layout_bfs) +FUNNELSORT_EXTERN(8, buffer_layout_dfs) +FUNNELSORT_EXTERN(16, buffer_layout_dfs) +FUNNELSORT_EXTERN(32, buffer_layout_dfs) +FUNNELSORT_EXTERN(64, buffer_layout_dfs) +FUNNELSORT_EXTERN(128, buffer_layout_dfs) +#undef FUNNELSORT_EXTERN template class BufferLayout> void funnelsort_Kway(unsigned char** strings, size_t n) @@ -703,46 +718,4 @@ void funnelsort_Kway(unsigned char** strings, size_t n) free(tmp); } -void funnelsort_8way_bfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<8, buffer_layout_bfs>(strings, n); } -void funnelsort_16way_bfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<16, buffer_layout_bfs>(strings, n); } -void funnelsort_32way_bfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<32, buffer_layout_bfs>(strings, n); } -void funnelsort_64way_bfs(unsigned char** strings, 
size_t n) -{ funnelsort_Kway<64, buffer_layout_bfs>(strings, n); } -void funnelsort_128way_bfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<128, buffer_layout_bfs>(strings, n); } - -void funnelsort_8way_dfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<8, buffer_layout_dfs>(strings, n); } -void funnelsort_16way_dfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<16, buffer_layout_dfs>(strings, n); } -void funnelsort_32way_dfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<32, buffer_layout_dfs>(strings, n); } -void funnelsort_64way_dfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<64, buffer_layout_dfs>(strings, n); } -void funnelsort_128way_dfs(unsigned char** strings, size_t n) -{ funnelsort_Kway<128, buffer_layout_dfs>(strings, n); } - -ROUTINE_REGISTER_SINGLECORE(funnelsort_8way_bfs, - "funnelsort_8way_bfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_16way_bfs, - "funnelsort_16way_bfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_32way_bfs, - "funnelsort_32way_bfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_64way_bfs, - "funnelsort_64way_bfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_128way_bfs, - "funnelsort_128way_bfs") - -ROUTINE_REGISTER_SINGLECORE(funnelsort_8way_dfs, - "funnelsort_8way_dfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_16way_dfs, - "funnelsort_16way_dfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_32way_dfs, - "funnelsort_32way_dfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_64way_dfs, - "funnelsort_64way_dfs") -ROUTINE_REGISTER_SINGLECORE(funnelsort_128way_dfs, - "funnelsort_128way_dfs") +#endif From 7abb863735cfc3e16fed843d97c60c26b7d7bda9 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 13 May 2026 23:28:00 +0300 Subject: [PATCH 08/16] vector_realloc: avoid losing realloc pointers on failure Fix GCC analyzer findings where realloc results were assigned directly to vector storage before checking for failure. 
Store realloc results in temporary pointers first so the original allocation remains reachable if realloc fails and aborts. Co-authored-by: OpenAI Codex --- src/vector_realloc.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/vector_realloc.h b/src/vector_realloc.h index 5a7cf78..dd7e3cd 100644 --- a/src/vector_realloc.h +++ b/src/vector_realloc.h @@ -63,8 +63,9 @@ class vector_realloc if (_capacity == 0) { _capacity = InitialSize; } - _data = static_cast<T*>(realloc(_data, _capacity*sizeof(T))); - if (!_data) abort(); + T* data = static_cast<T*>(realloc(_data, _capacity*sizeof(T))); + if (!data) abort(); + _data = data; } T* _data; size_t _size; @@ -100,8 +101,9 @@ class vector_realloc_counter_clear if (_capacity == 0) { _capacity = InitialSize; } - _data = static_cast<T*>(realloc(_data, _capacity*sizeof(T))); - if (!_data) abort(); + T* data = static_cast<T*>(realloc(_data, _capacity*sizeof(T))); + if (!data) abort(); + _data = data; } T* _data; size_t _size; @@ -141,16 +143,19 @@ class vector_realloc_shrink_clear if (_capacity == 0) { _capacity = InitialSize; } - _data = static_cast<T*>(realloc(_data, _capacity*sizeof(T))); - if (!_data) abort(); + T* data = static_cast<T*>(realloc(_data, _capacity*sizeof(T))); + if (!data) abort(); + _data = data; } void shrink() { if (_capacity > 0x80000) { - _capacity = _capacity / 2; - _data = static_cast<T*>( - realloc(_data, _capacity*sizeof(T))); - if (!_data) abort(); + const size_t capacity = _capacity / 2; + T* data = static_cast<T*>( + realloc(_data, capacity*sizeof(T))); + if (!data) abort(); + _data = data; + _capacity = capacity; } } T* _data; From 1798ccbd09288a7129b3ffa6cc37f6ef5e6b1517 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Wed, 13 May 2026 23:39:17 +0300 Subject: [PATCH 09/16] CMakeLists.txt: add MARCH cache variable Default stays "native". Override the -march= value to target a different ISA level. 
Running the unit-test executable under Valgrind can trip on AVX-512 instructions; this can be avoided, for example, by building with `-DMARCH=x86-64`. ``` ==1437032== valgrind: Unrecognised instruction at address 0x40c34ed. ==1437032== at 0x40C34ED: mergesort_4way_unstable(unsigned char**, unsigned long, unsigned char**) (mergesort_unstable.cpp:1862) ==1437032== by 0x40C3648: mergesort_4way_unstable(unsigned char**, unsigned long) (mergesort_unstable.cpp:1879) ==1437032== by 0x40DD641: test_routines (main.cpp:226) ==1437032== by 0x40DD641: main (main.cpp:302) ==1437032== Your program just tried to execute an instruction that Valgrind ==1437032== did not recognise. There are two possible reasons for this. ==1437032== 1. Your program has a bug and erroneously jumped to a non-code ==1437032== location. If you are running Memcheck and you just saw a ==1437032== warning about a bad jump, it's probably your program's fault. ==1437032== 2. The instruction is legitimate but Valgrind doesn't handle it, ==1437032== i.e. it's Valgrind's fault. If you think this is the case or ==1437032== you are not sure, please let us know and we'll try to fix it. ==1437032== Either way, Valgrind will now raise a SIGILL signal which will ==1437032== probably kill your program. 
==1437032== ==1437032== Process terminating with default action of signal 4 (SIGILL) ==1437032== Illegal opcode at address 0x40C34ED ==1437032== at 0x40C34ED: mergesort_4way_unstable(unsigned char**, unsigned long, unsigned char**) (mergesort_unstable.cpp:1862) ==1437032== by 0x40C3648: mergesort_4way_unstable(unsigned char**, unsigned long) (mergesort_unstable.cpp:1879) ==1437032== by 0x40DD641: test_routines (main.cpp:226) ==1437032== by 0x40DD641: main (main.cpp:302) ``` Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 5 ++++- README.md | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd63223..63c5ef9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,11 +27,14 @@ if(ENABLE_CLANG_ANALYZER) -header-filter=^${CMAKE_SOURCE_DIR}/src/.*) endif() +set(MARCH "native" CACHE STRING + "Value passed to -march=. Override to target a different ISA level.") + check_include_file(sys/sdt.h HAVE_SYS_SDT_H) find_package(OpenMP REQUIRED) -add_compile_options(-Wall -Wextra -march=native) +add_compile_options(-Wall -Wextra -march=${MARCH}) string(APPEND CMAKE_C_FLAGS_RELEASE " -g") string(APPEND CMAKE_CXX_FLAGS_RELEASE " -g") diff --git a/README.md b/README.md index 25c82f5..afbd398 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,18 @@ does not need to be Clang. `ENABLE_GCC_ANALYZER` and `ENABLE_CLANG_ANALYZER` can be combined when building with GCC >= 13. +Target architecture +------------------- + +The compiler `-march=` value can be overridden via the `MARCH` cache +variable (default `native`): + + $ cmake -B build-v3 -G Ninja -DMARCH=x86-64-v3 && ninja -C build-v3 + +Useful for targeting a portable ISA level or restricting the instructions +emitted by the compiler. 
+ + Huge pages ---------- From de469acb4b30cd0878cdc8d8076dfebb333c98e0 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 14 May 2026 23:29:29 +0300 Subject: [PATCH 10/16] median.h: rewrite value-form med3char as min/max identity Replace the cascading-if implementation of med3char(a, b, c) with the branchless median identity max(min(a, b), min(max(a, b), c)). Semantics match on all inputs including ties (a==b -> a, c==a -> a, c==b -> b, strict order picks the middle). Clears a clang-analyzer false positive at src/util/median.h:67 where the analyzer could not prove that every path through the cascading-if returned an initialized value (three clang-analyzer-core.CallAndMessage warnings on the pseudo_median call site). std::min and std::max are fully modeled. The reference-and-comparator overload is not flagged and stays as-is. Assembly impact (release build, gcc -O2 -march=native): the value-form med3char is inlined into pseudo_median; the three instantiations shrink consistently across all four TUs that call them (burstsort_mkq, multikey_block, multikey_dynamic, multikey_simd): pseudo_median: 146 -> 98 insns (-33%) pseudo_median: 301 -> 222 insns (-26%) pseudo_median: 491 -> 388 insns (-21%) The cascading-if produced ~8 insns and 4 conditional jumps per nested med3char call (three string-equality early-outs plus a slow/fast split the compiler could not fuse). The min/max identity compiles to mostly straight-line cmp/cmov pairs with a single branch, so the hot path of pseudo_median is now four nested cmp+cmov sequences followed by ret. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/util/median.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/util/median.h b/src/util/median.h index f4018da..bc70d15 100644 --- a/src/util/median.h +++ b/src/util/median.h @@ -24,21 +24,13 @@ #define UTIL_H #include "get_char.h" +#include <algorithm> template <typename CharT> CharT med3char(CharT a, CharT b, CharT c) { - if (a == b) return a; - if (c == a || c == b) return c; - if (a < b) { - if (b < c) return b; - if (a < c) return c; - return a; - } - if (b > c) return b; - if (a < c) return a; - return c; + return std::max(std::min(a, b), std::min(std::max(a, b), c)); } template From fbc4bf73c6d8d23005eab3f9adf96e7b0a25278a Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 14 May 2026 23:29:06 +0300 Subject: [PATCH 11/16] vma_info: rewrite in C++ Reimplement src/util/vmainfo.c as src/util/vmainfo.cpp using std::ifstream, std::string, and std::vector. The C ABI is preserved via extern "C" so the sole caller (src/sortstring.c) keeps working unchanged. Output layout matches the original byte-for-byte. 
The rewrite incidentally clears the clang-analyzer diagnostics flagged on the C version: - 16 clang-analyzer-security.insecureAPI.strcpy (unbounded strcat) - 8 clang-analyzer-unix.Stream (getline after EOF) - 2 clang-analyzer-unix.Malloc (getline buffer leaked at done:) Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 2 +- src/util/{vmainfo.c => vmainfo.cpp} | 128 +++++++++++++--------------- src/util/vmainfo.h | 8 ++ 3 files changed, 70 insertions(+), 68 deletions(-) rename src/util/{vmainfo.c => vmainfo.cpp} (59%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 63c5ef9..064870b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,7 @@ set(INTERNAL_SRCS src/routines.c src/util/timing.c src/util/cpus_allowed.c - src/util/vmainfo.c) + src/util/vmainfo.cpp) set(EXTERNAL_SRCS external/lcp-quicksort.cpp diff --git a/src/util/vmainfo.c b/src/util/vmainfo.cpp similarity index 59% rename from src/util/vmainfo.c rename to src/util/vmainfo.cpp index 46887c3..3ffd5b1 100644 --- a/src/util/vmainfo.c +++ b/src/util/vmainfo.cpp @@ -20,11 +20,15 @@ * IN THE SOFTWARE. 
*/ -#define _GNU_SOURCE #include "vmainfo.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <fstream> +#include <sstream> +#include <string> +#include <vector> /* Format the /proc/pid/smaps key-value pairs into two columns: * @@ -53,77 +57,67 @@ * Private_Clean: 0 kB | MMUPageSize: 4 kB * Private_Dirty: 390636 kB | Locked: 0 kB */ -static void -add_smaps(char *buf, char **pairs, unsigned pairs_cnt) + +namespace { + +struct Entry { + std::string header; + std::vector<std::string> pairs; +}; + +Entry +find_entry(unsigned long target) { - unsigned i, j; - for (i=0, j=pairs_cnt/2; i < pairs_cnt/2; ++i, ++j) { - pairs[i][strlen(pairs[i])-1] = '\0'; - strcat(buf, " "); - strcat(buf, pairs[i]); - strcat(buf, " | "); - strcat(buf, pairs[j]); - } - if (j < pairs_cnt) { - strcat(buf, " "); - strcat(buf, pairs[j]); + Entry e; + std::ifstream f("/proc/self/smaps"); + if (!f) return e; + for (std::string line; std::getline(f, line); ) { + unsigned long a, b; + if (std::sscanf(line.c_str(), "%lx-%lx", &a, &b) != 2) + continue; + if (!(a <= target && target < b)) + continue; + e.header = std::move(line); + while (std::getline(f, line)) { + if (line.empty()) break; + if (line[0] < 'A' || line[0] > 'Z') break; + if (line.find(':') == std::string::npos) break; + e.pairs.push_back(std::move(line)); + } + break; } + return e; } -static void -free_pairs(char **pairs, unsigned pairs_cnt) +std::string +format(const Entry &e) { - unsigned i; - for (i=0; i < pairs_cnt; ++i) - free(pairs[i]); - free(pairs); + if (e.header.empty()) return std::string(); + std::ostringstream out; + out << " " << e.header << '\n'; + const size_t n = e.pairs.size(); + const size_t half = n / 2; + for (size_t i = 0, j = half; i < half; ++i, ++j) + out << " " << e.pairs[i] << " | " << e.pairs[j] << '\n'; + if (half * 2 < n) + out << " " << e.pairs.back() << '\n'; + return out.str(); } char * +strdup_malloc(const std::string &s) +{ + char *p = static_cast<char *>(std::malloc(s.size() + 1)); + if (!p) std::abort(); + std::memcpy(p, s.c_str(), 
s.size() + 1); + return p; +} + +} // namespace + +extern "C" char * vma_info(void *ptr) { - FILE *fp = NULL; - char *buf = NULL; - char *line = NULL; - char **pairs = NULL, **tmp = NULL; - unsigned pairs_cnt = 0; - size_t line_n = 0; - buf = malloc(2048); - if (!buf) - goto done; - buf[0] = 0; - fp = fopen("/proc/self/smaps", "r"); - if (!fp) - goto done; - while (getline(&line, &line_n, fp) != -1) { - unsigned long a, b; - if (sscanf(line, "%lx-%lx", &a, &b) != 2) - continue; - if (a <= (unsigned long)ptr && (unsigned long)ptr < b) { - /* OK, found it! */ - strcat(buf, " "); - strcat(buf, line); - while (getline(&line, &line_n, fp) != -1) { - if (line[0] >= 'A' && line[0] <= 'Z' - && strchr(line, ':') != NULL) { - tmp = realloc(pairs, (pairs_cnt+1) * sizeof(char *)); - if (!tmp) - goto done; - pairs = tmp; - pairs[pairs_cnt++] = line; - line = NULL; - line_n = 0; - } else { - free(line); - goto done; - } - } - } - } -done: - add_smaps(buf, pairs, pairs_cnt); - free_pairs(pairs, pairs_cnt); - if (fp) - fclose(fp); - return buf; + auto target = reinterpret_cast<unsigned long>(ptr); + return strdup_malloc(format(find_entry(target))); } diff --git a/src/util/vmainfo.h b/src/util/vmainfo.h index 1e6b5a3..0b280d7 100644 --- a/src/util/vmainfo.h +++ b/src/util/vmainfo.h @@ -23,7 +23,15 @@ #ifndef VMAINFO_H #define VMAINFO_H +#ifdef __cplusplus +extern "C" { +#endif + /* Release return value with free() when no longer needed. */ char *vma_info(void *ptr); +#ifdef __cplusplus +} +#endif + #endif /* VMAINFO_H */ From 893a11ea8456810d6ccc0be533a190638fc3333a Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 15 May 2026 12:23:51 +0300 Subject: [PATCH 12/16] cpus_allowed: rewrite in C++ Mirror the vmainfo C++ rewrite for the remaining /proc-parsing helper. std::ifstream / std::string / std::ostringstream replace the manual getline + goto-cleanup + asprintf chains. C linkage is preserved via extern "C" so src/sortstring.c keeps building unchanged. 
Incidental correctness fix: CPU_ALLOC returns uninitialized memory, so the original cpus_allowed left bits for cleared mask positions in indeterminate state. The new implementation CPU_ZERO_S's the set before populating it from the hex string. Output verified identical to the C version on the unrestricted CPU set and a taskset-restricted subset. Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 2 +- src/util/cpus_allowed.c | 198 -------------------------------------- src/util/cpus_allowed.cpp | 168 ++++++++++++++++++++++++++++++++ src/util/cpus_allowed.h | 8 ++ 4 files changed, 177 insertions(+), 199 deletions(-) delete mode 100644 src/util/cpus_allowed.c create mode 100644 src/util/cpus_allowed.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 064870b..001c291 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,7 +74,7 @@ set(INTERNAL_SRCS src/mergesort_lcp.cpp src/routines.c src/util/timing.c - src/util/cpus_allowed.c + src/util/cpus_allowed.cpp src/util/vmainfo.cpp) set(EXTERNAL_SRCS diff --git a/src/util/cpus_allowed.c b/src/util/cpus_allowed.c deleted file mode 100644 index b17cee5..0000000 --- a/src/util/cpus_allowed.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright 2012 by Tommi Rantala - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "cpus_allowed.h" - -#include -#include -#include - -static char * -status_entry(const char *key) -{ - char *result = NULL; - char *line = NULL; - size_t line_n = 0; - FILE *fp; - fp = fopen("/proc/self/status", "r"); - if (!fp) - goto done; - while (getline(&line, &line_n, fp) != -1) { - char *v; - v = strchr(line, ':'); - if (!v || *v == '\0') - continue; - *v = '\0'; - if (strcmp(line, key) != 0) - continue; - ++v; - while (*v == ' ' || *v == '\t') - ++v; - if (strlen(v) > 1) - v[strlen(v)-1] = '\0'; - if (*v == '\0') - goto done; - result = line; - while ((*line++ = *v++)) - ; - goto done; - } -done: - if (!result) - free(line); - if (fp) - fclose(fp); - return result; -} - -char * -cpus_allowed_list(void) -{ - return status_entry("Cpus_allowed_list"); -} - -static int -ishexdigit(char ch) -{ - return (ch >= '0' && ch <= '9') - || (ch >= 'a' && ch <= 'f'); -} - -static int -hex2int(char ch) -{ - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'a' && ch <= 'f') - return ch - 'a' + 10; - abort(); - return 0; -} - -static int -high_bit_order(char *allowed) -{ - int order = -1; - int i = 0; - int k = strlen(allowed)-1; - for (; k >= 0; --k) { - char ch = allowed[k]; - if (!ishexdigit(ch)) - continue; - int mask = hex2int(ch); - if (mask) { - int neworder; - if (mask & 8) - neworder = 4 + i; - else if (mask & 4) - neworder = 3 + i; - else if (mask & 2) - neworder = 2 + i; - else - neworder = 1 + i; - if (neworder > order) - order = neworder; - } - i += 4; 
- } - return order; -} - -static void -set_cpu_bits(char *allowed, cpu_set_t *c, size_t setsize) -{ - int i = 0; - int k = strlen(allowed)-1; - for (; k >= 0; --k) { - char ch = allowed[k]; - if (!ishexdigit(ch)) - continue; - int mask = hex2int(ch); - if (mask & 1) CPU_SET_S(i+0, setsize, c); - if (mask & 2) CPU_SET_S(i+1, setsize, c); - if (mask & 4) CPU_SET_S(i+2, setsize, c); - if (mask & 8) CPU_SET_S(i+3, setsize, c); - i += 4; - } -} - -cpu_set_t * -cpus_allowed(size_t *setsize, int *maxcpu) -{ - cpu_set_t *c = NULL; - char *allowed = status_entry("Cpus_allowed"); - if (!allowed || strlen(allowed) == 0) - goto done; - *maxcpu = high_bit_order(allowed); - if (*maxcpu == -1) - goto done; - c = CPU_ALLOC(*maxcpu+1); - if (!c) - goto done; - *setsize = CPU_ALLOC_SIZE(*maxcpu+1); - set_cpu_bits(allowed, c, *setsize); -done: - free(allowed); - return c; -} - -int -cpu_scaling_min_freq(int cpu) -{ - int min_freq; - FILE *fp; - char *filename = NULL; - if (asprintf(&filename, - "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", - cpu) == -1) { - return -1; - } - fp = fopen(filename, "r"); - free(filename); - if (!fp) - return -1; - if (fscanf(fp, "%d", &min_freq) != 1) - min_freq = -1; - fclose(fp); - return min_freq; -} - -int -cpu_scaling_max_freq(int cpu) -{ - int max_freq; - FILE *fp; - char *filename = NULL; - if (asprintf(&filename, - "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", - cpu) == -1) { - return -1; - } - fp = fopen(filename, "r"); - free(filename); - if (!fp) - return -1; - if (fscanf(fp, "%d", &max_freq) != 1) - max_freq = -1; - fclose(fp); - return max_freq; -} diff --git a/src/util/cpus_allowed.cpp b/src/util/cpus_allowed.cpp new file mode 100644 index 0000000..d884398 --- /dev/null +++ b/src/util/cpus_allowed.cpp @@ -0,0 +1,168 @@ +/* + * Copyright 2012 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to 
+ * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "cpus_allowed.h" + +#include +#include +#include +#include +#include +#include + +namespace { + +char * +strdup_malloc(const std::string &s) +{ + char *p = static_cast(std::malloc(s.size() + 1)); + if (!p) std::abort(); + std::memcpy(p, s.c_str(), s.size() + 1); + return p; +} + +/* Read /proc/self/status, return the (whitespace-trimmed) value of the + * requested key, or an empty string if not present. 
*/ +std::string +status_entry(const std::string &key) +{ + std::ifstream f("/proc/self/status"); + if (!f) return std::string(); + for (std::string line; std::getline(f, line); ) { + auto colon = line.find(':'); + if (colon == std::string::npos) continue; + if (line.compare(0, colon, key) != 0) continue; + auto v = colon + 1; + while (v < line.size() && (line[v] == ' ' || line[v] == '\t')) + ++v; + auto end = line.size(); + while (end > v && std::isspace(static_cast(line[end-1]))) + --end; + return line.substr(v, end - v); + } + return std::string(); +} + +bool +ishex(char ch) +{ + return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f'); +} + +int +hex_val(char ch) +{ + return (ch >= '0' && ch <= '9') ? ch - '0' : ch - 'a' + 10; +} + +/* Highest bit index set in the hex bitmask string `allowed` (low-order + * digits on the right). Non-hex characters (e.g. comma separators) are + * skipped. Returns -1 if no bits are set. */ +int +high_bit_order(const std::string &allowed) +{ + int order = -1; + int i = 0; + for (auto it = allowed.rbegin(); it != allowed.rend(); ++it) { + char ch = *it; + if (!ishex(ch)) continue; + int mask = hex_val(ch); + if (mask) { + int neworder; + if (mask & 8) neworder = 4 + i; + else if (mask & 4) neworder = 3 + i; + else if (mask & 2) neworder = 2 + i; + else neworder = 1 + i; + if (neworder > order) order = neworder; + } + i += 4; + } + return order; +} + +void +set_cpu_bits(const std::string &allowed, cpu_set_t *c, size_t setsize) +{ + int i = 0; + for (auto it = allowed.rbegin(); it != allowed.rend(); ++it) { + char ch = *it; + if (!ishex(ch)) continue; + int mask = hex_val(ch); + if (mask & 1) CPU_SET_S(i+0, setsize, c); + if (mask & 2) CPU_SET_S(i+1, setsize, c); + if (mask & 4) CPU_SET_S(i+2, setsize, c); + if (mask & 8) CPU_SET_S(i+3, setsize, c); + i += 4; + } +} + +int +read_int_file(const std::string &path) +{ + std::ifstream f(path); + if (!f) return -1; + int v; + if (!(f >> v)) return -1; + return v; +} + +} // namespace + 
+extern "C" char * +cpus_allowed_list(void) +{ + std::string s = status_entry("Cpus_allowed_list"); + if (s.empty()) return nullptr; + return strdup_malloc(s); +} + +extern "C" cpu_set_t * +cpus_allowed(size_t *setsize, int *maxcpu) +{ + std::string allowed = status_entry("Cpus_allowed"); + if (allowed.empty()) return nullptr; + int top = high_bit_order(allowed); + if (top == -1) return nullptr; + *maxcpu = top; + cpu_set_t *c = CPU_ALLOC(top + 1); + if (!c) return nullptr; + *setsize = CPU_ALLOC_SIZE(top + 1); + CPU_ZERO_S(*setsize, c); + set_cpu_bits(allowed, c, *setsize); + return c; +} + +extern "C" int +cpu_scaling_min_freq(int cpu) +{ + std::ostringstream path; + path << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_min_freq"; + return read_int_file(path.str()); +} + +extern "C" int +cpu_scaling_max_freq(int cpu) +{ + std::ostringstream path; + path << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_max_freq"; + return read_int_file(path.str()); +} diff --git a/src/util/cpus_allowed.h b/src/util/cpus_allowed.h index ed6fc9e..a0e8cda 100644 --- a/src/util/cpus_allowed.h +++ b/src/util/cpus_allowed.h @@ -26,9 +26,17 @@ #define _GNU_SOURCE #include +#ifdef __cplusplus +extern "C" { +#endif + char *cpus_allowed_list(void); cpu_set_t *cpus_allowed(size_t *, int *maxcpu); int cpu_scaling_max_freq(int cpu); int cpu_scaling_min_freq(int cpu); +#ifdef __cplusplus +} +#endif + #endif /* CPUS_ALLOWED_H */ From 690574e039fab1880866c129ec3e014175dc12c5 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 15 May 2026 12:33:45 +0300 Subject: [PATCH 13/16] timing: rewrite in C++ Mirror the vmainfo / cpus_allowed C++ rewrite for the remaining util. Anonymous-namespace statics replace file-scope statics; a small ms_between() helper deduplicates the per-clock delta computation (four call sites collapse to one expression each). C linkage is preserved via extern "C". 
Incidental precision fix: the C version divided tv_nsec by an integer 1000000, truncating sub-millisecond resolution for wall-clock and PROCESS_CPUTIME. user/sys already had microsecond resolution via tv_usec/1e3. Switching to /1000000.0 makes all five reported timings consistent at microsecond precision. Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 2 +- src/util/timing.c | 87 --------------------------------------- src/util/timing.cpp | 99 +++++++++++++++++++++++++++++++++++++++++++++ src/util/timing.h | 8 ++++ 4 files changed, 108 insertions(+), 88 deletions(-) delete mode 100644 src/util/timing.c create mode 100644 src/util/timing.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 001c291..1de4388 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ set(INTERNAL_SRCS src/mergesort_losertree.cpp src/mergesort_lcp.cpp src/routines.c - src/util/timing.c + src/util/timing.cpp src/util/cpus_allowed.cpp src/util/vmainfo.cpp) diff --git a/src/util/timing.c b/src/util/timing.c deleted file mode 100644 index f072214..0000000 --- a/src/util/timing.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2007-2008,2011 by Tommi Rantala - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#define _GNU_SOURCE -#include -#include -#include -#include - -static struct timespec process_cputime_start; -static struct timespec process_cputime_stop; -static struct timespec monotonic_start; -static struct timespec monotonic_stop; -static struct rusage startclock; -static struct rusage stopclock; - -void timing_start(void) -{ - getrusage(RUSAGE_SELF, &startclock); - clock_gettime(CLOCK_MONOTONIC, &monotonic_start); - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_start); -} - -void timing_stop(void) -{ - getrusage(RUSAGE_SELF, &stopclock); - clock_gettime(CLOCK_MONOTONIC, &monotonic_stop); - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_stop); -} - -double gettime_wall_clock(void) -{ - double msecs_1 = monotonic_start.tv_nsec/1000000 + 1000*monotonic_start.tv_sec; - double msecs_2 = monotonic_stop.tv_nsec/1000000 + 1000*monotonic_stop.tv_sec; - return msecs_2 - msecs_1; -} - -double gettime_user(void) -{ - struct timeval result; - timersub(&stopclock.ru_utime, &startclock.ru_utime, &result); - return (double)(result.tv_sec*1000)+(double)(result.tv_usec)/1e3; -} - -double gettime_sys(void) -{ - struct timeval result; - timersub(&stopclock.ru_stime, &startclock.ru_stime, &result); - return (double)(result.tv_sec*1000)+(double)(result.tv_usec)/1e3; -} - -double gettime_user_sys(void) -{ - struct timeval result_user; - struct timeval result_sys; - struct timeval result; - timersub(&stopclock.ru_utime, &startclock.ru_utime, &result_user); - timersub(&stopclock.ru_stime, &startclock.ru_stime, &result_sys); - timeradd(&result_user, &result_sys, &result); - return (double)(result.tv_sec*1000)+(double)(result.tv_usec)/1e3; -} - -double gettime_process_cputime(void) -{ - 
double msecs_1 = process_cputime_start.tv_nsec/1000000 + 1000*process_cputime_start.tv_sec; - double msecs_2 = process_cputime_stop.tv_nsec/1000000 + 1000*process_cputime_stop.tv_sec; - return msecs_2 - msecs_1; -} diff --git a/src/util/timing.cpp b/src/util/timing.cpp new file mode 100644 index 0000000..5243fb6 --- /dev/null +++ b/src/util/timing.cpp @@ -0,0 +1,99 @@ +/* + * Copyright 2007-2008,2011 by Tommi Rantala + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "timing.h" + +#include +#include +#include + +namespace { + +timespec process_cputime_start; +timespec process_cputime_stop; +timespec monotonic_start; +timespec monotonic_stop; +rusage startclock; +rusage stopclock; + +double +ms_between(const timespec &a, const timespec &b) +{ + return (b.tv_nsec / 1000000.0 + 1000.0 * b.tv_sec) + - (a.tv_nsec / 1000000.0 + 1000.0 * a.tv_sec); +} + +double +ms_between(const timeval &a, const timeval &b) +{ + timeval d; + timersub(&b, &a, &d); + return d.tv_sec * 1000.0 + d.tv_usec / 1000.0; +} + +} // namespace + +extern "C" void +timing_start(void) +{ + getrusage(RUSAGE_SELF, &startclock); + clock_gettime(CLOCK_MONOTONIC, &monotonic_start); + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_start); +} + +extern "C" void +timing_stop(void) +{ + getrusage(RUSAGE_SELF, &stopclock); + clock_gettime(CLOCK_MONOTONIC, &monotonic_stop); + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_stop); +} + +extern "C" double +gettime_wall_clock(void) +{ + return ms_between(monotonic_start, monotonic_stop); +} + +extern "C" double +gettime_user(void) +{ + return ms_between(startclock.ru_utime, stopclock.ru_utime); +} + +extern "C" double +gettime_sys(void) +{ + return ms_between(startclock.ru_stime, stopclock.ru_stime); +} + +extern "C" double +gettime_user_sys(void) +{ + return gettime_user() + gettime_sys(); +} + +extern "C" double +gettime_process_cputime(void) +{ + return ms_between(process_cputime_start, process_cputime_stop); +} diff --git a/src/util/timing.h b/src/util/timing.h index 0eedca1..1139e48 100644 --- a/src/util/timing.h +++ b/src/util/timing.h @@ -23,6 +23,10 @@ #ifndef TIMING_H #define TIMING_H +#ifdef __cplusplus +extern "C" { +#endif + void timing_start(void); void timing_stop(void); @@ -32,4 +36,8 @@ double gettime_user_sys(void); double gettime_process_cputime(void); double gettime_wall_clock(void); +#ifdef __cplusplus +} +#endif + #endif /* TIMING_H */ From 
bca0083209463ebb78d27b0fff5370d0e49582ad Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 15 May 2026 12:51:29 +0300 Subject: [PATCH 14/16] sortstring + routines: rewrite in C++ Finish the src/ migration: the executable entry point and the routine registry are the last two C translation units. Convert them and drop the C-compat scaffolding that was carrying their callers. Mechanical translation: - File-scope statics move into anonymous namespaces. - C-style void-pointer casts become static_cast<>. - Implicit conversions through munmap drop their casts. - Stdlib includes use the <cname> form. - nullptr, std::strcmp, std::qsort, std::fopen etc. Linkage cleanup: - routine_register stays extern "C" (called by external/*.c at 149 ROUTINE_REGISTER sites; src/routine.h keeps its extern "C" guard). - routine_from_name and routine_get_all become plain C++ linkage (only C++ callers remain). - src/util/{vmainfo,cpus_allowed,timing}.h drop their extern "C" guards along with the extern "C" specifiers on the .cpp definitions. - _GNU_SOURCE defines drop too: g++ predefines it for libstdc++. Behavior preserved: output of -L / -A / --help / a full sort run (modulo ASLR mmap addresses and timing values) is byte-identical to the pre-rewrite binary. Error-path stderr and exit codes match. external/*.c routines still self-register correctly via the ROUTINE_REGISTER macro. clang-analyzer warning count: 29 -> 0.
Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 4 +- src/{routines.c => routines.cpp} | 69 ++--- src/routines.h | 12 +- src/{sortstring.c => sortstring.cpp} | 396 +++++++++++++-------------- src/util/cpus_allowed.cpp | 8 +- src/util/cpus_allowed.h | 9 - src/util/timing.cpp | 14 +- src/util/timing.h | 8 - src/util/vmainfo.cpp | 2 +- src/util/vmainfo.h | 8 - 10 files changed, 245 insertions(+), 285 deletions(-) rename src/{routines.c => routines.cpp} (68%) rename src/{sortstring.c => sortstring.cpp} (70%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1de4388..990c527 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ set(INTERNAL_SRCS src/mergesort_unstable.cpp src/mergesort_losertree.cpp src/mergesort_lcp.cpp - src/routines.c + src/routines.cpp src/util/timing.cpp src/util/cpus_allowed.cpp src/util/vmainfo.cpp) @@ -126,7 +126,7 @@ if(ENABLE_CLANG_ANALYZER) CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND}") endif() -add_executable(sortstring src/sortstring.c ${EXTERNAL_SRCS}) +add_executable(sortstring src/sortstring.cpp ${EXTERNAL_SRCS}) target_link_libraries(sortstring PRIVATE sortstring_internal rt) add_executable(unit-test unit-test/main.cpp ${EXTERNAL_SRCS}) diff --git a/src/routines.c b/src/routines.cpp similarity index 68% rename from src/routines.c rename to src/routines.cpp index d9fa44a..3f69b21 100644 --- a/src/routines.c +++ b/src/routines.cpp @@ -20,56 +20,61 @@ * IN THE SOFTWARE. 
*/ -#include "routine.h" -#include +#include "routines.h" -#define ROUTINES_MAX 256 +#include +#include -static const struct routine *routines[ROUTINES_MAX]; -static unsigned routine_cnt; +namespace { -void -routine_register(const struct routine *r) +constexpr unsigned ROUTINES_MAX = 256; + +const routine *routines[ROUTINES_MAX]; +unsigned routine_cnt; + +int +routine_cmp(const void *a, const void *b) +{ + const routine *aa = *static_cast(a); + const routine *bb = *static_cast(b); + if (aa->f == bb->f) + return 0; + if (aa->multicore < bb->multicore) + return -1; + if (aa->multicore > bb->multicore) + return 1; + return std::strcmp(aa->name, bb->name); +} + +} // namespace + +extern "C" void +routine_register(const routine *r) { if (!r) - abort(); + std::abort(); if (!r->name) - abort(); + std::abort(); if (!r->desc) - abort(); + std::abort(); if (routine_cnt >= ROUTINES_MAX) - abort(); + std::abort(); routines[routine_cnt++] = r; } -const struct routine * +const routine * routine_from_name(const char *name) { - unsigned i; - for (i=0; i < routine_cnt; ++i) - if (strcmp(name, routines[i]->name) == 0) + for (unsigned i = 0; i < routine_cnt; ++i) + if (std::strcmp(name, routines[i]->name) == 0) return routines[i]; - return NULL; -} - -static int -routine_cmp(const void *a, const void *b) -{ - const struct routine *aa = *(const struct routine **)a; - const struct routine *bb = *(const struct routine **)b; - if (aa->f == bb->f) - return 0; - if (aa->multicore < bb->multicore) - return -1; - if (aa->multicore > bb->multicore) - return 1; - return strcmp(aa->name, bb->name); + return nullptr; } void -routine_get_all(const struct routine ***r, unsigned *cnt) +routine_get_all(const routine ***r, unsigned *cnt) { *r = routines; *cnt = routine_cnt; - qsort(*r, *cnt, sizeof(struct routine *), routine_cmp); + std::qsort(*r, *cnt, sizeof(routine *), routine_cmp); } diff --git a/src/routines.h b/src/routines.h index 1c43f3a..aff46fb 100644 --- a/src/routines.h +++ 
b/src/routines.h @@ -25,15 +25,7 @@ #include "routine.h" -#ifdef __cplusplus -extern "C" { -#endif - -const struct routine *routine_from_name(const char *); -void routine_get_all(const struct routine ***, unsigned *); - -#ifdef __cplusplus -} -#endif +const routine *routine_from_name(const char *); +void routine_get_all(const routine ***, unsigned *); #endif /* ROUTINES_H */ diff --git a/src/sortstring.c b/src/sortstring.cpp similarity index 70% rename from src/sortstring.c rename to src/sortstring.cpp index 6b5710b..af586fe 100644 --- a/src/sortstring.c +++ b/src/sortstring.cpp @@ -20,8 +20,6 @@ * IN THE SOFTWARE. */ -#define _GNU_SOURCE - #include "timing.h" #include "vmainfo.h" #include "routines.h" @@ -29,12 +27,11 @@ #include "util/debug.h" #include "util/sdt.h" -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include #include @@ -42,9 +39,12 @@ #include #include #include +#include + +namespace { -static struct { - const struct routine *r; +struct { + const routine *r; char *write_filename; unsigned suffixsorting : 1; unsigned check_result : 1; @@ -57,177 +57,172 @@ static struct { int perf_control_fd; } opts; -static FILE *log_file; +FILE *log_file; -static void +void open_log_file(void) { - char *log_fn = NULL; - char *host = NULL; + char *log_fn = nullptr; + char *host = nullptr; if (log_file) return; - host = getenv("HOSTNAME"); + host = std::getenv("HOSTNAME"); if (host && *host) if (asprintf(&log_fn, "sortstring_log_%s", host) == -1) - log_fn = NULL; + log_fn = nullptr; if (log_fn) - log_file = fopen(log_fn, "a"); + log_file = std::fopen(log_fn, "a"); else - log_file = fopen("sortstring_log", "a"); - free(log_fn); + log_file = std::fopen("sortstring_log", "a"); + std::free(log_fn); if (log_file) setlinebuf(log_file); } -static void +void perf_control_write(int fd, const char *msg) { - ssize_t len = (ssize_t)strlen(msg); + ssize_t len = (ssize_t)std::strlen(msg); ssize_t ret = 
write(fd, msg, len); if (ret != len) { int err = errno; - fprintf(stderr, + std::fprintf(stderr, "ERROR: perf control fd write %s failed (ret=%zd, errno=%d): %s\n", - msg, ret, err, strerror(err)); + msg, ret, err, std::strerror(err)); if (log_file) - fprintf(log_file, + std::fprintf(log_file, "FATAL: perf control fd write %s failed (ret=%zd, errno=%d): %s\n", - msg, ret, err, strerror(err)); - exit(1); + msg, ret, err, std::strerror(err)); + std::exit(1); } } -static void +void perf_control_enable(int fd) { perf_control_write(fd, "enable\n"); } -static void +void perf_control_disable(int fd) { perf_control_write(fd, "disable\n"); } -static void +void opcontrol_start(void) { - int ret = system("opcontrol --start"); + int ret = std::system("opcontrol --start"); if (ret == -1 || WIFEXITED(ret) == 0 || WEXITSTATUS(ret) != 0) { - fprintf(stderr, "ERROR: opcontrol --start failed.\n"); + std::fprintf(stderr, "ERROR: opcontrol --start failed.\n"); if (log_file) - fprintf(log_file, + std::fprintf(log_file, "FATAL: opcontrol --start failed. " "ret=%d, WIFEXITED=%d, WEXITSTATUS=%d\n", ret, WIFEXITED(ret), WEXITSTATUS(ret)); - exit(1); + std::exit(1); } } -static void +void opcontrol_stop(void) { - int ret = system("opcontrol --stop"); + int ret = std::system("opcontrol --stop"); if (ret == -1 || WIFEXITED(ret) == 0 || WEXITSTATUS(ret) != 0) { - fprintf(stderr, "ERROR: opcontrol --stop failed."); + std::fprintf(stderr, "ERROR: opcontrol --stop failed."); if (log_file) - fprintf(log_file, + std::fprintf(log_file, "FATAL: opcontrol --stop failed. 
" "ret=%d, WIFEXITED=%d, WEXITSTATUS=%d\n", ret, WIFEXITED(ret), WEXITSTATUS(ret)); - exit(1); + std::exit(1); } } -static char * +char * bazename(const char *fname) { static char buf[300]; - strcpy(buf, fname); + std::strcpy(buf, fname); return basename(buf); } -static void * +void * alloc_bytes(size_t bytes, int hugetlb) { - int map_flags; - void *p; - map_flags = MAP_ANONYMOUS | MAP_PRIVATE; + int map_flags = MAP_ANONYMOUS | MAP_PRIVATE; if (hugetlb) map_flags |= MAP_HUGETLB; - p = mmap(NULL, bytes, PROT_READ | PROT_WRITE, map_flags, -1, 0); + void *p = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, map_flags, -1, 0); if (p == MAP_FAILED) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to mmap memory for input: %s.\n", - strerror(errno)); - exit(1); + std::strerror(errno)); + std::exit(1); } return p; } -static unsigned char * +unsigned char * alloc_text(size_t bytes) { - void *p = alloc_bytes(bytes, opts.hugetlb_text); - return (unsigned char *)p; + return static_cast(alloc_bytes(bytes, opts.hugetlb_text)); } -static unsigned char ** +unsigned char ** alloc_pointers(size_t num) { - void *p = alloc_bytes(num*sizeof(unsigned char *), - opts.hugetlb_pointers); - return (unsigned char **)p; + return static_cast( + alloc_bytes(num * sizeof(unsigned char *), opts.hugetlb_pointers)); } -static void +void free_text(unsigned char *text, size_t text_len) { - munmap((void *)text, text_len); + munmap(text, text_len); } -static void +void free_pointers(unsigned char **strings, size_t strings_len) { - munmap((void *)strings, strings_len); + munmap(strings, strings_len); } -static off_t +off_t file_size(int fd) { - off_t size; - size = lseek(fd, 0, SEEK_END); + off_t size = lseek(fd, 0, SEEK_END); if (size == -1) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to lseek() input file: %s.\n", - strerror(errno)); - exit(1); + std::strerror(errno)); + std::exit(1); } if (lseek(fd, 0, SEEK_SET) == -1) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to 
lseek() input file: %s.\n", - strerror(errno)); - exit(1); + std::strerror(errno)); + std::exit(1); } return size; } -static void +void input_copy(const char *fname, unsigned char **text_, size_t *text_len_) { int fd = open(fname, O_RDONLY); if (fd == -1) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to open() input file '%s': %s.\n", - fname, strerror(errno)); - exit(1); + fname, std::strerror(errno)); + std::exit(1); } off_t filesize = file_size(fd); if (filesize <= 0) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: input file '%s' empty.\n", fname); - exit(1); + std::exit(1); } unsigned char *text = alloc_text(filesize); const size_t block_size = 128*1024; @@ -239,23 +234,23 @@ input_copy(const char *fname, unsigned char **text_, size_t *text_len_) if (ret < 0) { if (errno == EINTR) continue; - fprintf(stderr, "ERROR: failed read() " + std::fprintf(stderr, "ERROR: failed read() " "from input file '%s': %s.\n", - fname, strerror(errno)); - exit(1); + fname, std::strerror(errno)); + std::exit(1); } else if (ret == 0) { - fprintf(stderr, "ERROR: EOF read() before reading " + std::fprintf(stderr, "ERROR: EOF read() before reading " "whole input file '%s': %s.\n", - fname, strerror(errno)); - exit(1); + fname, std::strerror(errno)); + std::exit(1); } i += ret; } if (close(fd) == -1) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to close() input file '%s': %s.\n", - fname, strerror(errno)); - exit(1); + fname, std::strerror(errno)); + std::exit(1); } *text_ = text; *text_len_ = filesize; @@ -263,41 +258,41 @@ input_copy(const char *fname, unsigned char **text_, size_t *text_len_) /* mmap() input data that is in raw format (uses NULL bytes for delimiting * strings). 
*/ -static void +void input_mmap(const char *fname, unsigned char **text, size_t *text_len) { int fd = open(fname, O_RDONLY); if (fd == -1) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to open() input file '%s': %s.\n", - fname, strerror(errno)); - exit(1); + fname, std::strerror(errno)); + std::exit(1); } off_t filesize = file_size(fd); if (filesize <= 0) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: input file '%s' empty.\n", fname); - exit(1); + std::exit(1); } void *raw = mmap(0, filesize, PROT_READ, MAP_PRIVATE, fd, 0); if (raw == MAP_FAILED) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to mmap input file '%s': %s.\n", - fname, strerror(errno)); - exit(1); + fname, std::strerror(errno)); + std::exit(1); } if (close(fd) == -1) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to close() input file '%s': %s.\n", - fname, strerror(errno)); - exit(1); + fname, std::strerror(errno)); + std::exit(1); } - *text = (unsigned char *)raw; + *text = static_cast(raw); *text_len = filesize; } -static void +void readbytes(const char *fname, unsigned char **text, size_t *text_len) { /* mapping file with MAP_HUGETLB does not work. 
*/ @@ -307,7 +302,7 @@ readbytes(const char *fname, unsigned char **text, size_t *text_len) return input_copy(fname, text, text_len); } -static void +void create_strings_delim(unsigned char *text, size_t text_len, int delim, unsigned char ***strings, size_t *strings_cnt) { @@ -316,10 +311,10 @@ create_strings_delim(unsigned char *text, size_t text_len, int delim, if (text[i] == delim) ++strs_cnt; if (strs_cnt == 0) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: unable to read any lines from the input " "file.\n"); - exit(1); + std::exit(1); } unsigned char **strs = alloc_pointers(strs_cnt); unsigned char *line_start = text; @@ -334,7 +329,7 @@ create_strings_delim(unsigned char *text, size_t text_len, int delim, *strings_cnt = strs_cnt; } -static void +void create_strings(unsigned char *text, size_t text_len, unsigned char ***strings, size_t *strings_cnt) { @@ -346,7 +341,7 @@ create_strings(unsigned char *text, size_t text_len, strings, strings_cnt); } -static void +void create_suffixes(unsigned char *text, size_t text_len, unsigned char ***strings, size_t *strings_cnt) { @@ -357,40 +352,34 @@ create_suffixes(unsigned char *text, size_t text_len, *strings_cnt = text_len; } -static void +void write_result(unsigned char **strings, size_t n) { FILE *fp; if (!opts.write_filename) { - const char *username = getenv("USERNAME"); + const char *username = std::getenv("USERNAME"); if (!username) username = ""; if (asprintf(&opts.write_filename, "/tmp/%s/alg.out", username) == -1) - opts.write_filename = NULL; + opts.write_filename = nullptr; } - fp = fopen(opts.write_filename, "w"); + fp = std::fopen(opts.write_filename, "w"); if (!fp) { - fprintf(stderr, + std::fprintf(stderr, "WARNING: --write failed: " "unable to open file for writing!\n"); return; } for (size_t i=0; i < n; ++i) { - fputs((const char *)strings[i], fp); - fputc('\n', fp); + std::fputs(reinterpret_cast(strings[i]), fp); + std::fputc('\n', fp); } - fclose(fp); - fprintf(stderr, "Wrote sorted output to 
'%s'.\n", + std::fclose(fp); + std::fprintf(stderr, "Wrote sorted output to '%s'.\n", opts.write_filename); - /* - std::cout << "\n"; - system("md5sum " + opts.write_filename); - system("cat " + opts.input_filename + ".md5sum"); - std::cout << "\n"; - */ } -static void +void print_timing_results_xml(void) { /* @@ -402,17 +391,17 @@ print_timing_results_xml(void) */ } -static void +void print_timing_results_human(void) { - printf("%10.2f ms : wall-clock\n", gettime_wall_clock()); - printf("%10.2f ms : user\n", gettime_user()); - printf("%10.2f ms : sys\n", gettime_sys()); - printf("%10.2f ms : user+sys\n", gettime_user_sys()); - printf("%10.2f ms : PROCESS_CPUTIME\n", gettime_process_cputime()); + std::printf("%10.2f ms : wall-clock\n", gettime_wall_clock()); + std::printf("%10.2f ms : user\n", gettime_user()); + std::printf("%10.2f ms : sys\n", gettime_sys()); + std::printf("%10.2f ms : user+sys\n", gettime_user_sys()); + std::printf("%10.2f ms : PROCESS_CPUTIME\n", gettime_process_cputime()); } -static void +void print_timing_results(void) { if (opts.xml_stats) @@ -422,10 +411,10 @@ print_timing_results(void) } int -run(const struct routine *r, unsigned char **strings, size_t n) +run(const routine *r, unsigned char **strings, size_t n) { int ret = 0; - puts("Timing ..."); + std::puts("Timing ..."); if (opts.oprofile) opcontrol_start(); if (opts.perf_control_fd > 0) @@ -443,128 +432,126 @@ run(const struct routine *r, unsigned char **strings, size_t n) if (opts.check_result) { ret = check_result(strings, n); if (ret == 0) - fprintf(stderr, "Check: GOOD\n"); + std::fprintf(stderr, "Check: GOOD\n"); } if (opts.write) write_result(strings, n); return ret; } -static void +void print_alg_names_and_descs(void) { - const struct routine **routines; + const routine **routines; unsigned i = 0, routines_cnt; routine_get_all(&routines, &routines_cnt); if (routines[0]->multicore == 0) { - puts(":: SINGLE CORE ROUTINES ::::::::::::::::::::::::::::::::::::::::::::::::::::::::"); 
- puts(":: NAME :::::::::::::::::::::: DESCRIPTION :::::::::::::::::::::::::::::::::::::"); + std::puts(":: SINGLE CORE ROUTINES ::::::::::::::::::::::::::::::::::::::::::::::::::::::::"); + std::puts(":: NAME :::::::::::::::::::::: DESCRIPTION :::::::::::::::::::::::::::::::::::::"); for (i=0; i < routines_cnt && routines[i]->multicore == 0; ++i) - if (strlen(routines[i]->name) > 30) { - printf("%s\n", routines[i]->name); - printf("%30s %s\n", "", routines[i]->desc); + if (std::strlen(routines[i]->name) > 30) { + std::printf("%s\n", routines[i]->name); + std::printf("%30s %s\n", "", routines[i]->desc); } else - printf("%-30s %s\n", routines[i]->name, routines[i]->desc); + std::printf("%-30s %s\n", routines[i]->name, routines[i]->desc); } if (i < routines_cnt && routines[i]->multicore) { if (i) - puts(""); - puts(":: MULTI CORE ROUTINES :::::::::::::::::::::::::::::::::::::::::::::::::::::::::"); - puts(":: NAME :::::::::::::::::::::: DESCRIPTION :::::::::::::::::::::::::::::::::::::"); + std::puts(""); + std::puts(":: MULTI CORE ROUTINES :::::::::::::::::::::::::::::::::::::::::::::::::::::::::"); + std::puts(":: NAME :::::::::::::::::::::: DESCRIPTION :::::::::::::::::::::::::::::::::::::"); for (; i < routines_cnt; ++i) - if (strlen(routines[i]->name) > 30) { - printf("%s\n", routines[i]->name); - printf("%30s %s\n", "", routines[i]->desc); + if (std::strlen(routines[i]->name) > 30) { + std::printf("%s\n", routines[i]->name); + std::printf("%30s %s\n", "", routines[i]->desc); } else - printf("%-30s %s\n", routines[i]->name, routines[i]->desc); + std::printf("%-30s %s\n", routines[i]->name, routines[i]->desc); } } -static void +void print_alg_names(void) { - const struct routine **routines; - unsigned i, routines_cnt; + const routine **routines; + unsigned routines_cnt; routine_get_all(&routines, &routines_cnt); - for (i=0; i < routines_cnt; ++i) - puts(routines[i]->name); + for (unsigned i=0; i < routines_cnt; ++i) + std::puts(routines[i]->name); } -static void 
-routine_information(const struct routine *r) +void +routine_information(const routine *r) { - printf("Routine (%s): %s\n", + std::printf("Routine (%s): %s\n", r->multicore ? "multi core" : "single core", r->name); - printf(" \"%s\"\n", r->desc); - printf("\n"); + std::printf(" \"%s\"\n", r->desc); + std::printf("\n"); } -static void +void input_information(unsigned char *text, size_t text_len, unsigned char **strings, size_t strings_len) { size_t input_mb = text_len / (1024*1024); size_t input_kb = text_len / 1024; if (input_mb) - printf(" size: %zu MB (%zu kB, %zu bytes)\n", + std::printf(" size: %zu MB (%zu kB, %zu bytes)\n", input_mb, input_kb, text_len); else if (input_kb) - printf(" size: %zu kB (%zu bytes)\n", + std::printf(" size: %zu kB (%zu bytes)\n", input_kb, text_len); else - printf(" size: %zu bytes\n", text_len); - printf(" strings: %zu\n", strings_len); - puts(""); + std::printf(" size: %zu bytes\n", text_len); + std::printf(" strings: %zu\n", strings_len); + std::puts(""); char *vma_info_text = vma_info(text); char *vma_info_strings = vma_info(strings); - if (strcmp(vma_info_text, vma_info_strings) == 0) { - puts("VMA information for text and string pointer arrays:"); - puts(vma_info_text); + if (std::strcmp(vma_info_text, vma_info_strings) == 0) { + std::puts("VMA information for text and string pointer arrays:"); + std::puts(vma_info_text); } else { - puts("VMA information for text array:"); - puts(vma_info_text); - puts("VMA information for string pointer array:"); - puts(vma_info_strings); + std::puts("VMA information for text array:"); + std::puts(vma_info_text); + std::puts("VMA information for string pointer array:"); + std::puts(vma_info_strings); } - free(vma_info_text); - free(vma_info_strings); + std::free(vma_info_text); + std::free(vma_info_strings); } -static void +void cpu_information(void) { - int i; int maxcpu = -1; size_t cpus_setsize = 0; char *cpus_al = cpus_allowed_list(); cpu_set_t *cpus = cpus_allowed(&cpus_setsize, &maxcpu); 
if (!cpus_al && !cpus) return; - printf("CPU information:\n"); + std::printf("CPU information:\n"); if (cpus_al) - printf(" CPUs allowed: %s\n", cpus_al); - for (i=0; i < maxcpu; ++i) { + std::printf(" CPUs allowed: %s\n", cpus_al); + for (int i=0; i < maxcpu; ++i) { if (CPU_ISSET_S(i, cpus_setsize, cpus)) { - int min_freq, max_freq; - printf(" CPU%d", i); - min_freq = cpu_scaling_min_freq(i); - max_freq = cpu_scaling_max_freq(i); + std::printf(" CPU%d", i); + int min_freq = cpu_scaling_min_freq(i); + int max_freq = cpu_scaling_max_freq(i); if (min_freq != -1 && max_freq != -1) - printf(", scaling frequencies: [%dMHz .. %dMHz]", + std::printf(", scaling frequencies: [%dMHz .. %dMHz]", max_freq/1000, min_freq/1000); - puts(""); + std::puts(""); } } - putchar('\n'); - free(cpus_al); - free(cpus); + std::putchar('\n'); + std::free(cpus_al); + std::free(cpus); } -static void +void usage(void) { - puts( + std::puts( "String sorting\n" "--------------\n" "\n" @@ -614,18 +601,19 @@ usage(void) "\n"); } -static void +void print_cmdline(int argc, char **argv, FILE *fp) { - int i; if (!fp) return; - fprintf(fp, "Command line:"); - for (i=0; i < argc; ++i) - fprintf(fp, " %s", argv[i]); - fprintf(fp, "\n"); + std::fprintf(fp, "Command line:"); + for (int i=0; i < argc; ++i) + std::fprintf(fp, " %s", argv[i]); + std::fprintf(fp, "\n"); } +} // namespace + int main(int argc, char **argv) { int ret = 0; @@ -689,7 +677,7 @@ int main(int argc, char **argv) opts.text_raw = 1; break; case 1012: - opts.perf_control_fd = atoi(optarg); + opts.perf_control_fd = std::atoi(optarg); break; case '?': default: @@ -697,41 +685,41 @@ int main(int argc, char **argv) } } if (argc - 2 != optind) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: wrong number of arguments.\n"); return 1; } const char *algorithm = argv[optind]; - if (!algorithm || strlen(algorithm) == 0) { - fprintf(stderr, + if (!algorithm || std::strlen(algorithm) == 0) { + std::fprintf(stderr, "ERROR: please specify algorithm 
name.\n"); return 1; } opts.r = routine_from_name(algorithm); if (!opts.r) { - fprintf(stderr, + std::fprintf(stderr, "ERROR: no match found for algorithm '%s'!\n", algorithm); return 1; } const char *filename = argv[optind+1]; - if (!filename || strlen(filename) == 0) { - fprintf(stderr, + if (!filename || std::strlen(filename) == 0) { + std::fprintf(stderr, "ERROR: please specify input filename.\n"); return 1; } open_log_file(); if (log_file) - fprintf(log_file, "===START===\n"); + std::fprintf(log_file, "===START===\n"); print_cmdline(argc, argv, log_file); routine_information(opts.r); cpu_information(); - unsigned long seed = getpid()*time(0); + unsigned long seed = getpid() * std::time(nullptr); //seed = 0xdeadbeef; srand48(seed); if (log_file) - fprintf(log_file, "Random seed: %lu.\n", seed); - printf("Input (%s): %s ...\n", + std::fprintf(log_file, "Random seed: %lu.\n", seed); + std::printf("Input (%s): %s ...\n", opts.text_raw ? "RAW" : "plain", bazename(filename)); unsigned char *text; @@ -740,7 +728,7 @@ int main(int argc, char **argv) readbytes(filename, &text, &text_len); if (opts.suffixsorting) { if (log_file) - fprintf(log_file, "Suffix sorting mode!\n"); + std::fprintf(log_file, "Suffix sorting mode!\n"); create_suffixes(text, text_len, &strings, &strings_len); } else { create_strings(text, text_len, &strings, &strings_len); @@ -750,8 +738,8 @@ int main(int argc, char **argv) free_text(text, text_len); free_pointers(strings, strings_len); if (log_file) { - fprintf(log_file, "===DONE===\n"); - fclose(log_file); + std::fprintf(log_file, "===DONE===\n"); + std::fclose(log_file); } return ret; } diff --git a/src/util/cpus_allowed.cpp b/src/util/cpus_allowed.cpp index d884398..ac6091b 100644 --- a/src/util/cpus_allowed.cpp +++ b/src/util/cpus_allowed.cpp @@ -127,7 +127,7 @@ read_int_file(const std::string &path) } // namespace -extern "C" char * +char * cpus_allowed_list(void) { std::string s = status_entry("Cpus_allowed_list"); @@ -135,7 +135,7 @@ 
cpus_allowed_list(void) return strdup_malloc(s); } -extern "C" cpu_set_t * +cpu_set_t * cpus_allowed(size_t *setsize, int *maxcpu) { std::string allowed = status_entry("Cpus_allowed"); @@ -151,7 +151,7 @@ cpus_allowed(size_t *setsize, int *maxcpu) return c; } -extern "C" int +int cpu_scaling_min_freq(int cpu) { std::ostringstream path; @@ -159,7 +159,7 @@ cpu_scaling_min_freq(int cpu) return read_int_file(path.str()); } -extern "C" int +int cpu_scaling_max_freq(int cpu) { std::ostringstream path; diff --git a/src/util/cpus_allowed.h b/src/util/cpus_allowed.h index a0e8cda..55c34bf 100644 --- a/src/util/cpus_allowed.h +++ b/src/util/cpus_allowed.h @@ -23,20 +23,11 @@ #ifndef CPUS_ALLOWED_H #define CPUS_ALLOWED_H -#define _GNU_SOURCE #include -#ifdef __cplusplus -extern "C" { -#endif - char *cpus_allowed_list(void); cpu_set_t *cpus_allowed(size_t *, int *maxcpu); int cpu_scaling_max_freq(int cpu); int cpu_scaling_min_freq(int cpu); -#ifdef __cplusplus -} -#endif - #endif /* CPUS_ALLOWED_H */ diff --git a/src/util/timing.cpp b/src/util/timing.cpp index 5243fb6..91c3bea 100644 --- a/src/util/timing.cpp +++ b/src/util/timing.cpp @@ -52,7 +52,7 @@ ms_between(const timeval &a, const timeval &b) } // namespace -extern "C" void +void timing_start(void) { getrusage(RUSAGE_SELF, &startclock); @@ -60,7 +60,7 @@ timing_start(void) clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_start); } -extern "C" void +void timing_stop(void) { getrusage(RUSAGE_SELF, &stopclock); @@ -68,31 +68,31 @@ timing_stop(void) clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &process_cputime_stop); } -extern "C" double +double gettime_wall_clock(void) { return ms_between(monotonic_start, monotonic_stop); } -extern "C" double +double gettime_user(void) { return ms_between(startclock.ru_utime, stopclock.ru_utime); } -extern "C" double +double gettime_sys(void) { return ms_between(startclock.ru_stime, stopclock.ru_stime); } -extern "C" double +double gettime_user_sys(void) { return gettime_user() + 
gettime_sys(); } -extern "C" double +double gettime_process_cputime(void) { return ms_between(process_cputime_start, process_cputime_stop); diff --git a/src/util/timing.h b/src/util/timing.h index 1139e48..0eedca1 100644 --- a/src/util/timing.h +++ b/src/util/timing.h @@ -23,10 +23,6 @@ #ifndef TIMING_H #define TIMING_H -#ifdef __cplusplus -extern "C" { -#endif - void timing_start(void); void timing_stop(void); @@ -36,8 +32,4 @@ double gettime_user_sys(void); double gettime_process_cputime(void); double gettime_wall_clock(void); -#ifdef __cplusplus -} -#endif - #endif /* TIMING_H */ diff --git a/src/util/vmainfo.cpp b/src/util/vmainfo.cpp index 3ffd5b1..de7dcd8 100644 --- a/src/util/vmainfo.cpp +++ b/src/util/vmainfo.cpp @@ -115,7 +115,7 @@ strdup_malloc(const std::string &s) } // namespace -extern "C" char * +char * vma_info(void *ptr) { auto target = reinterpret_cast(ptr); diff --git a/src/util/vmainfo.h b/src/util/vmainfo.h index 0b280d7..1e6b5a3 100644 --- a/src/util/vmainfo.h +++ b/src/util/vmainfo.h @@ -23,15 +23,7 @@ #ifndef VMAINFO_H #define VMAINFO_H -#ifdef __cplusplus -extern "C" { -#endif - /* Release return value with free() when no longer needed. */ char *vma_info(void *ptr); -#ifdef __cplusplus -} -#endif - #endif /* VMAINFO_H */ From 36b3187163af117de29fb7f69dd51a80ca72c451 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Sat, 16 May 2026 21:55:56 +0300 Subject: [PATCH 15/16] Update to C++17 standard CMakeLists.txt bumps both target properties from CXX_STANDARD 11 to 17. C_STANDARD stays at 99 (external/*.c is untouched). The algorithm implementation files in src/ compile under the new standard with no source changes -- no removed/deprecated C++17 constructs were in use (no register, auto_ptr, random_shuffle, bind1st/bind2nd, ptr_fun, unary/binary_function, dynamic exception specs, or trigraphs). 
Cleanups taking advantage of the new standard, scoped to sortstring + util + routines per the user's guidance ("minimal in algorithm files, extensive in sortstring/util"): vma_info now returns std::string instead of a malloc'd char*; the strdup_malloc helper goes away and the caller's free()/strcmp pair collapses to operator==. cpus_allowed_list returns std::string ("" means not present). cpus_allowed returns a cpus_info struct -- the two out-params turn into a structured-binding consumer. cpu_scaling_{min,max}_freq return std::optional<int>, retiring the -1 sentinel. Internal helpers (status_entry, high_bit_order, set_cpu_bits) take string_view. [[nodiscard]] on all four public entry points. Incidental fix at the caller: std::free(cpus) -> CPU_FREE(cpus) (functionally identical on glibc, but spec-correct). routine_from_name returns std::optional<const routine *> and takes string_view; the call site uses if-with-init. routine_get_all returns std::pair<const routine **, unsigned>, consumed by structured bindings in both sortstring.cpp and unit-test/main.cpp. qsort -> std::sort with a typed lambda comparator. [[nodiscard]] on both. sortstring.cpp: opts bitfields become plain bools with default member initializers; write_filename is std::string. Two asprintf sites (log file and write-output path) become std::string concatenation, dropping the malloc/free/return-code dance. bazename returns std::string, retiring the static char buf[300] (not thread-safe, though never actually exercised concurrently here). [[nodiscard]] on alloc_bytes, alloc_text, alloc_pointers, file_size. util/debug.h: [[nodiscard]] on check_result. Verified output byte-identical to pre-change binary for -L, -A, --help, full sort run (regular and taskset-restricted), and all error paths (modulo ASLR addresses, random seed, timing values, and RSS/Pss). Unit tests pass. Algorithm-file object code unchanged in shape (standard bump only). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- CMakeLists.txt | 4 +- README.md | 2 +- src/routines.cpp | 37 ++++------ src/routines.h | 8 ++- src/sortstring.cpp | 140 ++++++++++++++++++-------------------- src/util/cpus_allowed.cpp | 60 +++++++--------- src/util/cpus_allowed.h | 18 +++-- src/util/debug.h | 2 +- src/util/vmainfo.cpp | 18 ++--- src/util/vmainfo.h | 5 +- unit-test/main.cpp | 4 +- 11 files changed, 136 insertions(+), 162 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 990c527..cb89ab5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ foreach(tgt sortstring_internal sortstring_internal_unittest) target_link_libraries(${tgt} PUBLIC OpenMP::OpenMP_C OpenMP::OpenMP_CXX) set_target_properties(${tgt} PROPERTIES C_STANDARD 99 C_STANDARD_REQUIRED YES - CXX_STANDARD 11 CXX_STANDARD_REQUIRED YES) + CXX_STANDARD 17 CXX_STANDARD_REQUIRED YES) endforeach() if(ENABLE_GCC_ANALYZER) @@ -135,4 +135,4 @@ target_compile_definitions(unit-test PRIVATE UNIT_TEST) set_target_properties(sortstring unit-test PROPERTIES C_STANDARD 99 C_STANDARD_REQUIRED YES - CXX_STANDARD 11 CXX_STANDARD_REQUIRED YES) + CXX_STANDARD 17 CXX_STANDARD_REQUIRED YES) diff --git a/README.md b/README.md index afbd398..4f0ab60 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ purposes, and are copyright by their respective authors. 
Requirements ------------ - * C++11 + * C++17 * CMake >= 3.16 * OpenMP * Ninja (optional; the default Make generator also works) diff --git a/src/routines.cpp b/src/routines.cpp index 3f69b21..8184c3c 100644 --- a/src/routines.cpp +++ b/src/routines.cpp @@ -22,6 +22,7 @@ #include "routines.h" +#include #include #include @@ -32,20 +33,6 @@ constexpr unsigned ROUTINES_MAX = 256; const routine *routines[ROUTINES_MAX]; unsigned routine_cnt; -int -routine_cmp(const void *a, const void *b) -{ - const routine *aa = *static_cast(a); - const routine *bb = *static_cast(b); - if (aa->f == bb->f) - return 0; - if (aa->multicore < bb->multicore) - return -1; - if (aa->multicore > bb->multicore) - return 1; - return std::strcmp(aa->name, bb->name); -} - } // namespace extern "C" void @@ -62,19 +49,23 @@ routine_register(const routine *r) routines[routine_cnt++] = r; } -const routine * -routine_from_name(const char *name) +std::optional +routine_from_name(std::string_view name) { for (unsigned i = 0; i < routine_cnt; ++i) - if (std::strcmp(name, routines[i]->name) == 0) + if (name == routines[i]->name) return routines[i]; - return nullptr; + return std::nullopt; } -void -routine_get_all(const routine ***r, unsigned *cnt) +std::pair +routine_get_all() { - *r = routines; - *cnt = routine_cnt; - std::qsort(*r, *cnt, sizeof(routine *), routine_cmp); + std::sort(routines, routines + routine_cnt, + [](const routine *a, const routine *b) { + if (a->multicore != b->multicore) + return a->multicore < b->multicore; + return std::strcmp(a->name, b->name) < 0; + }); + return {routines, routine_cnt}; } diff --git a/src/routines.h b/src/routines.h index aff46fb..00a6878 100644 --- a/src/routines.h +++ b/src/routines.h @@ -25,7 +25,11 @@ #include "routine.h" -const routine *routine_from_name(const char *); -void routine_get_all(const routine ***, unsigned *); +#include +#include +#include + +[[nodiscard]] std::optional routine_from_name(std::string_view name); +[[nodiscard]] std::pair 
routine_get_all(); #endif /* ROUTINES_H */ diff --git a/src/sortstring.cpp b/src/sortstring.cpp index af586fe..7acc985 100644 --- a/src/sortstring.cpp +++ b/src/sortstring.cpp @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include #include @@ -44,17 +46,17 @@ namespace { struct { - const routine *r; - char *write_filename; - unsigned suffixsorting : 1; - unsigned check_result : 1; - unsigned oprofile : 1; - unsigned write : 1; - unsigned xml_stats : 1; - unsigned hugetlb_text : 1; - unsigned hugetlb_pointers : 1; - unsigned text_raw : 1; - int perf_control_fd; + const routine *r = nullptr; + std::string write_filename; + bool suffixsorting = false; + bool check_result = false; + bool oprofile = false; + bool write = false; + bool xml_stats = false; + bool hugetlb_text = false; + bool hugetlb_pointers = false; + bool text_raw = false; + int perf_control_fd = 0; } opts; FILE *log_file; @@ -62,19 +64,14 @@ FILE *log_file; void open_log_file(void) { - char *log_fn = nullptr; - char *host = nullptr; if (log_file) return; - host = std::getenv("HOSTNAME"); - if (host && *host) - if (asprintf(&log_fn, "sortstring_log_%s", host) == -1) - log_fn = nullptr; - if (log_fn) - log_file = std::fopen(log_fn, "a"); - else - log_file = std::fopen("sortstring_log", "a"); - std::free(log_fn); + std::string log_fn = "sortstring_log"; + if (const char *host = std::getenv("HOSTNAME"); host && *host) { + log_fn += '_'; + log_fn += host; + } + log_file = std::fopen(log_fn.c_str(), "a"); if (log_file) setlinebuf(log_file); } @@ -139,15 +136,14 @@ opcontrol_stop(void) } } -char * -bazename(const char *fname) +std::string +bazename(std::string_view fname) { - static char buf[300]; - std::strcpy(buf, fname); - return basename(buf); + std::string copy(fname); + return basename(copy.data()); } -void * +[[nodiscard]] void * alloc_bytes(size_t bytes, int hugetlb) { int map_flags = MAP_ANONYMOUS | MAP_PRIVATE; @@ -163,13 +159,13 @@ alloc_bytes(size_t bytes, int hugetlb) 
return p; } -unsigned char * +[[nodiscard]] unsigned char * alloc_text(size_t bytes) { return static_cast(alloc_bytes(bytes, opts.hugetlb_text)); } -unsigned char ** +[[nodiscard]] unsigned char ** alloc_pointers(size_t num) { return static_cast( @@ -188,7 +184,7 @@ free_pointers(unsigned char **strings, size_t strings_len) munmap(strings, strings_len); } -off_t +[[nodiscard]] off_t file_size(int fd) { off_t size = lseek(fd, 0, SEEK_END); @@ -355,15 +351,15 @@ create_suffixes(unsigned char *text, size_t text_len, void write_result(unsigned char **strings, size_t n) { - FILE *fp; - if (!opts.write_filename) { + if (opts.write_filename.empty()) { const char *username = std::getenv("USERNAME"); if (!username) username = ""; - if (asprintf(&opts.write_filename, "/tmp/%s/alg.out", username) == -1) - opts.write_filename = nullptr; + opts.write_filename = "/tmp/"; + opts.write_filename += username; + opts.write_filename += "/alg.out"; } - fp = std::fopen(opts.write_filename, "w"); + FILE *fp = std::fopen(opts.write_filename.c_str(), "w"); if (!fp) { std::fprintf(stderr, "WARNING: --write failed: " @@ -376,7 +372,7 @@ write_result(unsigned char **strings, size_t n) } std::fclose(fp); std::fprintf(stderr, "Wrote sorted output to '%s'.\n", - opts.write_filename); + opts.write_filename.c_str()); } void @@ -442,9 +438,8 @@ run(const routine *r, unsigned char **strings, size_t n) void print_alg_names_and_descs(void) { - const routine **routines; - unsigned i = 0, routines_cnt; - routine_get_all(&routines, &routines_cnt); + auto [routines, routines_cnt] = routine_get_all(); + unsigned i = 0; if (routines[0]->multicore == 0) { std::puts(":: SINGLE CORE ROUTINES ::::::::::::::::::::::::::::::::::::::::::::::::::::::::"); std::puts(":: NAME :::::::::::::::::::::: DESCRIPTION :::::::::::::::::::::::::::::::::::::"); @@ -472,9 +467,7 @@ print_alg_names_and_descs(void) void print_alg_names(void) { - const routine **routines; - unsigned routines_cnt; - routine_get_all(&routines, 
&routines_cnt); + auto [routines, routines_cnt] = routine_get_all(); for (unsigned i=0; i < routines_cnt; ++i) std::puts(routines[i]->name); } @@ -505,47 +498,43 @@ input_information(unsigned char *text, size_t text_len, std::printf(" size: %zu bytes\n", text_len); std::printf(" strings: %zu\n", strings_len); std::puts(""); - char *vma_info_text = vma_info(text); - char *vma_info_strings = vma_info(strings); - if (std::strcmp(vma_info_text, vma_info_strings) == 0) { + std::string vma_info_text = vma_info(text); + std::string vma_info_strings = vma_info(strings); + if (vma_info_text == vma_info_strings) { std::puts("VMA information for text and string pointer arrays:"); - std::puts(vma_info_text); + std::puts(vma_info_text.c_str()); } else { std::puts("VMA information for text array:"); - std::puts(vma_info_text); + std::puts(vma_info_text.c_str()); std::puts("VMA information for string pointer array:"); - std::puts(vma_info_strings); + std::puts(vma_info_strings.c_str()); } - std::free(vma_info_text); - std::free(vma_info_strings); } void cpu_information(void) { - int maxcpu = -1; - size_t cpus_setsize = 0; - char *cpus_al = cpus_allowed_list(); - cpu_set_t *cpus = cpus_allowed(&cpus_setsize, &maxcpu); - if (!cpus_al && !cpus) + std::string cpus_al = cpus_allowed_list(); + auto [cpus, cpus_setsize, maxcpu] = cpus_allowed(); + if (cpus_al.empty() && !cpus) return; std::printf("CPU information:\n"); - if (cpus_al) - std::printf(" CPUs allowed: %s\n", cpus_al); + if (!cpus_al.empty()) + std::printf(" CPUs allowed: %s\n", cpus_al.c_str()); for (int i=0; i < maxcpu; ++i) { if (CPU_ISSET_S(i, cpus_setsize, cpus)) { std::printf(" CPU%d", i); - int min_freq = cpu_scaling_min_freq(i); - int max_freq = cpu_scaling_max_freq(i); - if (min_freq != -1 && max_freq != -1) + auto min_freq = cpu_scaling_min_freq(i); + auto max_freq = cpu_scaling_max_freq(i); + if (min_freq && max_freq) std::printf(", scaling frequencies: [%dMHz .. 
%dMHz]", - max_freq/1000, min_freq/1000); + *max_freq/1000, *min_freq/1000); std::puts(""); } } std::putchar('\n'); - std::free(cpus_al); - std::free(cpus); + if (cpus) + CPU_FREE(cpus); } void @@ -651,30 +640,30 @@ int main(int argc, char **argv) print_alg_names(); return 0; case 1003: - opts.check_result = 1; + opts.check_result = true; break; case 1004: - opts.suffixsorting = 1; + opts.suffixsorting = true; break; case 1005: - opts.write = 1; + opts.write = true; if (optarg) opts.write_filename = optarg; break; case 1007: - opts.oprofile = 1; + opts.oprofile = true; break; case 1008: - opts.xml_stats = 1; + opts.xml_stats = true; break; case 1009: - opts.hugetlb_text = 1; + opts.hugetlb_text = true; break; case 1010: - opts.hugetlb_pointers = 1; + opts.hugetlb_pointers = true; break; case 1011: - opts.text_raw = 1; + opts.text_raw = true; break; case 1012: opts.perf_control_fd = std::atoi(optarg); @@ -695,8 +684,9 @@ int main(int argc, char **argv) "ERROR: please specify algorithm name.\n"); return 1; } - opts.r = routine_from_name(algorithm); - if (!opts.r) { + if (auto r = routine_from_name(algorithm); r) { + opts.r = *r; + } else { std::fprintf(stderr, "ERROR: no match found for algorithm '%s'!\n", algorithm); @@ -721,7 +711,7 @@ int main(int argc, char **argv) std::fprintf(log_file, "Random seed: %lu.\n", seed); std::printf("Input (%s): %s ...\n", opts.text_raw ? 
"RAW" : "plain", - bazename(filename)); + bazename(filename).c_str()); unsigned char *text; unsigned char **strings; size_t text_len, strings_len; diff --git a/src/util/cpus_allowed.cpp b/src/util/cpus_allowed.cpp index ac6091b..b361699 100644 --- a/src/util/cpus_allowed.cpp +++ b/src/util/cpus_allowed.cpp @@ -23,34 +23,24 @@ #include "cpus_allowed.h" #include -#include -#include #include #include #include +#include namespace { -char * -strdup_malloc(const std::string &s) -{ - char *p = static_cast(std::malloc(s.size() + 1)); - if (!p) std::abort(); - std::memcpy(p, s.c_str(), s.size() + 1); - return p; -} - /* Read /proc/self/status, return the (whitespace-trimmed) value of the * requested key, or an empty string if not present. */ std::string -status_entry(const std::string &key) +status_entry(std::string_view key) { std::ifstream f("/proc/self/status"); if (!f) return std::string(); for (std::string line; std::getline(f, line); ) { auto colon = line.find(':'); if (colon == std::string::npos) continue; - if (line.compare(0, colon, key) != 0) continue; + if (std::string_view(line).substr(0, colon) != key) continue; auto v = colon + 1; while (v < line.size() && (line[v] == ' ' || line[v] == '\t')) ++v; @@ -78,7 +68,7 @@ hex_val(char ch) * digits on the right). Non-hex characters (e.g. comma separators) are * skipped. Returns -1 if no bits are set. 
*/ int -high_bit_order(const std::string &allowed) +high_bit_order(std::string_view allowed) { int order = -1; int i = 0; @@ -100,7 +90,7 @@ high_bit_order(const std::string &allowed) } void -set_cpu_bits(const std::string &allowed, cpu_set_t *c, size_t setsize) +set_cpu_bits(std::string_view allowed, cpu_set_t *c, size_t setsize) { int i = 0; for (auto it = allowed.rbegin(); it != allowed.rend(); ++it) { @@ -115,43 +105,43 @@ set_cpu_bits(const std::string &allowed, cpu_set_t *c, size_t setsize) } } -int +std::optional read_int_file(const std::string &path) { std::ifstream f(path); - if (!f) return -1; + if (!f) return std::nullopt; int v; - if (!(f >> v)) return -1; + if (!(f >> v)) return std::nullopt; return v; } } // namespace -char * -cpus_allowed_list(void) +std::string +cpus_allowed_list() { - std::string s = status_entry("Cpus_allowed_list"); - if (s.empty()) return nullptr; - return strdup_malloc(s); + return status_entry("Cpus_allowed_list"); } -cpu_set_t * -cpus_allowed(size_t *setsize, int *maxcpu) +cpus_info +cpus_allowed() { + cpus_info info; std::string allowed = status_entry("Cpus_allowed"); - if (allowed.empty()) return nullptr; + if (allowed.empty()) return info; int top = high_bit_order(allowed); - if (top == -1) return nullptr; - *maxcpu = top; + if (top == -1) return info; cpu_set_t *c = CPU_ALLOC(top + 1); - if (!c) return nullptr; - *setsize = CPU_ALLOC_SIZE(top + 1); - CPU_ZERO_S(*setsize, c); - set_cpu_bits(allowed, c, *setsize); - return c; + if (!c) return info; + info.set = c; + info.setsize = CPU_ALLOC_SIZE(top + 1); + info.maxcpu = top; + CPU_ZERO_S(info.setsize, info.set); + set_cpu_bits(allowed, info.set, info.setsize); + return info; } -int +std::optional cpu_scaling_min_freq(int cpu) { std::ostringstream path; @@ -159,7 +149,7 @@ cpu_scaling_min_freq(int cpu) return read_int_file(path.str()); } -int +std::optional cpu_scaling_max_freq(int cpu) { std::ostringstream path; diff --git a/src/util/cpus_allowed.h 
b/src/util/cpus_allowed.h index 55c34bf..1bc5178 100644 --- a/src/util/cpus_allowed.h +++ b/src/util/cpus_allowed.h @@ -25,9 +25,19 @@ #include -char *cpus_allowed_list(void); -cpu_set_t *cpus_allowed(size_t *, int *maxcpu); -int cpu_scaling_max_freq(int cpu); -int cpu_scaling_min_freq(int cpu); +#include +#include +#include + +struct cpus_info { + cpu_set_t *set = nullptr; + size_t setsize = 0; + int maxcpu = 0; +}; + +[[nodiscard]] std::string cpus_allowed_list(); +[[nodiscard]] cpus_info cpus_allowed(); +[[nodiscard]] std::optional cpu_scaling_max_freq(int cpu); +[[nodiscard]] std::optional cpu_scaling_min_freq(int cpu); #endif /* CPUS_ALLOWED_H */ diff --git a/src/util/debug.h b/src/util/debug.h index 6eed602..74f6ad1 100644 --- a/src/util/debug.h +++ b/src/util/debug.h @@ -38,7 +38,7 @@ static std::string __debug_indent_str; #endif #endif /* __cplusplus */ -static inline int +[[nodiscard]] static inline int check_result(unsigned char **strings, size_t n) { if (n < 2) return 0; diff --git a/src/util/vmainfo.cpp b/src/util/vmainfo.cpp index de7dcd8..d4ebb34 100644 --- a/src/util/vmainfo.cpp +++ b/src/util/vmainfo.cpp @@ -23,11 +23,10 @@ #include "vmainfo.h" #include -#include -#include #include #include #include +#include #include /* Format the /proc/pid/smaps key-value pairs into two columns: @@ -104,20 +103,11 @@ format(const Entry &e) return out.str(); } -char * -strdup_malloc(const std::string &s) -{ - char *p = static_cast(std::malloc(s.size() + 1)); - if (!p) std::abort(); - std::memcpy(p, s.c_str(), s.size() + 1); - return p; -} - } // namespace -char * -vma_info(void *ptr) +std::string +vma_info(const void *ptr) { auto target = reinterpret_cast(ptr); - return strdup_malloc(format(find_entry(target))); + return format(find_entry(target)); } diff --git a/src/util/vmainfo.h b/src/util/vmainfo.h index 1e6b5a3..4e52fe5 100644 --- a/src/util/vmainfo.h +++ b/src/util/vmainfo.h @@ -23,7 +23,8 @@ #ifndef VMAINFO_H #define VMAINFO_H -/* Release return value with 
free() when no longer needed. */ -char *vma_info(void *ptr); +#include + +[[nodiscard]] std::string vma_info(const void *ptr); #endif /* VMAINFO_H */ diff --git a/unit-test/main.cpp b/unit-test/main.cpp index b1bea77..abd3ed1 100644 --- a/unit-test/main.cpp +++ b/unit-test/main.cpp @@ -209,9 +209,7 @@ test_routines() { std::cerr<<__PRETTY_FUNCTION__< Date: Sat, 16 May 2026 23:05:52 +0300 Subject: [PATCH 16/16] cpu_information: report turbo/boost state, hardware peak, and P/E class The existing per-CPU line printed only the kernel governor's scaling window via scaling_min_freq / scaling_max_freq. On a system with boost disabled, or on hybrid CPUs where different cores have different boost ceilings, that window is the same number on every core and conveys nothing about whether the CPU can actually run faster than displayed. Add three signals so the benchmark banner reflects what the silicon can do: 1. Turbo/boost state -- a single global line. Reads /sys/devices/system/cpu/cpufreq/boost first (AMD CPB, acpi-cpufreq, amd-pstate); falls back to /sys/devices/system/cpu/intel_pstate/no_turbo (intel_pstate, inverse-sense). Reported as on/off; the line is omitted when neither file is readable. 2. Hardware peak frequency -- per CPU, when it exceeds the scaling cap. Reads amd_pstate_max_freq first (the per-core boost ceiling reported by amd-pstate; differs across cores on AMD hybrid parts), then cpuinfo_max_freq (Intel turbo cap). Suppressed when equal to scaling_max_freq so the line stays short on non-hybrid / boost-on systems. 3. P-core / E-core label -- per CPU. Resolved from /sys/devices/cpu_core/cpus (P) and /sys/devices/cpu_atom/cpus (E), the Intel hybrid PMU sysfs. Empty on non-hybrid Intel and on AMD (where the hardware-peak value already exposes the split, e.g. 5090 MHz Zen 5 vs 3506 MHz Zen 5c on a Ryzen AI 7 PRO 350). 
Sample on the AMD Ryzen AI 7 PRO 350 (boost disabled, 2 GHz scaling cap): CPU information: CPUs allowed: 0-1,4-5 Turbo/boost: off CPU0, scaling [623MHz .. 2000MHz], hw peak 5090MHz CPU1, scaling [623MHz .. 2000MHz], hw peak 3506MHz CPU4, scaling [623MHz .. 2000MHz], hw peak 5090MHz CPU5, scaling [623MHz .. 2000MHz], hw peak 3506MHz Incidental cleanup: the scaling range now prints [min .. max] (was [max .. min]) since "scaling ... hw peak X" only reads naturally with an ascending range. API shape: cpu_scaling_min_freq / cpu_scaling_max_freq collapse into a single cpu_info_for(cpu) returning a struct (scaling min/max, hw peak, core class). cpu_boost_enabled() returns std::optional<bool>. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/sortstring.cpp | 26 +++++++++----- src/util/cpus_allowed.cpp | 71 +++++++++++++++++++++++++++++++++------ src/util/cpus_allowed.h | 16 +++++++-- 3 files changed, 93 insertions(+), 20 deletions(-) diff --git a/src/sortstring.cpp b/src/sortstring.cpp index 7acc985..87409ca 100644 --- a/src/sortstring.cpp +++ b/src/sortstring.cpp @@ -521,16 +521,26 @@ cpu_information(void) std::printf("CPU information:\n"); if (!cpus_al.empty()) std::printf(" CPUs allowed: %s\n", cpus_al.c_str()); + if (auto boost = cpu_boost_enabled()) + std::printf(" Turbo/boost: %s\n", *boost ? "on" : "off"); for (int i=0; i < maxcpu; ++i) { - if (CPU_ISSET_S(i, cpus_setsize, cpus)) { - std::printf(" CPU%d", i); - auto min_freq = cpu_scaling_min_freq(i); - auto max_freq = cpu_scaling_max_freq(i); - if (min_freq && max_freq) - std::printf(", scaling frequencies: [%dMHz .. 
%dMHz]", - *max_freq/1000, *min_freq/1000); - std::puts(""); + if (!CPU_ISSET_S(i, cpus_setsize, cpus)) + continue; + const cpu_info info = cpu_info_for(i); + std::printf(" CPU%d", i); + switch (info.klass) { + case cpu_info::core_class::performance: std::printf(" (P-core)"); break; + case cpu_info::core_class::efficiency: std::printf(" (E-core)"); break; + case cpu_info::core_class::unknown: break; } + if (info.scaling_min_khz && info.scaling_max_khz) + std::printf(", scaling [%dMHz .. %dMHz]", + *info.scaling_min_khz/1000, + *info.scaling_max_khz/1000); + if (info.hw_max_khz && info.scaling_max_khz && + *info.hw_max_khz > *info.scaling_max_khz) + std::printf(", hw peak %dMHz", *info.hw_max_khz/1000); + std::puts(""); } std::putchar('\n'); if (cpus) diff --git a/src/util/cpus_allowed.cpp b/src/util/cpus_allowed.cpp index b361699..726891a 100644 --- a/src/util/cpus_allowed.cpp +++ b/src/util/cpus_allowed.cpp @@ -115,6 +115,36 @@ read_int_file(const std::string &path) return v; } +/* Parse a Linux cpulist string ("0-3,5,7-9") and report whether `cpu` + * is a member. Returns std::nullopt if the file cannot be read; false + * if the file is readable but `cpu` is not listed. */ +std::optional +cpu_in_cpulist_file(int cpu, const std::string &path) +{ + std::ifstream f(path); + if (!f) return std::nullopt; + std::string list; + std::getline(f, list); + std::stringstream ss(list); + std::string tok; + while (std::getline(ss, tok, ',')) { + auto dash = tok.find('-'); + int lo, hi; + try { + if (dash == std::string::npos) { + lo = hi = std::stoi(tok); + } else { + lo = std::stoi(tok.substr(0, dash)); + hi = std::stoi(tok.substr(dash + 1)); + } + } catch (...) 
{ + continue; + } + if (cpu >= lo && cpu <= hi) return true; + } + return false; +} + } // namespace std::string @@ -141,18 +171,39 @@ cpus_allowed() return info; } -std::optional -cpu_scaling_min_freq(int cpu) +cpu_info +cpu_info_for(int cpu) { - std::ostringstream path; - path << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_min_freq"; - return read_int_file(path.str()); + std::ostringstream base; + base << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/"; + const std::string b = base.str(); + + cpu_info info; + info.scaling_min_khz = read_int_file(b + "scaling_min_freq"); + info.scaling_max_khz = read_int_file(b + "scaling_max_freq"); + /* Hardware peak (boost-capable) frequency: + * - AMD pstate: amd_pstate_max_freq is the per-core boost cap; + * cpuinfo_max_freq tracks scaling_max_freq when boost is off. + * - Intel: cpuinfo_max_freq is the turbo cap. */ + info.hw_max_khz = read_int_file(b + "amd_pstate_max_freq"); + if (!info.hw_max_khz) + info.hw_max_khz = read_int_file(b + "cpuinfo_max_freq"); + /* Intel hybrid topology: cpu_core PMU lists P-cores, cpu_atom E-cores. */ + if (cpu_in_cpulist_file(cpu, "/sys/devices/cpu_core/cpus").value_or(false)) + info.klass = cpu_info::core_class::performance; + else if (cpu_in_cpulist_file(cpu, "/sys/devices/cpu_atom/cpus").value_or(false)) + info.klass = cpu_info::core_class::efficiency; + return info; } -std::optional -cpu_scaling_max_freq(int cpu) +/* Turbo/boost state, reported as the inverse-sense pair the kernel + * exposes for each driver family. 
*/ +std::optional +cpu_boost_enabled() { - std::ostringstream path; - path << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_max_freq"; - return read_int_file(path.str()); + if (auto v = read_int_file("/sys/devices/system/cpu/cpufreq/boost")) + return *v != 0; + if (auto v = read_int_file("/sys/devices/system/cpu/intel_pstate/no_turbo")) + return *v == 0; + return std::nullopt; } diff --git a/src/util/cpus_allowed.h b/src/util/cpus_allowed.h index 1bc5178..5e3ca60 100644 --- a/src/util/cpus_allowed.h +++ b/src/util/cpus_allowed.h @@ -35,9 +35,21 @@ struct cpus_info { int maxcpu = 0; }; +struct cpu_info { + enum class core_class { + unknown, + performance, // Intel P-core (cpu_core PMU) + efficiency, // Intel E-core (cpu_atom PMU) + }; + std::optional scaling_min_khz; + std::optional scaling_max_khz; + std::optional hw_max_khz; // amd_pstate_max_freq or cpuinfo_max_freq + core_class klass = core_class::unknown; +}; + [[nodiscard]] std::string cpus_allowed_list(); [[nodiscard]] cpus_info cpus_allowed(); -[[nodiscard]] std::optional cpu_scaling_max_freq(int cpu); -[[nodiscard]] std::optional cpu_scaling_min_freq(int cpu); +[[nodiscard]] cpu_info cpu_info_for(int cpu); +[[nodiscard]] std::optional cpu_boost_enabled(); #endif /* CPUS_ALLOWED_H */