Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ jobs:
fail-fast: false
matrix:
build_type: [Release, Debug]
os: [ubuntu-20.04, ubuntu-18.04]
os: [ubuntu-22.04, ubuntu-24.04]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v6
- name: Install dependencies
run: sudo apt-get install cmake
run: sudo apt-get install -y cmake systemtap-sdt-dev
- name: cmake
run: cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: make
Expand All @@ -28,11 +28,11 @@ jobs:
fail-fast: false
matrix:
build_type: [Release, Debug]
os: [ubuntu-20.04]
os: [ubuntu-22.04, ubuntu-24.04]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v6
- name: Install dependencies
run: sudo apt-get install cmake clang systemtap-sdt-dev
run: sudo apt-get install -y cmake clang libomp-dev systemtap-sdt-dev
- name: cmake
run: CC=clang CXX=clang++ cmake -B builddir -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: make
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/build*
119 changes: 91 additions & 28 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,58 @@
cmake_minimum_required(VERSION 3.1)
set(CMAKE_CXX_STANDARD 11)
cmake_minimum_required(VERSION 3.16)
project(sortstring C CXX)

include(CheckIncludeFile)

project(sortstring)
include_directories(src src/util)
# Opt-in switch for GCC's -fanalyzer. This block only validates the toolchain;
# configuration aborts unless both the C and the C++ compiler are GCC >= 13.
option(ENABLE_GCC_ANALYZER "Enable GCC -fanalyzer static analysis on internal sources" OFF)
if(ENABLE_GCC_ANALYZER)
  if(NOT (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
    # At least one of the two compilers is not GCC.
    message(FATAL_ERROR "ENABLE_GCC_ANALYZER requires GCC for both C and C++ "
      "(got C=${CMAKE_C_COMPILER_ID}, CXX=${CMAKE_CXX_COMPILER_ID}).")
  elseif(CMAKE_C_COMPILER_VERSION VERSION_LESS 13 OR CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
    # Both are GCC, but at least one is older than 13.
    message(FATAL_ERROR "ENABLE_GCC_ANALYZER requires GCC >= 13 (got "
      "C=${CMAKE_C_COMPILER_VERSION}, CXX=${CMAKE_CXX_COMPILER_VERSION}).")
  endif()
endif()

# Opt-in switch for clang-tidy's clang-analyzer-* checks. When enabled,
# clang-tidy must be discoverable; otherwise configuration aborts.
option(ENABLE_CLANG_ANALYZER "Enable clang-tidy clang-analyzer-* checks on internal sources" OFF)
if(ENABLE_CLANG_ANALYZER)
  find_program(CLANG_TIDY_EXECUTABLE NAMES clang-tidy)
  if(NOT CLANG_TIDY_EXECUTABLE)
    message(FATAL_ERROR "ENABLE_CLANG_ANALYZER requires clang-tidy on PATH.")
  endif()
  # clang-tidy command line: disable every check except clang-analyzer-*, and
  # report header diagnostics only for headers under this project's src/.
  # PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) keeps the filter pointing at this
  # project even when it is pulled into a larger build via add_subdirectory().
  # The arguments are quoted so the glob/regex characters are passed verbatim.
  set(CLANG_TIDY_COMMAND
    ${CLANG_TIDY_EXECUTABLE}
    "-checks=-*,clang-analyzer-*"
    "-header-filter=^${PROJECT_SOURCE_DIR}/src/.*")
endif()

# User-tunable value for the compiler's -march= flag (defaults to "native").
# Setting it to an empty string omits the flag altogether.
set(MARCH "native" CACHE STRING
  "Value passed to -march=. Override to target a different ISA level.")

# Probe for <sys/sdt.h>; the result is stored in HAVE_SYS_SDT_H.
check_include_file(sys/sdt.h HAVE_SYS_SDT_H)

# OpenMP is mandatory; configuration fails if it cannot be found.
find_package(OpenMP REQUIRED)

# Warning flags for everything defined below this point.
add_compile_options(-Wall -Wextra)
# Only emit -march= when a value is present: a bare "-march=" is rejected by
# the compiler, so an empty MARCH is treated as "do not pass the flag".
if(NOT "${MARCH}" STREQUAL "")
  add_compile_options("-march=${MARCH}")
endif()

link_libraries(rt)
# Extend the per-configuration flags for both languages:
#   Release: additionally emit debug info (-g).
#   Debug:   build with -O1 and redefine _FORTIFY_SOURCE to 2.
foreach(flag_lang C CXX)
  string(APPEND CMAKE_${flag_lang}_FLAGS_RELEASE " -g")
endforeach()

foreach(flag_lang C CXX)
  string(APPEND CMAKE_${flag_lang}_FLAGS_DEBUG " -O1 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2")
endforeach()

set(INTERNAL_SRCS
src/funnelsort.cpp
src/funnelsort_bfs_128way.cpp
src/funnelsort_bfs_16way.cpp
src/funnelsort_bfs_32way.cpp
src/funnelsort_bfs_64way.cpp
src/funnelsort_bfs_8way.cpp
src/funnelsort_dfs_128way.cpp
src/funnelsort_dfs_16way.cpp
src/funnelsort_dfs_32way.cpp
src/funnelsort_dfs_64way.cpp
src/funnelsort_dfs_8way.cpp
src/msd_a.cpp
src/msd_a2.cpp
src/msd_lsd.cpp
Expand All @@ -28,10 +72,10 @@ set(INTERNAL_SRCS
src/mergesort_unstable.cpp
src/mergesort_losertree.cpp
src/mergesort_lcp.cpp
src/routines.c
src/util/timing.c
src/util/cpus_allowed.c
src/util/vmainfo.c)
src/routines.cpp
src/util/timing.cpp
src/util/cpus_allowed.cpp
src/util/vmainfo.cpp)

set(EXTERNAL_SRCS
external/lcp-quicksort.cpp
Expand All @@ -50,26 +94,45 @@ set(EXTERNAL_SRCS
external/forward16.c
external/parallel_string_radix_sort.cpp)

check_include_file(sys/sdt.h HAVE_SYS_SDT_H)
if(HAVE_SYS_SDT_H)
add_definitions(-DHAVE_SYS_SDT_H=1)
endif()

set_source_files_properties(external/adaptive.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare)
set_source_files_properties(external/adaptive.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare)
set_source_files_properties(external/quicksort.c PROPERTIES COMPILE_FLAGS -Wno-sign-compare)

add_executable(sortstring src/sortstring.c ${INTERNAL_SRCS} ${EXTERNAL_SRCS})
# Compile the internal sources twice as object libraries: once as-is and once
# with UNIT_TEST defined (the latter is what the unit-test binary links).
foreach(internal_objlib sortstring_internal sortstring_internal_unittest)
  add_library(${internal_objlib} OBJECT ${INTERNAL_SRCS})
endforeach()
target_compile_definitions(sortstring_internal_unittest PRIVATE UNIT_TEST)

# Shared usage requirements for both internal object libraries. Everything is
# PUBLIC so that targets linking these libraries inherit the same settings.
foreach(tgt sortstring_internal sortstring_internal_unittest)
  # PROJECT_SOURCE_DIR rather than CMAKE_SOURCE_DIR: the include paths must
  # resolve to this project's tree even when it is built as a subproject.
  target_include_directories(${tgt} PUBLIC
    ${PROJECT_SOURCE_DIR}/src
    ${PROJECT_SOURCE_DIR}/src/util)
  # Map the C99 keyword `restrict` onto the __restrict__ spelling.
  target_compile_definitions(${tgt} PUBLIC restrict=__restrict__)
  # Expose the result of the <sys/sdt.h> probe to the sources.
  if(HAVE_SYS_SDT_H)
    target_compile_definitions(${tgt} PUBLIC HAVE_SYS_SDT_H=1)
  endif()
  target_link_libraries(${tgt} PUBLIC OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
  set_target_properties(${tgt} PROPERTIES
    C_STANDARD 99 C_STANDARD_REQUIRED YES
    CXX_STANDARD 17 CXX_STANDARD_REQUIRED YES)
endforeach()

# With ENABLE_GCC_ANALYZER on, compile both internal object libraries with
# -fanalyzer. PRIVATE keeps the flag off the targets that link them.
if(ENABLE_GCC_ANALYZER)
  foreach(analyzed_target sortstring_internal sortstring_internal_unittest)
    target_compile_options(${analyzed_target} PRIVATE -fanalyzer)
  endforeach()
endif()

# With ENABLE_CLANG_ANALYZER on, attach the clang-tidy command line to both
# internal object libraries for C and C++ sources alike.
if(ENABLE_CLANG_ANALYZER)
  foreach(tidy_lang C CXX)
    set_property(
      TARGET sortstring_internal sortstring_internal_unittest
      PROPERTY ${tidy_lang}_CLANG_TIDY "${CLANG_TIDY_COMMAND}")
  endforeach()
endif()

add_executable(unit-test unit-test/main.cpp ${INTERNAL_SRCS} ${EXTERNAL_SRCS})
target_compile_definitions(unit-test PUBLIC UNIT_TEST)
# Main program: its own driver source plus the external (third-party) sources,
# linked against the internal object library and librt.
add_executable(sortstring src/sortstring.cpp ${EXTERNAL_SRCS})
target_link_libraries(sortstring PRIVATE sortstring_internal rt)

add_definitions(-Drestrict=__restrict__)
set(CMAKE_CXX_FLAGS_RELEASE "-fopenmp -g -DNDEBUG -march=native ${CMAKE_CXX_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-fopenmp -g -DNDEBUG -march=native ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
set(CMAKE_C_FLAGS_RELEASE "-fopenmp -g -DNDEBUG -march=native ${CMAKE_C_FLAGS_RELEASE}")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-fopenmp -g -DNDEBUG -march=native ${CMAKE_C_FLAGS_RELWITHDEBINFO}")
set(CMAKE_CXX_FLAGS "-Wall -Wextra ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-Wall -Wextra -std=c99 ${CMAKE_C_FLAGS}")
# Unit-test program: test driver plus the external sources, linked against the
# UNIT_TEST flavour of the internal object library and librt.
add_executable(unit-test unit-test/main.cpp ${EXTERNAL_SRCS})
target_link_libraries(unit-test PRIVATE sortstring_internal_unittest rt)
# The executable's own sources are compiled with UNIT_TEST defined as well.
target_compile_definitions(unit-test PRIVATE UNIT_TEST)

set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O1 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O1 -g -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2")
# Both executables require C99 and C++17 (no fallback to an older standard).
foreach(exe_target sortstring unit-test)
  set_target_properties(${exe_target} PROPERTIES
    C_STANDARD 99
    C_STANDARD_REQUIRED YES
    CXX_STANDARD 17
    CXX_STANDARD_REQUIRED YES)
endforeach()
62 changes: 51 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,27 +40,67 @@ purposes, and are copyright by their respective authors.
Requirements
------------

* C++11
* CMake
* C++17
* CMake >= 3.16
* OpenMP
* Ninja (optional; the default Make generator also works)


Compilation
-----------

Default compilation with GCC:

$ git clone git://github.com/rantala/string-sorting.git
$ mkdir string-sorting-build
$ cd string-sorting-build
$ cmake -DCMAKE_BUILD_TYPE=Release ../string-sorting
$ make
$ ./sortstring
$ git clone https://github.com/rantala/string-sorting.git
$ cd string-sorting
$ cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release && ninja -C build
$ ./build/sortstring

Use a separate debug build for easier debugging:

$ mkdir debug-build
$ cd debug-build
$ cmake -DCMAKE_BUILD_TYPE=Debug ../string-sorting
$ cmake -B build-debug -G Ninja -DCMAKE_BUILD_TYPE=Debug && ninja -C build-debug


GCC static analyzer
-------------------

The build can be configured to run the GCC static analyzer (`-fanalyzer`) on
the project's own sources. Third-party code under `external/` is excluded.

$ cmake -B build-gcc-analyzer -G Ninja -DENABLE_GCC_ANALYZER=ON && ninja -C build-gcc-analyzer

The option requires GCC >= 13 for both C and C++; configuration fails fast
with any other compiler or older version. Analyzer diagnostics are emitted as
build warnings; the build still succeeds. Expect significantly longer compile
times when the option is enabled.


Clang static analyzer
---------------------

The build can also be configured to run the Clang static analyzer via
`clang-tidy`, scoped to the `clang-analyzer-*` check group. Third-party code
under `external/` is excluded.

$ cmake -B build-clang-analyzer -G Ninja -DENABLE_CLANG_ANALYZER=ON && ninja -C build-clang-analyzer

The option requires `clang-tidy` on `PATH` (Ubuntu: `apt install clang-tidy`);
configuration fails fast if it is not found. Diagnostics appear inline like
compiler warnings and the build still succeeds. The configured C/C++ compiler
does not need to be Clang. `ENABLE_GCC_ANALYZER` and `ENABLE_CLANG_ANALYZER`
can be combined when building with GCC >= 13.


Target architecture
-------------------

The compiler `-march=` value can be overridden via the `MARCH` cache
variable (default `native`):

$ cmake -B build-v3 -G Ninja -DMARCH=x86-64-v3 && ninja -C build-v3

This is useful for targeting a portable ISA level or for restricting the
instructions emitted by the compiler.


Huge pages
Expand Down
3 changes: 3 additions & 0 deletions src/burstsort_mkq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ burst_insert(TSTNode<CharT>* root, unsigned char** strings, size_t N)
}
CharT* oracle = static_cast<CharT*>(
malloc(buck->size()*sizeof(CharT)));
if (!oracle) abort();
for (unsigned j=0; j < buck->size(); ++j) {
oracle[j] = get_char<CharT>((*buck)[j], depth);
}
Expand Down Expand Up @@ -327,6 +328,7 @@ template <typename CharT>
static inline void
burstsort_mkq_simpleburst(unsigned char** strings, size_t N)
{
if (N == 0) return;
typedef std::vector<unsigned char*> BucketT;
typedef BurstSimple<CharT> BurstImpl;
TSTNode<CharT> root;
Expand Down Expand Up @@ -355,6 +357,7 @@ template <typename CharT>
static inline void
burstsort_mkq_recursiveburst(unsigned char** strings, size_t N)
{
if (N == 0) return;
typedef std::vector<unsigned char*> BucketT;
typedef BurstRecursive<CharT> BurstImpl;
TSTNode<CharT> root;
Expand Down
32 changes: 32 additions & 0 deletions src/funnelsort_bfs_128way.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include "funnelsort_impl.h"

// Explicit instantiation of funnelsort<128, buffer_layout_bfs>, so the code
// for this template configuration is emitted in this translation unit.
template void funnelsort<128, buffer_layout_bfs>(
unsigned char**, size_t, unsigned char**);

// Non-template entry point: forwards `strings` and `n` unchanged to
// funnelsort_Kway<128, buffer_layout_bfs>.
void funnelsort_128way_bfs(unsigned char** strings, size_t n)
{ funnelsort_Kway<128, buffer_layout_bfs>(strings, n); }

// NOTE(review): ROUTINE_REGISTER_SINGLECORE is defined outside this file;
// presumably it registers the function above under the given name as a
// single-core routine -- confirm against the macro definition.
ROUTINE_REGISTER_SINGLECORE(funnelsort_128way_bfs,
"funnelsort_128way_bfs")
32 changes: 32 additions & 0 deletions src/funnelsort_bfs_16way.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include "funnelsort_impl.h"

// Explicit instantiation of funnelsort<16, buffer_layout_bfs>, so the code
// for this template configuration is emitted in this translation unit.
template void funnelsort<16, buffer_layout_bfs>(
unsigned char**, size_t, unsigned char**);

// Non-template entry point: forwards `strings` and `n` unchanged to
// funnelsort_Kway<16, buffer_layout_bfs>.
void funnelsort_16way_bfs(unsigned char** strings, size_t n)
{ funnelsort_Kway<16, buffer_layout_bfs>(strings, n); }

// NOTE(review): ROUTINE_REGISTER_SINGLECORE is defined outside this file;
// presumably it registers the function above under the given name as a
// single-core routine -- confirm against the macro definition.
ROUTINE_REGISTER_SINGLECORE(funnelsort_16way_bfs,
"funnelsort_16way_bfs")
32 changes: 32 additions & 0 deletions src/funnelsort_bfs_32way.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2008 by Tommi Rantala <tt.rantala@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include "funnelsort_impl.h"

// Explicit instantiation of funnelsort<32, buffer_layout_bfs>, so the code
// for this template configuration is emitted in this translation unit.
template void funnelsort<32, buffer_layout_bfs>(
unsigned char**, size_t, unsigned char**);

// Non-template entry point: forwards `strings` and `n` unchanged to
// funnelsort_Kway<32, buffer_layout_bfs>.
void funnelsort_32way_bfs(unsigned char** strings, size_t n)
{ funnelsort_Kway<32, buffer_layout_bfs>(strings, n); }

// NOTE(review): ROUTINE_REGISTER_SINGLECORE is defined outside this file;
// presumably it registers the function above under the given name as a
// single-core routine -- confirm against the macro definition.
ROUTINE_REGISTER_SINGLECORE(funnelsort_32way_bfs,
"funnelsort_32way_bfs")
Loading