From 4b54fc5473b9350330d8b7535fe7412905ac091b Mon Sep 17 00:00:00 2001 From: Jiacheng Huang Date: Wed, 20 May 2026 15:22:50 +0800 Subject: [PATCH] feat: add public C++ operator API --- CMakeLists.txt | 6 +- cmake/InfiniOpsConfig.cmake.in | 3 + cmake/infiniops.pc.in | 10 ++ include/infini/ops.h | 8 + scripts/generate_wrappers.py | 283 ++++++++++++++++++++++++++++++++- src/CMakeLists.txt | 106 +++++++++++- src/common/constexpr_map.h | 1 + src/common/traits.h | 1 + src/data_type.h | 2 + src/hash.h | 1 + src/tensor.h | 1 + tests/test_cpp_api.py | 101 ++++++++++++ 12 files changed, 511 insertions(+), 12 deletions(-) create mode 100644 cmake/InfiniOpsConfig.cmake.in create mode 100644 cmake/infiniops.pc.in create mode 100644 include/infini/ops.h create mode 100644 tests/test_cpp_api.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 9973438cf..e75459c38 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.18) -project(InfiniOps LANGUAGES CXX) +project(InfiniOps VERSION 0.1.0 LANGUAGES CXX) + +include(GNUInstallDirs) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -27,6 +29,7 @@ option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requi option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF) +option(GENERATE_CPP_OPERATOR_API "Generate public C++ operator API" ON) option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF) if(AUTO_DETECT_DEVICES) @@ -351,6 +354,7 @@ endif() # If all other platforms are not enabled, CPU is enabled by default. if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND) + set(WITH_CPU ON CACHE BOOL "Enable CPU backend" FORCE) add_compile_definitions(WITH_CPU=1) endif() diff --git a/cmake/InfiniOpsConfig.cmake.in b/cmake/InfiniOpsConfig.cmake.in new file mode 100644 index 000000000..af1f50794 --- /dev/null +++ b/cmake/InfiniOpsConfig.cmake.in @@ -0,0 +1,3 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/InfiniOpsTargets.cmake") diff --git a/cmake/infiniops.pc.in b/cmake/infiniops.pc.in new file mode 100644 index 000000000..09b544ef0 --- /dev/null +++ b/cmake/infiniops.pc.in @@ -0,0 +1,10 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: InfiniOps +Description: InfiniOps operator library +Version: @PROJECT_VERSION@ +Libs: -L${libdir} -linfiniops +Cflags: -I${includedir} diff --git a/include/infini/ops.h b/include/infini/ops.h new file mode 100644 index 000000000..db17bd335 --- /dev/null +++ b/include/infini/ops.h @@ -0,0 +1,8 @@ +#ifndef INFINI_OPS_H_ +#define INFINI_OPS_H_ + +#ifdef __cplusplus +#include +#endif + +#endif // INFINI_OPS_H_ diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py index f2ff37065..b57fd286a 100644 --- a/scripts/generate_wrappers.py +++ b/scripts/generate_wrappers.py @@ -9,8 +9,12 @@ import subprocess import textwrap -import clang.cindex -from clang.cindex import CursorKind +try: + import clang.cindex + from clang.cindex import CursorKind +except ImportError: + clang = None + CursorKind = None _SRC_DIR = pathlib.Path("src") @@ -29,6 +33,8 @@ _INCLUDE_DIR = _GENERATION_DIR / "include" +_PUBLIC_INCLUDE_DIR = _INCLUDE_DIR / "infini" + _INDENTATION = " " @@ -74,8 +80,30 @@ def _find_base_header(op_name): raise FileNotFoundError(f"no base header for op {op_name!r}") +class _ParsedType: + def __init__(self, spelling): + self.spelling = spelling + + +class _ParsedArgument: + def __init__(self, type_spelling, spelling): + self.type = _ParsedType(type_spelling) + self.spelling = spelling + + +class _ParsedFunction: + def __init__(self, arguments): + self._arguments = arguments + + def get_arguments(self): + return self._arguments + + class _OperatorExtractor: def __call__(self, op_name): + if clang is None: + return _parse_operator_header(op_name) + index = clang.cindex.Index.create() args = ( "-std=c++17", @@ -115,6 +143,131 @@ def _find(node, op_name): yield from _OperatorExtractor._find(child, op_name) +def _parse_operator_header(op_name): + pascal_case_op_name = _snake_to_pascal(op_name) + source = _strip_cpp_comments(_find_base_header(op_name).read_text()) + class_body = _extract_class_body(source, pascal_case_op_name) + constructors = [ + _ParsedFunction(_parse_parameter_list(params)) + for params in _find_signature_parameters( + class_body, rf"(?:explicit\s+)?{pascal_case_op_name}\s*\(" + ) + ] + calls = [ + _ParsedFunction(_parse_parameter_list(params)) + for params in _find_signature_parameters( + class_body, r"(?:virtual\s+)?void\s+operator\s*\(\s*\)\s*\(" + ) + ] + + return _Operator(op_name, constructors, calls) + + +def _strip_cpp_comments(source): + source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL) + return re.sub(r"//.*", "", source) + + +def _extract_class_body(source, class_name): + match = re.search(rf"\bclass\s+{class_name}\b[^{{]*{{", source) + + if match is None: + raise ValueError(f"no class definition for {class_name!r}") + + start = match.end() + depth = 1 + index = start + + while index < len(source): + char = source[index] + + if char == "{": + depth += 1 + elif char == "}": + depth -= 1 + if depth == 0: + return source[start:index] + + index += 1 + + raise ValueError(f"unterminated class definition for {class_name!r}") + + +def _find_signature_parameters(source, pattern): + params = [] + + for match in re.finditer(pattern, source): + opening_paren = match.end() - 1 + + if opening_paren < 0 or source[opening_paren] != "(": + continue + + closing_paren = _find_matching_delimiter(source, opening_paren, "(", ")") + params.append(source[opening_paren + 1 : closing_paren]) + + return params + + +def _find_matching_delimiter(source, start, opening, closing): + depth = 0 + + for index in range(start, len(source)): + char = source[index] + + if char == opening: + depth += 1 + elif char == closing: + depth -= 1 + if depth == 0: + return index + + raise ValueError(f"unmatched delimiter {opening!r}") + + +def _parse_parameter_list(params): + arguments = [] + + for param in _split_top_level(params, ","): + param = _strip_default_argument(param.strip()) + + if not param or param == "void": + continue + + match = re.match(r"(.+?[\s*&]+)([A-Za-z_][A-Za-z0-9_]*)$", param) + + if match is None: + raise ValueError(f"could not parse parameter {param!r}") + + arguments.append(_ParsedArgument(match.group(1).strip(), match.group(2))) + + return arguments + + +def _split_top_level(text, delimiter): + parts = [] + start = 0 + depth = 0 + pairs = {"<": ">", "(": ")", "[": "]", "{": "}"} + closing = {value: key for key, value in pairs.items()} + + for index, char in enumerate(text): + if char in pairs: + depth += 1 + elif char in closing: + depth -= 1 + elif char == delimiter and depth == 0: + parts.append(text[start:index]) + start = index + 1 + + parts.append(text[start:]) + return parts + + +def _strip_default_argument(param): + parts = _split_top_level(param, "=") + return parts[0].strip() + + class _Operator: def __init__(self, name, constructors, calls): self.name = name @@ -268,7 +421,7 @@ def _generate_call(op_name, call, method=True): f" }}\n" f" Config config;\n" f" config.set_implementation_index(implementation_index);\n" - f" return generated_dispatch::Call{pascal_case_op_name}(handle, config, {call_args});\n" + f" return functional::{pascal_case_op_name}(handle, config, {call_args});\n" f' }}, {py_args_str}py::kw_only(), py::arg("stream") = 0, py::arg("implementation_index") = 0);' ) @@ -328,6 +481,7 @@ def _overload_order_key(node): #include "base/{op_name}.h" #include "config.h" +#include "infini/ops.h" #include "generated/bindings/generated_dispatch.h" #include "handle.h" #include "pybind11_utils.h" @@ -620,6 +774,54 @@ def _append_optional_params(prefix, params): return declarations, definitions +def _generate_functional_entries(operator): + def _generate_params(node): + return ", ".join( + f"{arg.type.spelling} {arg.spelling}" + for arg in node.get_arguments() + if arg.spelling != "stream" + ) + + def _generate_arguments(node): + return ", ".join( + arg.spelling for arg in node.get_arguments() if arg.spelling != "stream" + ) + + def _append_optional_args(prefix, args): + if args: + return f"{prefix}, {args}" + + return prefix + + def _append_optional_params(prefix, params): + if params: + return f"{prefix}, {params}" + + return prefix + + pascal_case_op_name = _snake_to_pascal(operator.name) + op_type = f"::infini::ops::{pascal_case_op_name}" + operator_type = f"::infini::ops::Operator<{op_type}>" + declarations = [] + definitions = [] + + for call in operator.calls: + params = _generate_params(call) + args = _generate_arguments(call) + function_params = _append_optional_params( + "const Handle& handle, const Config& config", params + ) + + declarations.append(f"void {pascal_case_op_name}({function_params});") + definitions.append( + f"""void {pascal_case_op_name}({function_params}) {{ + return {operator_type}::Call({_append_optional_args("handle, config", args)}); +}}""" + ) + + return declarations, definitions + + def _generate_generated_dispatch_header(op_names, devices, declarations): header_base_includes = "\n".join( f'#include "base/{op_name}.h"' for op_name in op_names @@ -672,6 +874,52 @@ def _generate_generated_dispatch_source(impl_paths, definitions): """ +def _generate_functional_header(declarations): + return f"""#ifndef INFINI_OPS_FUNCTIONAL_OPS_H_ +#define INFINI_OPS_FUNCTIONAL_OPS_H_ + +#include +#include +#include +#include + +#include "config.h" +#include "data_type.h" +#include "device.h" +#include "handle.h" +#include "tensor.h" + +namespace infini::ops::functional {{ + +{chr(10).join(declarations)} + +}} // namespace infini::ops::functional + +#endif +""" + + +def _generate_functional_source(op_names, impl_paths, definitions): + base_includes = "\n".join(f'#include "base/{op_name}.h"' for op_name in op_names) + impl_includes = "\n".join( + f'#include "{_to_include_path(impl_path)}"' for impl_path in impl_paths + ) + + return f"""#include "infini/functional_ops.h" + +// clang-format off +{base_includes} +{impl_includes} +// clang-format on + +namespace infini::ops::functional {{ + +{chr(10).join(definitions)} + +}} // namespace infini::ops::functional +""" + + def _device_marker_headers(devices): paths = { "cpu": "native/cpu/device_.h", @@ -789,6 +1037,9 @@ def _generate_op_artifacts(item): dispatch_declarations, dispatch_definitions = _generate_generated_dispatch_entries( operator ) + functional_declarations, functional_definitions = _generate_functional_entries( + operator + ) return { "op_name": op_name, @@ -800,6 +1051,8 @@ def _generate_op_artifacts(item): "legacy_c_header": legacy_c_header, "dispatch_declarations": dispatch_declarations, "dispatch_definitions": dispatch_definitions, + "functional_declarations": functional_declarations, + "functional_definitions": functional_definitions, "impl_paths": impl_paths, } @@ -865,6 +1118,8 @@ def _dispatch_gen_batch_size(): directory.mkdir(parents=True) + _PUBLIC_INCLUDE_DIR.mkdir(parents=True, exist_ok=True) + ops_json = pathlib.Path("ops.json") if ops_json.exists(): @@ -888,6 +1143,11 @@ def _dispatch_gen_batch_size(): for artifact in artifacts for declaration in artifact["dispatch_declarations"] ] + functional_declarations = [ + declaration + for artifact in artifacts + for declaration in artifact["functional_declarations"] + ] use_monolithic_bindings = _use_monolithic_bindings() op_includes = [] @@ -917,6 +1177,9 @@ def _dispatch_gen_batch_size(): ) (_BINDINGS_DIR / "generated_dispatch.h").write_text(dispatch_header) + functional_header = _generate_functional_header(functional_declarations) + (_PUBLIC_INCLUDE_DIR / "functional_ops.h").write_text(functional_header) + dispatch_batch_size = _dispatch_gen_batch_size() for dispatch_batch_index, start in enumerate( @@ -938,6 +1201,20 @@ def _dispatch_gen_batch_size(): dispatch_source ) + functional_definitions = [ + definition + for artifact in batch + for definition in artifact["functional_definitions"] + ] + functional_source = _generate_functional_source( + [artifact["op_name"] for artifact in batch], + impl_paths, + functional_definitions, + ) + (_GENERATED_SRC_DIR / f"functional_ops_{dispatch_batch_index}.cc").write_text( + functional_source + ) + bind_func_calls = "\n".join( f"{bind_func_name}(m);" for bind_func_name in bind_func_names ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 762b9d48f..6086e072b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -446,14 +446,20 @@ if(WITH_TORCH) endif() endif() -target_include_directories(infiniops PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) - -if(GENERATE_PYTHON_BINDINGS) +target_include_directories(infiniops + PUBLIC + $ + $ + $ + $ +) + +if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) find_package(Python COMPONENTS Interpreter REQUIRED) - # Always regenerate bindings so the included kernel headers match the - # active device list. Stale generated files (e.g., committed for one - # platform) would omit specializations for other enabled backends, - # causing link-time or runtime failures. + # Always regenerate wrappers so the generated functional API and pybind11 + # dispatch code match the active device list. Stale generated files (e.g., + # committed for one platform) would omit specializations for other enabled + # backends, causing link-time or runtime failures. set(GENERATOR_ARGS --devices ${DEVICE_LIST}) if(WITH_TORCH) @@ -472,6 +478,18 @@ if(GENERATE_PYTHON_BINDINGS) message(STATUS "Generating wrappers - done") endif() + file(GLOB_RECURSE FUNCTIONAL_API_SOURCES CONFIGURE_DEPENDS + "${PROJECT_SOURCE_DIR}/generated/src/functional_ops_*.cc") + + if(WITH_NVIDIA OR WITH_ILUVATAR) + set_source_files_properties(${FUNCTIONAL_API_SOURCES} + PROPERTIES LANGUAGE CUDA) + endif() + + target_sources(infiniops PRIVATE ${FUNCTIONAL_API_SOURCES}) +endif() + +if(GENERATE_PYTHON_BINDINGS) file(GLOB_RECURSE PYBIND11_SOURCES CONFIGURE_DEPENDS "${PROJECT_SOURCE_DIR}/generated/bindings/*.cc") @@ -567,7 +585,11 @@ if(GENERATE_PYTHON_BINDINGS) target_compile_options(ops PRIVATE "-x" "musa") endif() - target_include_directories(ops PRIVATE ${PROJECT_SOURCE_DIR}) + target_include_directories(ops PRIVATE + ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/generated/include + ) target_link_libraries(ops PRIVATE infiniops) # Custom `AscendC` kernel objects must be linked directly into ops @@ -596,3 +618,71 @@ if(GENERATE_PYTHON_BINDINGS) DESTINATION .) endif() endif() + +include(CMakePackageConfigHelpers) + +configure_file( + ${PROJECT_SOURCE_DIR}/cmake/infiniops.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc + @ONLY +) + +configure_package_config_file( + ${PROJECT_SOURCE_DIR}/cmake/InfiniOpsConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps +) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion +) + +install(TARGETS infiniops + EXPORT InfiniOpsTargets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) + +install(FILES ${PROJECT_SOURCE_DIR}/include/infini/ops.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini +) + +if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) + install(FILES ${PROJECT_SOURCE_DIR}/generated/include/infini/functional_ops.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini + ) +endif() + +install(FILES + ${PROJECT_SOURCE_DIR}/src/config.h + ${PROJECT_SOURCE_DIR}/src/data_type.h + ${PROJECT_SOURCE_DIR}/src/device.h + ${PROJECT_SOURCE_DIR}/src/handle.h + ${PROJECT_SOURCE_DIR}/src/hash.h + ${PROJECT_SOURCE_DIR}/src/tensor.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + +install(FILES + ${PROJECT_SOURCE_DIR}/src/common/constexpr_map.h + ${PROJECT_SOURCE_DIR}/src/common/traits.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common +) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig +) + +install(EXPORT InfiniOpsTargets + NAMESPACE InfiniOps:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps +) + +install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps +) diff --git a/src/common/constexpr_map.h b/src/common/constexpr_map.h index 7454f548a..450b4faf2 100644 --- a/src/common/constexpr_map.h +++ b/src/common/constexpr_map.h @@ -3,6 +3,7 @@ #include #include +#include #include #include diff --git a/src/common/traits.h b/src/common/traits.h index c746f4cb2..8ce3e02f7 100644 --- a/src/common/traits.h +++ b/src/common/traits.h @@ -1,6 +1,7 @@ #ifndef INFINI_OPS_COMMON_TRAITS_H_ #define INFINI_OPS_COMMON_TRAITS_H_ +#include #include #include diff --git a/src/data_type.h b/src/data_type.h index 75483d2b8..a6bd72e6b 100644 --- a/src/data_type.h +++ b/src/data_type.h @@ -1,9 +1,11 @@ #ifndef INFINI_OPS_DATA_TYPE_H_ #define INFINI_OPS_DATA_TYPE_H_ +#include #include #include #include +#include #include "common/constexpr_map.h" #include "common/traits.h" diff --git a/src/hash.h b/src/hash.h index 4721f33f3..f7e79a162 100644 --- a/src/hash.h +++ b/src/hash.h @@ -1,6 +1,7 @@ #ifndef INFINI_OPS_HASH_H_ #define INFINI_OPS_HASH_H_ +#include #include #include diff --git a/src/tensor.h b/src/tensor.h index 290e3cf96..f45527f46 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -1,6 +1,7 @@ #ifndef INFINI_OPS_TENSOR_H_ #define INFINI_OPS_TENSOR_H_ +#include #include #include #include diff --git a/tests/test_cpp_api.py b/tests/test_cpp_api.py new file mode 100644 index 000000000..62eb8969d --- /dev/null +++ b/tests/test_cpp_api.py @@ -0,0 +1,101 @@ +import os +import subprocess +import textwrap +from pathlib import Path + +import pytest + + +def test_cpp_functional_add_smoke(tmp_path): + install_prefix = _install_prefix() + include_dir = install_prefix / "include" + library_dir = _library_dir(install_prefix) + source = tmp_path / "add_smoke.cc" + binary = tmp_path / "add_smoke" + source.write_text(_ADD_SMOKE_SOURCE) + + _run( + [ + _compiler("CXX", "c++"), + "-std=c++17", + "-Werror", + f"-I{include_dir}", + str(source), + f"-L{library_dir}", + "-linfiniops", + f"-Wl,-rpath,{library_dir}", + "-o", + str(binary), + ] + ) + _run([str(binary)]) + + +def _install_prefix(): + prefix = os.environ.get("INFINIOPS_INSTALL_PREFIX") + + if prefix: + return Path(prefix) + + pytest.skip("`INFINIOPS_INSTALL_PREFIX` is not set.") + + +def _library_dir(prefix): + for name in ("lib", "lib64"): + library_dir = prefix / name + if (library_dir / "libinfiniops.so").exists(): + return library_dir + + pytest.skip(f"`libinfiniops.so` was not found under `{prefix}`.") + + +def _compiler(env_name, default): + compiler = os.environ.get(env_name, default) + + if not compiler: + pytest.skip(f"`{env_name}` is not configured.") + + return compiler + + +def _run(command): + try: + subprocess.run(command, check=True, text=True, capture_output=True) + except FileNotFoundError as error: + pytest.skip(f"`{command[0]}` is not available: {error}") + except subprocess.CalledProcessError as error: + output = "\n".join((error.stdout, error.stderr)).strip() + raise AssertionError(output) from error + + +_ADD_SMOKE_SOURCE = textwrap.dedent( + r""" + #include + + #include + + int main() { + float input_data[3] = {1.0f, 2.0f, 3.0f}; + float other_data[3] = {4.0f, 5.0f, 6.0f}; + float output_data[3] = {0.0f, 0.0f, 0.0f}; + + const infini::ops::Tensor::Shape shape{3}; + const infini::ops::Device device{infini::ops::Device::Type::kCpu}; + const infini::ops::DataType data_type{infini::ops::DataType::kFloat32}; + + infini::ops::Tensor input(input_data, shape, data_type, device); + infini::ops::Tensor other(other_data, shape, data_type, device); + infini::ops::Tensor output(output_data, shape, data_type, device); + infini::ops::Handle handle; + infini::ops::Config config; + + infini::ops::functional::Add(handle, config, input, other, output); + + if (output_data[0] != 5.0f || output_data[1] != 7.0f || + output_data[2] != 9.0f) { + return 1; + } + return 0; + } + """ +).lstrip()