Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 66 additions & 3 deletions .buildkite/ml_pipeline/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,17 @@
import os
import re

# Keys allowed in the optional tail of trigger_comment_regex (group serverless_kv).
# Each key's value is additionally validated per-key (boolean literal or a
# restricted character set) before being exported to os.environ -- see
# Config._apply_serverless_kv_from_comment.
_SERVERLESS_KV_KEYS = frozenset(
    {
        "KEEP_DEPLOYMENT",
        "REGION_ID",
        "PROJECT_TYPE",
        "ES_SERVERLESS_BRANCH",
    }
)


class Config:
build_windows: bool = False
build_macos: bool = False
Expand All @@ -19,6 +30,8 @@ class Config:
build_x86_64: str = ""
run_qa_tests: bool = False
run_pytorch_tests: bool = False
run_serverless_tests: bool = False
deploy_serverless_qa: bool = False
action: str = "build"

def parse_comment(self):
Expand All @@ -37,9 +50,13 @@ def parse_comment(self):
self.action = os.environ["GITHUB_PR_COMMENT_VAR_ACTION"]
self.run_qa_tests = self.action == "run_qa_tests"
self.run_pytorch_tests = self.action == "run_pytorch_tests"
if self.run_pytorch_tests or self.run_qa_tests:
self.run_serverless_tests = self.action == "run_serverless_tests"
self.deploy_serverless_qa = self.action == "deploy_serverless_qa"
if self.run_pytorch_tests or self.run_qa_tests or self.run_serverless_tests or self.deploy_serverless_qa:
self.action = "build"

self._apply_serverless_kv_from_comment()

# If the ACTION is set to "run_qa_tests" then set some optional variables governing the ES branch to build, the
# stack version to set and the subset of QA tests to run, depending on whether appropriate variables are set in
# the environment.
Expand All @@ -66,6 +83,9 @@ def parse_comment(self):
self.build_x86_64 = "--build-x86_64"
elif self.run_qa_tests or self.run_pytorch_tests:
self.build_x86_64 = "--build-x86_64"
elif self.run_serverless_tests or self.deploy_serverless_qa:
self.build_aarch64 = "--build-aarch64"
self.build_x86_64 = "--build-x86_64"
else:
self.build_aarch64 = "--build-aarch64"
self.build_x86_64 = "--build-x86_64"
Expand All @@ -83,13 +103,21 @@ def parse_comment(self):
self.build_macos = True
elif each == "linux":
self.build_linux = True
elif self.run_qa_tests or self.run_pytorch_tests:
elif self.run_qa_tests or self.run_pytorch_tests or self.run_serverless_tests or self.deploy_serverless_qa:
self.build_linux = True
else:
self.build_windows = True
self.build_macos = True
self.build_linux = True

# Serverless runner pipelines depend on both Linux aarch64 and x86_64
# build steps. Normalize after platform/arch parsing so PR comment tails
# cannot leave dangling depends_on keys or skip Linux builds.
if self.run_serverless_tests or self.deploy_serverless_qa:
self.build_aarch64 = "--build-aarch64"
self.build_x86_64 = "--build-x86_64"
self.build_linux = True

# If no explicit action was set (e.g. "buildkite test this" via
# always_trigger_comment_regex), check PR labels for QA/PyTorch
# flags. This is done after platform/arch defaults so that
Expand All @@ -100,11 +128,15 @@ def parse_comment(self):
self.run_qa_tests = True
if "ci:run-pytorch-tests" in labels:
self.run_pytorch_tests = True
if "ci:run-serverless-tests" in labels:
self.run_serverless_tests = True
if "ci:deploy-serverless-qa" in labels:
self.deploy_serverless_qa = True

def parse_label(self):
""" Parse labels set on GitHub PR comments."""

build_labels = ['ci:build-linux','ci:build-macos','ci:build-windows','ci:run-qa-tests','ci:run-pytorch-tests','ci:build-aarch64','ci:build-x86_64']
build_labels = ['ci:build-linux','ci:build-macos','ci:build-windows','ci:run-qa-tests','ci:run-pytorch-tests','ci:run-serverless-tests','ci:deploy-serverless-qa','ci:build-aarch64','ci:build-x86_64']
all_labels = [x.strip().lower() for x in os.environ["GITHUB_PR_LABELS"].split(",")]
ci_labels = [label for label in all_labels if re.search("|".join(build_labels), label)]
if not ci_labels:
Expand Down Expand Up @@ -137,6 +169,16 @@ def parse_label(self):
self.build_macos = True
self.build_linux = True
self.run_pytorch_tests = True
if "ci:run-serverless-tests" == label:
self.build_linux = True
self.build_aarch64 = "--build-aarch64"
self.build_x86_64 = "--build-x86_64"
self.run_serverless_tests = True
if "ci:deploy-serverless-qa" == label:
self.build_linux = True
self.build_aarch64 = "--build-aarch64"
self.build_x86_64 = "--build-x86_64"
self.deploy_serverless_qa = True
if self.build_aarch64 == "" and self.build_x86_64 == "":
self.build_aarch64 = "--build-aarch64"
self.build_x86_64 = "--build-x86_64"
Expand All @@ -156,3 +198,24 @@ def parse(self):
self.build_x86_64 = "--build-x86_64"
self.run_qa_tests = False

def _apply_serverless_kv_from_comment(self):
    """Export whitelisted KEY=value tokens from the PR comment capture into os.environ.

    Reads the GITHUB_PR_COMMENT_VAR_SERVERLESS_KV variable (populated from the
    ``serverless_kv`` group of trigger_comment_regex). Tokens with an unknown
    key, a missing ``=``, or a value that fails the per-key validation are
    silently ignored; valid tokens are written to os.environ with their value
    preserved verbatim (later duplicates overwrite earlier ones).
    """

    raw = os.environ.get("GITHUB_PR_COMMENT_VAR_SERVERLESS_KV", "").strip()
    if not raw:
        return

    # Per-key value validators. Keys mirror _SERVERLESS_KV_KEYS -- keep in sync.
    # KEEP_DEPLOYMENT must be a boolean literal (any case); the rest are
    # restricted to conservative identifier-ish character sets.
    validators = {
        "KEEP_DEPLOYMENT": lambda v: v.lower() in ("true", "false"),
        "REGION_ID": lambda v: re.fullmatch(r"[A-Za-z0-9_.:-]+", v) is not None,
        "PROJECT_TYPE": lambda v: re.fullmatch(r"[A-Za-z0-9_.:-]+", v) is not None,
        "ES_SERVERLESS_BRANCH": lambda v: re.fullmatch(r"[A-Za-z0-9_./-]+", v) is not None,
    }

    for token in raw.split():
        key, sep, value = token.partition("=")
        is_valid = validators.get(key)
        if sep and is_valid is not None and is_valid(value):
            os.environ[key] = value

22 changes: 22 additions & 0 deletions .buildkite/pipeline.json.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#

import json
import os

from ml_pipeline import (
step,
Expand Down Expand Up @@ -52,6 +53,16 @@ def main():
"VERSION_QUALIFIER": "",
"ML_BUILD_STEP_KEYS": ",".join(build_step_keys),
}
if config.run_serverless_tests or config.deploy_serverless_qa:
for serverless_env_key in (
"KEEP_DEPLOYMENT",
"REGION_ID",
"PROJECT_TYPE",
"ES_SERVERLESS_BRANCH",
):
value = os.environ.get(serverless_env_key)
if value:
env[serverless_env_key] = value

if config.build_windows:
build_windows = pipeline_steps.generate_step_template("Windows", config.action, "", config.build_x86_64)
Expand Down Expand Up @@ -79,6 +90,17 @@ def main():
pipeline_steps.append(pipeline_steps.generate_step("Upload ES tests aarch64 runner pipeline",
".buildkite/pipelines/run_es_tests_aarch64.yml.sh"))

# Serverless tests/deploy require both Linux aarch64 and x86_64 build steps.
linux_both_arches = (
config.build_linux and config.build_aarch64 and config.build_x86_64
)
if linux_both_arches and config.run_serverless_tests:
pipeline_steps.append(pipeline_steps.generate_step("Upload serverless tests runner pipeline",
".buildkite/pipelines/run_serverless_tests.yml.sh"))
if linux_both_arches and config.deploy_serverless_qa:
pipeline_steps.append(pipeline_steps.generate_step("Upload serverless QA deploy pipeline",
".buildkite/pipelines/deploy_serverless_qa.yml.sh"))

# Check for build timing regressions against nightly baseline
pipeline_steps.append(pipeline_steps.generate_step("Check build timing regressions",
".buildkite/pipelines/check_build_regression.yml.sh",
Expand Down
45 changes: 45 additions & 0 deletions .buildkite/pipelines/deploy_serverless_qa.yml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.

# Pipeline: build a serverless Docker image with custom ml-cpp and deploy it
# to the QA environment for interactive use. Unlike run_serverless_tests.yml.sh,
# this does NOT run E2E tests -- it just gets the environment running so the
# developer can interact with it (deploy models, run queries, kubectl, etc.).
#
# The deployment stays up for 1 hour by default. Set KEEP_DEPLOYMENT=true
# (via the Buildkite UI) to keep it longer. The build annotations will
# contain the URL and encrypted credentials for accessing the deployment.

ML_CPP_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# shellcheck source=dev-tools/serverless_buildkite_trigger_prepare.sh
# NOTE(review): the helper functions (prepareMlCppServerlessTriggerContext,
# assignServerlessQaTriggerEnvYamlEscapes, emitServerlessUploadMlCppDepsStepYaml)
# and the PR_NUM / SERVERLESS_BRANCH / ES_COMMIT / *_SAFE variables used below
# presumably come from this sourced file -- confirm against
# dev-tools/serverless_buildkite_trigger_prepare.sh.
source "${ML_CPP_ROOT}/dev-tools/serverless_buildkite_trigger_prepare.sh"

# Abort the upload if the shared trigger context cannot be prepared.
prepareMlCppServerlessTriggerContext "${BASH_SOURCE[0]}" || exit 1
assignServerlessQaTriggerEnvYamlEscapes

# Progress message goes to stderr: stdout of this script is the generated
# pipeline YAML (consumed by the pipeline upload step).
echo "Deploying to serverless QA with custom ml-cpp from PR #${PR_NUM}" >&2

# Emit the pipeline YAML on stdout. The *_SAFE variables are presumably the
# YAML-escaped forms produced by assignServerlessQaTriggerEnvYamlEscapes above.
cat <<EOL
steps:
$(emitServerlessUploadMlCppDepsStepYaml)
- label: ":rocket: Deploy custom ml-cpp to serverless QA"
  depends_on: "upload_ml_cpp_deps"
  async: false
  trigger: elasticsearch-serverless-deploy-qa
  build:
    branch: "${SERVERLESS_BRANCH}"
    message: "ml-cpp PR #${PR_NUM}: ${SAFE_MESSAGE}"
    env:
      ML_CPP_BUILD_ID: "${BUILDKITE_BUILD_ID}"
      ELASTICSEARCH_SUBMODULE_COMMIT: "${ES_COMMIT}"
      KEEP_DEPLOYMENT: "${KEEP_DEPLOYMENT_SAFE}"
      REGION_ID: "${REGION_ID_SAFE}"
      PROJECT_TYPE: "${PROJECT_TYPE_SAFE}"
EOL
72 changes: 72 additions & 0 deletions .buildkite/pipelines/run_serverless_tests.yml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.

# Pipeline: trigger the elasticsearch-serverless validation pipeline to build
# a Docker image incorporating custom ml-cpp artifacts from this build, then
# run E2E tests against MKI QA.
#
# The triggered pipeline uses $BUILDKITE_TRIGGERED_FROM_BUILD_ID to download
# ml-cpp artifacts from this build via buildkite-agent, sets up a local Ivy
# repo, and passes -Dbuild.ml_cpp.repo to the Gradle Docker build.
#
# This avoids cloning elasticsearch-serverless or needing AWS credentials
# in the ml-cpp PR pipeline.

ML_CPP_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# shellcheck source=dev-tools/serverless_buildkite_trigger_prepare.sh
# NOTE(review): the prepare/assign helpers and the PR_AUTHOR_FORK / PR_SOURCE /
# ES_COMMIT variables used below presumably come from this sourced file --
# confirm against dev-tools/serverless_buildkite_trigger_prepare.sh.
source "${ML_CPP_ROOT}/dev-tools/serverless_buildkite_trigger_prepare.sh"

# Abort the upload if the shared trigger context cannot be prepared.
prepareMlCppServerlessTriggerContext "${BASH_SOURCE[0]}" || exit 1

# --- Resolve ES PR number ---
# The serverless pipeline's PR-specific tests step looks up labels from the
# ES PR. First tries the ml-cpp PR author's matching ES PR (coordinated
# changes), then falls back to any recent open ES PR.
ES_PR_NUM=""
if [ -z "${ELASTICSEARCH_PR_NUMBER:-}" ]; then
    if [ -n "$PR_AUTHOR_FORK" ] && [ -n "$PR_SOURCE" ]; then
        # Unauthenticated GitHub API lookup of the author's matching ES PR.
        # Errors are deliberately swallowed (2>/dev/null and `|| true`) so a
        # rate limit or network failure degrades to the fallback below rather
        # than failing the pipeline upload; ES_PR_NUM stays empty on failure.
        ES_PR_NUM=$(curl -s "https://api.github.com/repos/elastic/elasticsearch/pulls?head=${PR_AUTHOR_FORK}:${PR_SOURCE}&state=open&per_page=1" 2>/dev/null \
            | python3 -c "import sys,json; prs=json.load(sys.stdin); print(prs[0]['number'] if prs else '')" 2>/dev/null || true)
    fi
    if [ -z "$ES_PR_NUM" ]; then
        # Fallback: the single most recent open ES PR (per_page=1, default sort).
        ES_PR_NUM=$(curl -s "https://api.github.com/repos/elastic/elasticsearch/pulls?state=open&per_page=1" 2>/dev/null \
            | python3 -c "import sys,json; prs=json.load(sys.stdin); print(prs[0]['number'] if prs else '')" 2>/dev/null || true)
    fi
fi
# An explicitly supplied ELASTICSEARCH_PR_NUMBER always overrides the lookup.
ES_PR_NUM="${ELASTICSEARCH_PR_NUMBER:-${ES_PR_NUM}}"
if [ -z "$ES_PR_NUM" ]; then
    # Non-fatal: the triggered pipeline step may still fail later.
    echo "WARNING: Could not resolve an ES PR number. The serverless PR-specific tests step may fail." >&2
fi
# Diagnostics to stderr: stdout of this script is the generated pipeline YAML.
echo "Using ES submodule commit: $ES_COMMIT, ES PR number: $ES_PR_NUM" >&2

assignServerlessQaTriggerEnvYamlEscapes

cat <<EOL
steps:
$(emitServerlessUploadMlCppDepsStepYaml)
- label: ":docker: :serverless: Build serverless image with custom ml-cpp"
depends_on: "upload_ml_cpp_deps"
async: false
trigger: elasticsearch-serverless-es-pr-check
Comment thread
edsavage marked this conversation as resolved.
build:
branch: "${SERVERLESS_BRANCH}"
message: "ml-cpp PR #${PR_NUM}: ${SAFE_MESSAGE}"
env:
UPDATE_SUBMODULE: "false"
ML_CPP_BUILD_ID: "${BUILDKITE_BUILD_ID}"
# ml-cpp repo commit at trigger time; serverless folds this into IMAGE_TAG
# with ML_CPP_BUILD_ID so Docker tags never collide with stock builds.
ML_CPP_COMMIT: "${BUILDKITE_COMMIT}"
ELASTICSEARCH_SUBMODULE_COMMIT: "${ES_COMMIT}"
ELASTICSEARCH_PR_NUMBER: "${ES_PR_NUM}"
Comment on lines +60 to +68
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description documents optional KEEP_DEPLOYMENT/REGION_ID/PROJECT_TYPE/ES_SERVERLESS_BRANCH tail parameters for buildkite run_serverless_tests ..., but this trigger step only passes a fixed env set into the downstream serverless pipeline. If REGION_ID/PROJECT_TYPE (and any other supported serverless overrides) are meant to affect the serverless build/test run, they need to be forwarded in this build.env block (similar to deploy_serverless_qa.yml.sh) so the triggered pipeline can see them.

Copilot uses AI. Check for mistakes.
KEEP_DEPLOYMENT: "${KEEP_DEPLOYMENT_SAFE}"
REGION_ID: "${REGION_ID_SAFE}"
PROJECT_TYPE: "${PROJECT_TYPE_SAFE}"
EOL
2 changes: 1 addition & 1 deletion .buildkite/pull-requests.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"commit_status_context": "ml-cpp-ci",
"build_on_commit": true,
"build_on_comment": true,
"trigger_comment_regex": "^(?:(?:buildkite +)(?<action>build|debug|run_qa_tests|run_pytorch_tests)(=(?<args>(?:[^ ]+)))? *(?: for ES_BRANCH=(?<branch>([.0-9a-zA-Z]+)))? *(?:with STACK_VERSION=(?<version>([.0-9]+)))? *(?: *on *(?<platform>(?:[ ,]*(?:windows|linux|mac(os)?))+))?) *(?<arch>(?:[, ]*aarch64|x86_64)+)?$",
"trigger_comment_regex": "^(?:(?:buildkite +)(?<action>build|debug|run_qa_tests|run_pytorch_tests|run_serverless_tests|deploy_serverless_qa)(=(?<args>(?:[^ ]+)))? *(?: for ES_BRANCH=(?<branch>([.0-9a-zA-Z]+)))? *(?:with STACK_VERSION=(?<version>([.0-9]+)))? *(?: *on *(?<platform>(?:[ ,]*(?:windows|linux|mac(os)?))+))?) *(?<arch>(?:[, ]*aarch64|x86_64)+)?(?: *(?<serverless_kv>(?:(?:KEEP_DEPLOYMENT|REGION_ID|PROJECT_TYPE|ES_SERVERLESS_BRANCH)=[^\\s]+)(?: +(?:KEEP_DEPLOYMENT|REGION_ID|PROJECT_TYPE|ES_SERVERLESS_BRANCH)=[^\\s]+)*))?$",
"always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))",
"skip_ci_labels": ["skip-ci", "jenkins-ci", ">test-mute", ">docs"],
"skip_target_branches": ["6.8", "7.11", "7.12"],
Expand Down
83 changes: 83 additions & 0 deletions dev-tools/pick_elasticsearch_clone_target.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/bin/bash
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.
#

# Shared logic to choose which elasticsearch fork/branch to use for ml-cpp CI:
# integration test clones (run_es_tests_common.sh) and Buildkite pipelines that
# need ELASTICSEARCH_SUBMODULE_COMMIT without cloning.
#
# Source this file, then call pickCloneTarget. It reads (in order of precedence):
# GITHUB_PR_OWNER / GITHUB_PR_BRANCH — when the job is tied to a GitHub PR
# PR_AUTHOR / PR_SOURCE_BRANCH — fork and branch for coordinated ml-cpp + ES changes
# elastic / PR_SOURCE_BRANCH — upstream branch matching the ml-cpp PR branch name
# elastic / PR_TARGET_BRANCH — target branch of the ml-cpp PR
# elastic / main — final fallback
#
# On success, SELECTED_FORK and SELECTED_BRANCH are set. Optional helper
# elasticsearch_selected_branch_head_sha prints the remote HEAD commit for that
# pair (same transport as isCloneTargetValid: git@github.com).
#
# This file must be sourced (not executed) so that SELECTED_* remain in the caller's shell.

function isCloneTargetValid {
    local candidate_fork="$1"
    local candidate_branch="$2"

    # An empty fork or branch can never name a real remote ref, so reject it
    # up front (git ls-remote with an empty pattern would list ALL heads).
    if [ -z "$candidate_fork" ] || [ -z "$candidate_branch" ]; then
        return 1
    fi

    # Diagnostics must go to stderr: callers (e.g. deploy_serverless_qa.yml.sh)
    # pipe stdout to `buildkite-agent pipeline upload` and expect only YAML.
    echo "Checking for '$candidate_branch' branch at $candidate_fork/elasticsearch" >&2

    local remote_heads
    remote_heads=$(git ls-remote --heads "git@github.com:${candidate_fork}/elasticsearch.git" "$candidate_branch" 2>/dev/null)
    if [ -n "$remote_heads" ]; then
        echo "Will use '$candidate_branch' branch at $candidate_fork/elasticsearch for ES integration tests" >&2
        return 0
    fi
    return 1
}

# Defaults used when no better fork/branch pair validates remotely.
SELECTED_FORK=elastic
SELECTED_BRANCH=main

function pickCloneTarget {

    # Candidate fork:branch pairs, highest precedence first. ':' is a safe
    # separator here: it cannot appear in a git branch name, and GitHub owner
    # names are restricted to alphanumerics and hyphens. The final candidate
    # re-validates the elastic/main defaults so the function still returns a
    # meaningful status when every override variable is empty or invalid.
    local pair candidate_fork candidate_branch
    for pair in \
        "${GITHUB_PR_OWNER}:${GITHUB_PR_BRANCH}" \
        "${PR_AUTHOR}:${PR_SOURCE_BRANCH}" \
        "${SELECTED_FORK}:${PR_SOURCE_BRANCH}" \
        "${SELECTED_FORK}:${PR_TARGET_BRANCH}" \
        "${SELECTED_FORK}:${SELECTED_BRANCH}"
    do
        candidate_fork="${pair%%:*}"
        candidate_branch="${pair#*:}"
        if isCloneTargetValid "$candidate_fork" "$candidate_branch" ; then
            SELECTED_FORK="$candidate_fork"
            SELECTED_BRANCH="$candidate_branch"
            return 0
        fi
    done
    return 1
}

# Prints the commit SHA at the head of SELECTED_BRANCH on SELECTED_FORK, or empty if unavailable.
function elasticsearch_selected_branch_head_sha {
    # Same ssh transport as isCloneTargetValid; errors are silenced so callers
    # simply see empty output when the remote or branch is unreachable.
    # git ls-remote emits "SHA<TAB>refname" lines; keep the first SHA only.
    git ls-remote --heads "git@github.com:${SELECTED_FORK}/elasticsearch.git" "${SELECTED_BRANCH}" 2>/dev/null | head -n 1 | cut -f 1
}
Loading