Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .buildkite/pipelines/format_and_validation.yml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,21 @@ steps:
notify:
- github_commit_status:
context: "Validate formatting with clang-format"
- label: "Validate changelog entries"
key: "validate_changelogs"
command: ".buildkite/scripts/steps/validate-changelogs.sh"
agents:
image: "python:3.11-slim"
soft_fail: true
notify:
- github_commit_status:
context: "Validate changelog entries"
- label: "Unit tests: changelog Python tools"
key: "test_changelog_tools"
command: ".buildkite/scripts/steps/test-changelog-tools.sh"
agents:
image: "python:3.11-slim"
notify:
- github_commit_status:
context: "Unit tests: changelog Python tools"
EOL
21 changes: 21 additions & 0 deletions .buildkite/scripts/steps/test-changelog-tools.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.

set -euo pipefail

# Install Python dependencies (same stack as validate-changelogs.sh)
# NOTE(review): git is installed because the python:*-slim image ships without
# it; presumably some code under test shells out to git -- TODO confirm.
if ! command -v git &>/dev/null; then
    apt-get update -qq && apt-get install -y -qq git >/dev/null 2>&1
fi
# --break-system-packages is needed on PEP 668 "externally managed" images;
# the fallback covers pip versions that predate the flag.
python3 -m pip install --quiet --break-system-packages pyyaml jsonschema 2>/dev/null \
    || python3 -m pip install --quiet pyyaml jsonschema

echo "Running Python unit tests for dev-tools changelog scripts..."
# Discover and run all test_*.py files under dev-tools/unittest.
# NOTE(review): the start directory is literally named "unittest"; discovery
# appears to handle this, but confirm it does not shadow the stdlib module.
python3 -m unittest discover -s dev-tools/unittest -p 'test_*.py' -v
74 changes: 74 additions & 0 deletions .buildkite/scripts/steps/validate-changelogs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/bash
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.

set -euo pipefail

# Pipe-delimited list of PR labels that exempt a change from needing a
# changelog entry (delimiters on both ends allow exact-match substring tests).
SKIP_LABELS="|>test|>refactoring|>docs|>build|>non-issue|"

# On PR builds, check if the PR has a label that skips changelog validation.
# BUILDKITE_PULL_REQUEST_LABELS is a comma-separated list set by Buildkite.
if [[ -n "${BUILDKITE_PULL_REQUEST_LABELS:-}" ]]; then
    IFS=',' read -ra LABELS <<< "${BUILDKITE_PULL_REQUEST_LABELS}"
    for label in "${LABELS[@]}"; do
        label="$(echo "${label}" | xargs)" # trim whitespace
        if [[ "${SKIP_LABELS}" == *"|${label}|"* ]]; then
            echo "Skipping changelog validation: PR has label '${label}'"
            exit 0
        fi
    done
fi

# Install system and Python dependencies (git is needed for the diff below;
# the python:*-slim image ships without it).
if ! command -v git &>/dev/null; then
    apt-get update -qq && apt-get install -y -qq git >/dev/null 2>&1
fi
# --break-system-packages is needed on PEP 668 "externally managed" images;
# the fallback covers pip versions that predate the flag.
python3 -m pip install --quiet --break-system-packages pyyaml jsonschema 2>/dev/null \
    || python3 -m pip install --quiet pyyaml jsonschema

# Find changelog files changed in this PR (compared to main/target branch)
TARGET_BRANCH="${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-main}"

# Fetch the target branch so we can diff against it
if ! git fetch origin "${TARGET_BRANCH}" --depth=1 2>/dev/null; then
    echo "Warning: could not fetch origin/${TARGET_BRANCH}, skipping changelog validation"
    exit 0
fi

if ! git rev-parse --verify "origin/${TARGET_BRANCH}" >/dev/null 2>&1; then
    echo "Warning: origin/${TARGET_BRANCH} not available, skipping changelog validation"
    exit 0
fi

# Bug fix: the original ran `CHANGED_CHANGELOGS=$(git diff ...)` and then read
# `DIFF_EXIT=$?`, but under `set -e` a failing `git diff` aborts the script
# before the check runs, so the fallback was dead code. Capture the failure
# status inline instead so the soft-skip actually happens.
DIFF_EXIT=0
CHANGED_CHANGELOGS=$(git diff --name-only --diff-filter=ACM "origin/${TARGET_BRANCH}"...HEAD -- 'docs/changelog/*.yaml') || DIFF_EXIT=$?
if [[ $DIFF_EXIT -ne 0 ]]; then
    echo "Warning: git diff failed (exit $DIFF_EXIT), skipping changelog validation"
    exit 0
fi

if [[ -z "${CHANGED_CHANGELOGS}" ]]; then
    echo "No changelog files found in this PR."
    echo "If this PR changes user-visible behaviour, please add a changelog entry."
    echo "See docs/changelog/README.md for details."
    echo "To skip this check, add one of these labels: >test, >refactoring, >docs, >build, >non-issue"

    # Soft warning rather than hard failure during rollout
    if [[ "${CHANGELOG_REQUIRED:-false}" == "true" ]]; then
        exit 1
    fi
    exit 0
fi

echo "Validating changelog files:"
echo "${CHANGED_CHANGELOGS}"
echo ""

# Pass each changed file as a separate argument to the validator.
readarray -t CHANGED_FILES <<< "${CHANGED_CHANGELOGS}"
python3 dev-tools/validate_changelogs.py "${CHANGED_FILES[@]}"
14 changes: 14 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,20 @@ task format(type: Exec) {
workingDir "${projectDir}"
}

// Runs the standalone Python validator over the changelog YAML entries.
// NOTE(review): invoked with no file arguments -- presumably the script
// validates every entry under docs/changelog/ in that mode; confirm.
task validateChangelogs(type: Exec) {
    commandLine 'python3', 'dev-tools/validate_changelogs.py'
    workingDir "${projectDir}"
    description = 'Validate changelog YAML entries against the schema'
    group = 'verification'
}

// Bundles the per-PR YAML entries into one consolidated changelog,
// headed with the current project version.
task bundleChangelogs(type: Exec) {
    commandLine 'python3', 'dev-tools/bundle_changelogs.py', '--version', project.version
    workingDir "${projectDir}"
    description = 'Generate consolidated changelog from per-PR YAML entries'
    group = 'documentation'
}

task precommit(type: Exec) {
commandLine shell
workingDir "${projectDir}"
Expand Down
153 changes: 153 additions & 0 deletions dev-tools/bundle_changelogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
"""
Bundle per-PR changelog YAML files into a consolidated changelog for release.

Usage:
python3 bundle_changelogs.py [--dir DIR] [--version VERSION] [--format FORMAT]

Outputs a formatted changelog grouped by type and area, suitable for inclusion
in release notes.

Formats:
markdown (default) - Markdown suitable for GitHub releases
asciidoc - AsciiDoc suitable for Elastic docs
"""

import argparse
import sys
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path

try:
import yaml
except ImportError:
print("Missing pyyaml. Install with: pip3 install pyyaml", file=sys.stderr)
sys.exit(2)


# Section order for the bundled changelog. Each pair is the YAML `type`
# value and its rendered section heading; types with no entries are skipped
# and sections are emitted in exactly this order.
TYPE_ORDER = [
    ("known-issue", "Known issues"),
    ("security", "Security fixes"),
    ("breaking", "Breaking changes"),
    ("breaking-java", "Breaking Java changes"),
    ("deprecation", "Deprecations"),
    ("feature", "New features"),
    ("new-aggregation", "New aggregations"),
    ("enhancement", "Enhancements"),
    ("bug", "Bug fixes"),
    ("regression", "Regression fixes"),
    ("upgrade", "Upgrades"),
]

# Base URLs used when rendering PR / issue numbers as links.
ML_CPP_PULL_URL = "https://github.com/elastic/ml-cpp/pull"
ML_CPP_ISSUE_URL = "https://github.com/elastic/ml-cpp/issues"


@dataclass(frozen=True)
class ChangelogFormatStyle:
    """Heading / bullet / link conventions for bundled changelog output.

    Two module-level instances exist: MARKDOWN_STYLE (GitHub releases)
    and ASCIIDOC_STYLE (Elastic docs).
    """

    # Prefix for the release-version heading, e.g. "## " or "== ".
    version_heading_prefix: str
    # Prefix for each per-type section heading, e.g. "### " or "=== ".
    type_heading_prefix: str
    # True: markdown bullets, "#N" issue refs, markdown PR links.
    # False: asciidoc bullets, full-URL issue links, {ml-pull} macro.
    use_markdown_issue_links: bool

    def version_heading(self, version: str) -> str:
        """Top-level heading for *version* (trailing newline included)."""
        return f"{self.version_heading_prefix}{version}\n"

    def type_heading(self, label: str) -> str:
        """Section heading for a change type (trailing newline included)."""
        return f"{self.type_heading_prefix}{label}\n"

    def area_line(self, area: str) -> str:
        """Bold (markdown) or emphasised (asciidoc) area sub-heading."""
        if self.use_markdown_issue_links:
            return f"**{area}**"
        return f"*{area}*"

    def entry_line(self, entry: dict) -> str:
        """Render one changelog entry as a single bullet line.

        *entry* is a parsed changelog mapping; ``summary`` is required,
        ``pr`` (int) and ``issues`` (list of ints) are optional.
        """
        summary = entry["summary"]
        # Robustness fix: a YAML entry may carry an explicit null
        # (`issues:` with no value), in which case .get("issues", [])
        # returns None and the joins below would raise TypeError.
        issues = entry.get("issues") or []
        pr = entry.get("pr")
        if self.use_markdown_issue_links:
            issue_refs = ", ".join(f"#{i}" for i in issues)
            if pr:
                line = f"- {summary} [#{pr}]({ML_CPP_PULL_URL}/{pr})"
            else:
                line = f"- {summary}"
        else:
            issue_refs = ", ".join(
                f"{ML_CPP_ISSUE_URL}/{i}[#{i}]" for i in issues
            )
            if pr:
                line = f"* {summary} {{ml-pull}}{pr}[#{pr}]"
            else:
                line = f"* {summary}"
        if issue_refs:
            line += f" ({issue_refs})"
        return line


# Markdown output: "## version" / "### type" headings, bare "#N" issue refs.
MARKDOWN_STYLE = ChangelogFormatStyle("## ", "### ", True)
# AsciiDoc output: "== version" / "=== type" headings, full issue-URL links.
ASCIIDOC_STYLE = ChangelogFormatStyle("== ", "=== ", False)


def load_entries(changelog_dir):
    """Parse every ``*.yaml`` file in *changelog_dir* into a list of dicts.

    Files are read in sorted filename order. Documents that are not
    non-empty mappings (empty files, lists, scalars) are skipped. Each
    returned mapping is tagged with its source filename under "_file".
    """
    parsed = []
    for yaml_path in sorted(changelog_dir.glob("*.yaml")):
        with open(yaml_path) as handle:
            document = yaml.safe_load(handle)
        if isinstance(document, dict) and document:
            document["_file"] = yaml_path.name
            parsed.append(document)
    return parsed


def _group_by_type_and_area(entries):
grouped = defaultdict(lambda: defaultdict(list))
for entry in entries:
area = entry.get("area", "General")
grouped[entry["type"]][area].append(entry)
return grouped


def format_entries(entries, style: ChangelogFormatStyle, version=None):
    """Render bundled *entries* with *style*, optionally under a version heading.

    Sections appear in TYPE_ORDER; within a section, areas are sorted
    alphabetically and entries are sorted by PR number (missing pr -> 0).
    Returns the whole changelog as one newline-joined string.
    """
    out = []
    if version:
        out.append(style.version_heading(version))

    by_type = _group_by_type_and_area(entries)

    for type_key, type_label in TYPE_ORDER:
        areas = by_type.get(type_key)
        if not areas:
            continue
        out.append(style.type_heading(type_label))
        for area_name in sorted(areas):
            out.append(style.area_line(area_name))
            ordered = sorted(areas[area_name], key=lambda e: e.get("pr", 0))
            out.extend(style.entry_line(e) for e in ordered)
            out.append("")

    return "\n".join(out)


def main():
    """CLI entry point: parse arguments, load entries, print the changelog.

    Exits 0 (with a note on stderr) when the changelog directory holds
    no entries; exits 2 earlier at import time if pyyaml is missing.
    """
    parser = argparse.ArgumentParser(description="Bundle changelog YAML files")
    parser.add_argument("--dir", default=None, help="Changelog directory")
    parser.add_argument("--version", default=None, help="Version string for heading")
    parser.add_argument("--format", default="markdown", choices=["markdown", "asciidoc"])
    opts = parser.parse_args()

    # Default to <repo_root>/docs/changelog relative to this script.
    default_dir = Path(__file__).resolve().parent.parent / "docs" / "changelog"
    changelog_dir = Path(opts.dir) if opts.dir else default_dir

    entries = load_entries(changelog_dir)
    if not entries:
        print("No changelog entries found.", file=sys.stderr)
        sys.exit(0)

    style = MARKDOWN_STYLE if opts.format == "markdown" else ASCIIDOC_STYLE
    print(format_entries(entries, style, opts.version))


# Allow direct invocation: python3 dev-tools/bundle_changelogs.py [options]
if __name__ == "__main__":
    main()
100 changes: 100 additions & 0 deletions dev-tools/changelog_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the following additional limitation. Functionality enabled by the
# files subject to the Elastic License 2.0 may only be used in production when
# invoked by an Elasticsearch process with a license key installed that permits
# use of machine learning features. You may not use this file except in
# compliance with the Elastic License 2.0 and the foregoing additional
# limitation.

"""Shared helpers for changelog YAML validation (schema + filename rules)."""

from __future__ import annotations

import json
import re
import sys
import urllib.error
import urllib.request
from pathlib import Path


# Canonical changelog schema, maintained in the Elasticsearch repository;
# load_schema() prefers this over any local copy.
ES_SCHEMA_URL = (
    "https://raw.githubusercontent.com/elastic/elasticsearch/main/"
    "build-tools-internal/src/main/resources/changelog-schema.json"
)


def load_schema(local_path: Path) -> dict:
    """Load the changelog schema, preferring the canonical ES version.

    Fetches the schema from the Elasticsearch repo to ensure we validate
    against the single source of truth. Falls back to the local copy if
    the fetch fails (e.g. no network / offline development). Warns if
    the local copy has diverged from the remote.

    Raises RuntimeError when the remote fetch fails and no local copy
    exists at *local_path*.
    """
    local_schema = None
    if local_path.exists():
        with open(local_path) as f:
            local_schema = json.load(f)

    try:
        # Resource fix: read the response inside a `with` block so the
        # HTTP connection is closed promptly (the original left it to GC).
        with urllib.request.urlopen(ES_SCHEMA_URL, timeout=10) as response:
            remote_schema = json.loads(response.read())
    except (urllib.error.URLError, TimeoutError, OSError) as e:
        if local_schema is not None:
            print(
                f"Note: could not fetch ES schema ({e}), using local copy",
                file=sys.stderr,
            )
            return local_schema
        raise RuntimeError(
            f"could not fetch ES schema and no local copy at {local_path}"
        ) from e

    if local_schema is not None and local_schema != remote_schema:
        print(
            "WARNING: local changelog-schema.json differs from the Elasticsearch source.\n"
            f" Remote: {ES_SCHEMA_URL}\n"
            f" Local: {local_path}\n"
            " Validating against the remote (canonical) schema.\n"
            " Please update the local copy to stay in sync.\n",
            file=sys.stderr,
        )

    return remote_schema


def filename_convention_errors(filename: str, stem: str, data: dict) -> list[str]:
    """Filename / ``pr`` field consistency (same rules as validate_changelogs).

    A file named ``<N>.yaml`` must have ``pr: N`` when a ``pr`` field is
    present, and a file with a ``pr`` field must be named after that PR
    number. Returns human-readable error strings (empty when consistent).
    """
    errors: list[str] = []
    if re.match(r"^\d+$", stem):
        # Numeric filename: any pr field must agree with it.
        if "pr" in data and data["pr"] != int(stem):
            errors.append(
                f"{filename}: pr field ({data['pr']}) does not match filename ({stem})"
            )
    elif "pr" in data:
        # Bug fix: the original hardcoded the "(unknown)" placeholder and
        # never used the *filename* parameter; report the offending file.
        errors.append(
            f"{filename}: file has a pr field ({data['pr']}), "
            f"so filename should be {data['pr']}.yaml"
        )
    return errors


def schema_validation_errors(data: dict, schema: dict, filename: str) -> list[str]:
    """JSON Schema validation errors for a parsed changelog mapping.

    Errors are sorted by their path within the document and prefixed with
    the name of the file being validated.
    """
    # Imported lazily so importing this module does not require jsonschema
    # unless schema validation is actually performed.
    import jsonschema

    errors: list[str] = []
    validator = jsonschema.Draft7Validator(schema)
    for error in sorted(validator.iter_errors(data), key=lambda e: list(e.path)):
        path = ".".join(str(p) for p in error.absolute_path) or "(root)"
        # Bug fix: prefix with the actual filename instead of the hardcoded
        # "(unknown)" placeholder that ignored the *filename* parameter.
        errors.append(f"{filename}: {path}: {error.message}")
    return errors


def validate_changelog_mapping(filename: str, stem: str, data: dict, schema: dict) -> list[str]:
    """Validate a single parsed YAML document (mapping) for export / tooling.

    Combines JSON Schema errors with filename-convention errors, in that
    order, and returns them as one flat list.
    """
    return [
        *schema_validation_errors(data, schema, filename),
        *filename_convention_errors(filename, stem, data),
    ]
Loading