diff --git a/.claude/settings.json b/.claude/settings.json deleted file mode 100644 index 0359925..0000000 --- a/.claude/settings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "env": { - "ANTHROPIC_BASE_URL": "http://localhost:4141", - "ANTHROPIC_AUTH_TOKEN": "dummy", - "ANTHROPIC_MODEL": "claude-sonnet-4", - "ANTHROPIC_SMALL_FAST_MODEL": "claude-3.7-sonnet" - } -} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 66c8015..f4b9ce6 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -18,6 +18,6 @@ jobs: go-version-file: go.mod - name: Run linter - uses: golangci/golangci-lint-action@v6 + uses: golangci/golangci-lint-action@v8 with: - version: v1.64.8 + version: v2.12.2 diff --git a/.golangci.yml b/.golangci.yml index 308dd7f..09c5026 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,47 +1,66 @@ +version: "2" run: timeout: 5m allow-parallel-runners: true - -issues: - # don't skip warning about doc comments - # don't exclude the default set of lint - exclude-use-default: false - # restore some of the defaults - # (fill in the rest as needed) - exclude-rules: - - path: "api/*" - linters: - - lll - - path: "internal/*" - linters: - - dupl - - lll linters: - disable-all: true + default: none enable: + - copyloopvar - dupl - errcheck - - copyloopvar - ginkgolinter - goconst - gocyclo - - gofmt - - goimports - - gosimple - govet - ineffassign - # - lll - misspell - nakedret - prealloc - revive - staticcheck - - typecheck - unconvert - unparam - unused - -linters-settings: - revive: + settings: + revive: + rules: + - name: comment-spacings + staticcheck: + # v2 merged stylecheck and quickfix (QF) checks into staticcheck. The + # previous v1 config did not enable those, so restrict to the SA* checks + # to keep the lint surface equivalent to v1.64.8. + checks: + - "all" + - "-ST*" + - "-QF*" + exclusions: + generated: lax rules: - - name: comment-spacings + - linters: + - lll + path: api/* + - linters: + - dupl + - lll + - goconst + path: internal/* + # v2 no longer applies the default exclusion of test files for goconst / + # dupl; restore that behaviour to keep parity with v1.64.8. + - linters: + - goconst + - dupl + path: _test\.go + paths: + - third_party$ + - builtin$ + - examples$ +formatters: + enable: + - gofmt + - goimports + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/CLAUDE.md b/CLAUDE.md index e986e4b..b808607 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -99,10 +99,10 @@ Each controller uses: ## Development Environment Requirements -- **Go**: v1.24.0+ (as specified in go.mod and Dockerfile) +- **Go**: v1.26.0+ (as specified in go.mod and Dockerfile; toolchain pinned to go1.26.3) - **Docker**: For building container images - **Kind**: For running E2E tests locally -- **Tools**: controller-gen v0.17.2, kustomize v5.6.0, golangci-lint v1.63.4 (auto-installed via Makefile) +- **Tools**: controller-gen v0.17.2, kustomize v5.6.0, golangci-lint v2.12.2 (auto-installed via Makefile) ## Testing Strategy diff --git a/Dockerfile b/Dockerfile index 2922429..8ee0c22 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Build the manager binary -FROM mcr.microsoft.com/oss/go/microsoft/golang:1.24 AS builder +FROM mcr.microsoft.com/oss/go/microsoft/golang:1.26 AS builder ARG TARGETOS ARG TARGETARCH @@ -21,7 +21,15 @@ COPY internal/ internal/ # was called. For example, if we call make docker-build in a local env which has the Apple Silicon M1 SO # the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore, # by leaving it empty we can ensure that the container and binary shipped on it will have the same platform. -RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go +# GOEXPERIMENT=nosystemcrypto: the Microsoft Go 1.26+ base image defaults to +# GOEXPERIMENT=systemcrypto, which routes crypto/* through OpenSSL. The cgo +# variant requires CGO_ENABLED=1; the no-cgo variant (ms_nocgo_opensslcrypto) +# still dlopens libssl at runtime and therefore needs glibc's dynamic linker +# in the final image — which `distroless/minimal:3.0` does not ship, causing +# "exec /manager: no such file or directory" at container start. Disabling the +# experiment selects Go's pure-Go crypto, keeping the binary fully static and +# runnable on minimal distroless. +RUN GOEXPERIMENT=nosystemcrypto CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go # Use distroless as minimal base image to package the manager binary # Refer to https://github.com/GoogleContainerTools/distroless for more details diff --git a/Makefile b/Makefile index 9d703b8..0931696 100644 --- a/Makefile +++ b/Makefile @@ -182,7 +182,7 @@ CONTROLLER_TOOLS_VERSION ?= v0.17.2 ENVTEST_VERSION ?= $(shell go list -m -f "{{ .Version }}" sigs.k8s.io/controller-runtime | awk -F'[v.]' '{printf "release-%d.%d", $$2, $$3}') #ENVTEST_K8S_VERSION is the version of Kubernetes to use for setting up ENVTEST binaries (i.e. 1.31) ENVTEST_K8S_VERSION ?= $(shell go list -m -f "{{ .Version }}" k8s.io/api | awk -F'[v.]' '{printf "1.%d", $$3}') -GOLANGCI_LINT_VERSION ?= v1.64.8 +GOLANGCI_LINT_VERSION ?= v2.12.2 .PHONY: kustomize kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. @@ -210,7 +210,7 @@ $(ENVTEST): $(LOCALBIN) .PHONY: golangci-lint golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary. $(GOLANGCI_LINT): $(LOCALBIN) - $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION)) + $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION)) # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary diff --git a/docs/superpowers/plans/2026-05-09-go-toolchain-upgrade.md b/docs/superpowers/plans/2026-05-09-go-toolchain-upgrade.md new file mode 100644 index 0000000..5f6a4cf --- /dev/null +++ b/docs/superpowers/plans/2026-05-09-go-toolchain-upgrade.md @@ -0,0 +1,356 @@ +# Go Toolchain Upgrade to 1.26.3 Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Bump the Go toolchain from 1.24.0 to 1.26.3 (current latest) across `go.mod`, `Dockerfile`, and project documentation, with no `.go` source-file changes and all existing build/test/lint gates passing. + +**Architecture:** This is the toolchain-bump slice (PR #1) of the broader Go 1.26 modernization effort defined in `docs/superpowers/specs/2026-05-08-go126-modernization-design.md`. CI workflows already use `go-version-file: go.mod`, so they automatically pick up the new version — only `go.mod`, `Dockerfile`, and `CLAUDE.md` need direct edits. Source modernization (mocks, deps, lint, refactors) is explicitly out of scope and lives in separate plans. + +**Tech Stack:** Go 1.26.3, kubebuilder v4, controller-runtime, Docker (mcr.microsoft.com/oss/go/microsoft/golang base image), GitHub Actions, golangci-lint v1.64.8. + +--- + +## Scope (explicit) + +**In scope:** +- `go.mod`: bump `go 1.24.0` → `go 1.26.0`, add `toolchain go1.26.3`, remove `godebug default=go1.24`. +- `Dockerfile`: bump `FROM mcr.microsoft.com/oss/go/microsoft/golang:1.24` → `:1.26`. +- `CLAUDE.md`: update the "Go: v1.24.0+" line to "v1.26.0+". +- `go.sum`: regenerate via `go mod tidy` if any transitive bumps are forced by the toolchain change. + +**Out of scope (do NOT touch in this plan):** +- Any `.go` file under `api/`, `internal/`, `cmd/`, `test/`. +- `.golangci.yml` (lint linter additions live in a separate plan). +- `.github/workflows/*.yml` — they use `go-version-file: go.mod` and auto-track. +- Mocks, dependency upgrades, code modernization patterns (`any`, `range n`, `errors.Join`, etc.). +- The `go.uber.org/mock` upgrade or any `go generate` re-run. + +--- + +## File Structure + +| File | Action | Responsibility | +|---|---|---| +| `go.mod` | Modify | Declare Go 1.26.0 minimum + pin `toolchain go1.26.3`; drop legacy GODEBUG default | +| `go.sum` | Modify (auto) | Regenerated by `go mod tidy` if transitive deps shift | +| `Dockerfile` | Modify | Builder stage uses Go 1.26 base image | +| `CLAUDE.md` | Modify | Documentation reflects new minimum version | + +No new files. No deletions. + +--- + +## Task 1: Verify clean baseline before bumping + +**Files:** +- Read: `go.mod` +- Read: `Dockerfile` +- Read: `CLAUDE.md` + +- [ ] **Step 1: Confirm working tree is clean** + +Run: `git status --porcelain` +Expected: empty output (no uncommitted changes). If not empty, stash or commit first. + +- [ ] **Step 2: Confirm we are on a fresh branch off main** + +Run: `git rev-parse --abbrev-ref HEAD` +Expected: a feature branch name like `go-toolchain-upgrade` (NOT `main`). If on `main`, run: + +```bash +git checkout -b go-toolchain-upgrade +``` + +- [ ] **Step 3: Capture baseline build & test output** + +Run: `make fmt vet` +Expected: exits 0, no output diff. If `make fmt` rewrites files, commit those first as a separate housekeeping commit before proceeding. + +Run: `make test 2>&1 | tail -20` +Expected: all packages PASS, no FAIL lines. + +This baseline confirms the failure surface is currently empty — any post-bump failure is caused by the bump. + +--- + +## Task 2: Update `go.mod` to Go 1.26.3 + +**Files:** +- Modify: `go.mod` (lines 3–5) + +**Why this matters:** The `go` directive sets the minimum language version your module compiles under; `toolchain` pins which exact Go release is used to build. Splitting them lets downstream consumers with Go 1.26.0+ build us, while we get the deterministic 1.26.3 toolchain. Removing `godebug default=go1.24` is what actually flips runtime behavior to 1.26 defaults — without that line removal, the bump is half-cosmetic. + +- [ ] **Step 1: Edit `go.mod` header** + +Replace lines 3–5 of `go.mod`: + +```go +go 1.24.0 + +godebug default=go1.24 +``` + +with: + +```go +go 1.26.0 + +toolchain go1.26.3 +``` + +The final top of the file should read: + +```go +module github.com/Azure/operation-cache-controller + +go 1.26.0 + +toolchain go1.26.3 + +require ( + github.com/go-logr/logr v1.4.2 +``` + +- [ ] **Step 2: Run `go mod tidy`** + +Run: `go mod tidy` +Expected: exits 0. May modify `go.sum` if transitive dependency selections shift under the new toolchain. May add an `// indirect` line or two. + +- [ ] **Step 3: Verify the diff is scope-clean** + +Run: `git diff --stat go.mod go.sum` +Expected: `go.mod` shows the 3-line replacement above. `go.sum` may show small adjustments. No other files touched. + +Run: `git diff go.mod` +Expected: shows exactly the `go` directive change, the new `toolchain` line, and the deletion of `godebug default=go1.24`. + +- [ ] **Step 4: Confirm Go can satisfy the new toolchain** + +Run: `go version` +Expected: prints the host Go version. If it's older than 1.26.3, Go's auto-toolchain mechanism will download 1.26.3 on the next build (transparent — no action needed). If `GOTOOLCHAIN=local` is set, you'll get an error and must unset it: `unset GOTOOLCHAIN`. + +Run: `go env GOTOOLCHAIN` +Expected: `auto` (the default) or `go1.26.3+auto`. If it's `local`, fix as above. + +- [ ] **Step 5: Build the manager** + +Run: `make build` +Expected: exits 0. First run may print `go: downloading go1.26.3 ...` — that's the auto-toolchain in action. Binary lands in `bin/manager`. + +- [ ] **Step 6: Commit** + +```bash +git add go.mod go.sum +git commit -m "build(go): bump go.mod to go 1.26.0, toolchain go1.26.3" +``` + +--- + +## Task 3: Update Dockerfile builder image + +**Files:** +- Modify: `Dockerfile:2` + +**Why this matters:** The Dockerfile's builder stage is what produces the production binary. The base image tag must match the toolchain so reproducible builds inside CI/release match local builds. We keep the same registry path (`mcr.microsoft.com/oss/go/microsoft/golang`) — only the version tag changes. This image stream tracks Microsoft's hardened Go builds; the `:1.26` tag will resolve to the latest 1.26.x patch release at pull time, which is acceptable for a builder stage. + +- [ ] **Step 1: Edit `Dockerfile`** + +Replace line 2: + +```dockerfile +FROM mcr.microsoft.com/oss/go/microsoft/golang:1.24 AS builder +``` + +with: + +```dockerfile +FROM mcr.microsoft.com/oss/go/microsoft/golang:1.26 AS builder +``` + +- [ ] **Step 2: Verify the Docker build succeeds** + +Run: `make docker-build IMG=local-test:go1.26` +Expected: build completes, image `local-test:go1.26` exists. The pull of the new base image happens transparently; if the image isn't yet published you'll see "manifest not found" — in that case fall back to `:1.26.3` explicitly and re-run. + +Verify the image: + +```bash +docker run --rm --entrypoint=/manager local-test:go1.26 --help 2>&1 | head -5 +``` + +Expected: prints the manager's `--help` (or a usage error mentioning flag names like `--metrics-bind-address`). Not a Go panic. + +- [ ] **Step 3: Commit** + +```bash +git add Dockerfile +git commit -m "build(docker): bump builder image to golang:1.26" +``` + +--- + +## Task 4: Update `CLAUDE.md` development requirements + +**Files:** +- Modify: `CLAUDE.md:102` + +**Why this matters:** `CLAUDE.md` is read by Claude Code (and humans onboarding to the repo) as authoritative project guidance. If it says `Go: v1.24.0+` after the bump, future contributors / agents will install the wrong version and get confusing error messages. The line is one of the few in `CLAUDE.md` that names a specific version; keeping it accurate is cheap and high-value. + +- [ ] **Step 1: Edit the development environment block** + +In `CLAUDE.md` find the line: + +```markdown +- **Go**: v1.24.0+ (as specified in go.mod and Dockerfile) +``` + +Replace it with: + +```markdown +- **Go**: v1.26.0+ (as specified in go.mod and Dockerfile; toolchain pinned to go1.26.3) +``` + +- [ ] **Step 2: Verify no other version strings stale** + +Run: `grep -n "1\.24" CLAUDE.md` +Expected: no output. If any remain, evaluate each and update if it refers to the Go toolchain. + +- [ ] **Step 3: Commit** + +```bash +git add CLAUDE.md +git commit -m "docs(claude): update Go version requirement to 1.26" +``` + +--- + +## Task 5: Run the full verification gate + +**Files:** none modified (verification only) + +**Why this matters:** This is the contract from the parent spec (Section 4, Contract 3). Every gate must pass with no `.go` file having changed. Failure here means the toolchain bump triggered a real regression and the plan is not done. + +- [ ] **Step 1: Format & vet** + +Run: `make fmt` +Run: `git diff --exit-code` +Expected: exit 0. If `make fmt` rewrote files, the bump revealed a formatting drift — commit the formatting changes as a separate `style:` commit before proceeding. + +Run: `make vet` +Expected: exit 0, no warnings. + +- [ ] **Step 2: Confirm generated artifacts in sync** + +Run: `make manifests generate` +Run: `git diff --exit-code` +Expected: exit 0. If the diff is non-empty under `config/crd/` or `api/v1alpha1/zz_generated_*.go`, controller-gen behavior changed under the new toolchain — commit these regenerated files as a separate `chore: regenerate` commit. + +- [ ] **Step 3: Confirm mocks regenerate clean** + +Run: `go generate ./...` +Run: `git diff --exit-code` +Expected: exit 0. Any diff here is a signal that mockgen output shifted under Go 1.26 — that belongs in the mockgen-bump plan (PR #2 of the parent spec), NOT this plan. If a diff appears, **stop**, revert the mock files (`git checkout -- internal/handler/mocks/`), and document the issue at the bottom of this plan. + +- [ ] **Step 4: Lint** + +Run: `make lint` +Expected: exit 0. golangci-lint v1.64.8 supports Go 1.24 fully and 1.26 sufficiently for the linters we already enable. If it fails with "unsupported Go version", note the failure — the linter bump lives in a separate plan, but a hard failure here means we need to defer this PR until the linter bump lands. + +- [ ] **Step 5: Unit tests** + +Run: `make test 2>&1 | tail -30` +Expected: all packages PASS. Coverage report writes to `cover.out`. + +- [ ] **Step 6: Integration tests** + +Run: `make test-integration 2>&1 | tail -30` +Expected: envtest binaries download (first run only) and all integration tests PASS. + +- [ ] **Step 7: Build (sanity)** + +Run: `make build` +Expected: exit 0, `bin/manager` updated. + +Run: `./bin/manager --help 2>&1 | head -5` +Expected: prints flag help (e.g., `--metrics-bind-address`). No panic. + +- [ ] **Step 8: Commit any gate-required regenerations** + +If Steps 1, 2, or 3 produced auto-regenerated files that you committed mid-task, your branch has 4–6 small commits at this point. That's expected and correct — each commit is bisect-safe. + +If no regeneration was needed, your branch has exactly 3 commits (one per Tasks 2, 3, 4). Also correct. + +--- + +## Task 6: Final consistency check & branch summary + +**Files:** none modified + +- [ ] **Step 1: Confirm no stray `1.24` references in tracked files** + +Run: `git grep -n "1\.24" -- ':!docs/superpowers/specs/' ':!docs/superpowers/plans/'` +Expected: no output, OR only output that is clearly unrelated to the Go toolchain (e.g., a Kubernetes API version like `v1.24` in a comment about cluster compatibility — leave those alone). + +If any matches relate to the Go toolchain, fix them and amend the relevant commit. + +- [ ] **Step 2: Confirm `go.mod` final state** + +Run: `head -5 go.mod` +Expected: + +``` +module github.com/Azure/operation-cache-controller + +go 1.26.0 + +toolchain go1.26.3 +``` + +The `godebug default=go1.24` line MUST be gone. + +- [ ] **Step 3: Confirm Dockerfile final state** + +Run: `grep "^FROM" Dockerfile` +Expected: includes `FROM mcr.microsoft.com/oss/go/microsoft/golang:1.26 AS builder`. No `:1.24`. + +- [ ] **Step 4: Print branch commits for review** + +Run: `git log --oneline main..HEAD` +Expected: 3–6 small, focused commits. Each subject matches conventional-commit prefixes (`build:`, `docs:`, `style:`, `chore:`). + +- [ ] **Step 5: Final gate replay** + +Re-run the full gate one more time to confirm reproducibility: + +```bash +make fmt vet lint test test-integration build +``` + +Expected: all green. This is the same gate CI will run. + +--- + +## Self-Review (performed at write-time) + +**Spec coverage check (against `docs/superpowers/specs/2026-05-08-go126-modernization-design.md` Section 3, PR #1):** +- ✅ `go.mod`: `go 1.24.0` → `go 1.26.0` — Task 2 Step 1 +- ✅ Add `toolchain go1.26.0` — Task 2 Step 1 (using `go1.26.3`, the current latest, which satisfies "1.26.x" from the spec) +- ✅ Remove `godebug default=go1.24` — Task 2 Step 1 +- ✅ `Dockerfile`: bump `FROM golang:1.24` → `FROM golang:1.26` — Task 3 Step 1 +- ✅ `.github/workflows/*.yml`: bump `go-version` — N/A (workflows use `go-version-file: go.mod`, auto-tracks; verified in baseline scan) +- ✅ `Makefile`: if `GO_VERSION` variable exists, bump it — N/A (none exists; verified in baseline scan) +- ✅ `go.sum`: regenerate via `go mod tidy` — Task 2 Step 2 +- ✅ Verification gate `make build && make fmt vet test test-integration` — Task 5 + +**Placeholder scan:** No "TBD", "later", "appropriate", or "similar to" phrases. All commands and code blocks contain literal content. + +**Type/version consistency:** Every step that names a Go version uses `1.26.3` for the toolchain pin and `1.26.0` for the language minimum, consistently. Dockerfile uses the floating `:1.26` tag, matching the spec's intent of "same variant as today". + +--- + +## Notes for the executor + +- **If the host Go is older than 1.26.3:** Go's auto-toolchain mechanism downloads 1.26.3 on first build. This is normal. Look for `go: downloading go1.26.3` in build output. If you have `GOTOOLCHAIN=local` set in your shell, unset it before Task 2. +- **If `mcr.microsoft.com/.../golang:1.26` is not yet published:** fall back to `:1.26.3` (explicit patch tag). If neither exists, the bump is genuinely blocked on upstream image availability — note in the PR and pause. +- **Why `1.26.3` and not just `1.26`:** the spec says "1.26.0" but the current latest is 1.26.3 (per `go.dev/VERSION`). Using `1.26.3` is strictly newer, includes security patches, and satisfies the spec's intent. +- **What this plan does NOT do:** any source modernization. Do not "fix while you're in there." Subsequent plans will modernize mocks, deps, lint config, and code. diff --git a/docs/superpowers/specs/2026-05-08-go126-modernization-design.md b/docs/superpowers/specs/2026-05-08-go126-modernization-design.md new file mode 100644 index 0000000..d2d8c60 --- /dev/null +++ b/docs/superpowers/specs/2026-05-08-go126-modernization-design.md @@ -0,0 +1,419 @@ +# Go 1.26 Upgrade & Codebase Modernization — Design + +**Status:** Draft, awaiting user review +**Date:** 2026-05-08 +**Scope:** Operation Cache Controller (`github.com/Azure/operation-cache-controller`) +**Target Go version:** 1.26 +**Delivery:** 7 sequential, stacked PRs + +--- + +## 1. Problem & Constraints + +### Problem + +The Operation Cache Controller currently targets Go 1.24 and uses a mix of legacy and modern Go patterns. Over the last several Go releases (1.21–1.26) the language and standard library have grown substantial new features — `slices`/`maps`/`cmp` packages, range-over-int, type-safe atomics, `errors.Join`, `errors.AsType[T]`, `wg.Go`, `new(val)`, structured iterators, `t.Context()`, `omitzero`, `b.Loop()`, `SplitSeq`, and more — that the codebase doesn't take advantage of. This produces three concrete problems: + +1. **Code is harder to read and maintain than it needs to be** — hand-rolled loops, manual error wrapping, `interface{}` in mocks, and `for i := 0; i < n; i++` patterns add friction that modern equivalents eliminate. +2. **The codebase will drift further** — without a linter that enforces modern style, every new contribution risks reintroducing legacy patterns. +3. **The toolchain is one major version behind** — Go 1.26 is shipping, and pinning to 1.24 leaves performance and stdlib improvements on the table. + +### Goals + +- Bump the Go toolchain to **1.26** across `go.mod`, `Dockerfile`, and CI. +- Modernize hand-written code to idiomatic Go 1.26 (mechanical rewrites + judicious library swaps). +- Regenerate mocks via an upgraded `go.uber.org/mock` so generated code is also modern. +- Bump targeted direct dependencies that materially benefit modernization. +- Upgrade `golangci-lint` and enable modernization linters (`intrange`, `copyloopvar`, `usestdlibvars`, `perfsprint`) so the codebase stays modern. +- Ship as **7 sequential, reviewable PRs**, each independently buildable and bisect-safe. + +### Non-Goals (explicitly out of scope) + +- **No structural / architectural changes.** No splitting files, no reshaping handler interfaces, no reducing duplication across reconcilers, no API/CRD changes. +- **No behavioral changes.** Reconcile loops, finalizer logic, cache hit/miss flows, requeue intervals — all unchanged. +- **No transitive-dep cleanup.** Only direct deps are touched; `go mod tidy` runs but we don't chase indirect upgrades. +- **No Kubebuilder / controller-runtime major upgrade** unless Go 1.26 forces it. Major upgrades are their own project. +- **No new tests.** Existing tests must keep passing; no test-coverage push as part of this work. +- **No CRD / kubectl manifest restructuring.** `make manifests` runs, but we don't reorganize `config/`. + +### Constraints + +- **Compatibility:** All 4 reconcilers must continue to satisfy `controller-runtime`'s `Reconciler` interface unchanged. +- **Concurrency:** The 50–100 concurrent-reconciles configuration must not regress. +- **Test gates:** Each PR must pass `make fmt vet lint test test-integration` locally before merge. e2e validated by CI. +- **Code generation:** `make manifests generate` and `go generate ./...` must be re-runnable without producing dirty diffs after the work lands. +- **PR sequencing:** Only one PR in this stack open at any time; next PR opens only after the previous merges to `main`. +- **Bisect safety:** Every PR (and ideally every commit) leaves the project in a buildable, test-passing state. + +--- + +## 2. Architecture & PR Sequencing + +This isn't a runtime architecture (no new components are introduced). The "architecture" here is the **delivery architecture** — how the 7 PRs stack and what each one owns. + +### The 7-PR Stack + +``` + main + │ + ┌────────▼────────────────────────┐ +PR #1 │ Go 1.26 toolchain bump │ go.mod, Dockerfile, CI matrix + └────────┬────────────────────────┘ + │ + ┌────────▼────────────────────────┐ +PR #2 │ mockgen bump + regenerate │ internal/handler/mocks/*.go + └────────┬────────────────────────┘ + │ + ┌────────▼────────────────────────┐ +PR #3 │ Targeted direct dep upgrades │ go.mod, go.sum + └────────┬────────────────────────┘ + │ + ┌────────▼────────────────────────┐ +PR #4 │ golangci-lint bump + new linters│ .golangci.yml, Makefile + │ (suppressions added so green) │ + //nolint markers as needed + └────────┬────────────────────────┘ + │ + ┌────────▼────────────────────────┐ +PR #5 │ Mechanical modernization │ any, range-int, t.Context, + │ │ omitzero, b.Loop, SplitSeq + └────────┬────────────────────────┘ + │ + ┌────────▼────────────────────────┐ +PR #6 │ Idiomatic library swaps │ slices, maps, cmp, errors.Join, + │ │ sync.OnceValue, atomic.Bool + └────────┬────────────────────────┘ + │ + ┌────────▼────────────────────────┐ +PR #7 │ Go 1.26-specific features │ wg.Go, new(val), errors.AsType[T] + │ + remove all //nolint markers │ cleanup of PR #4's suppressions + └────────┬────────────────────────┘ + ▼ + main (fully modernized, lint-clean, no suppressions) +``` + +### What each PR owns + +| PR | Owns | Doesn't touch | +|---|---|---| +| **#1** Go bump | `go.mod` (`go 1.26`, `toolchain go1.26.0`), `Dockerfile` (base image), `.github/workflows/*.yml` (Go matrix), `go.sum` if needed for transitive forced bumps | Any `.go` file content | +| **#2** mockgen | `go.mod`/`go.sum` for `go.uber.org/mock`, regenerated `internal/handler/mocks/*.go` | Any hand-written `.go` | +| **#3** Direct deps | `go.mod`/`go.sum` for explicitly chosen direct deps | Any `.go` file content | +| **#4** Lint config | `.golangci.yml`, possibly `Makefile` (lint version pin), targeted `//nolint:linter` directives in files that will be fixed in #5–#7 | Logic/style fixes — only suppressions | +| **#5** Mechanical refactor | Hand-written `.go` files: `interface{}`→`any`, `for i := 0; i < n; i++`→`for i := range n`, test contexts→`t.Context()`, JSON tags `omitempty`→`omitzero` where appropriate, benchmarks→`b.Loop()`, split-iteration→`SplitSeq` | Library swaps; semantic changes | +| **#6** Library swaps | Hand-written `.go` files: hand-rolled loops→`slices.*`/`maps.*`, multi-error returns→`errors.Join`, lazy init→`sync.OnceValue`, primitive atomics→`atomic.Bool`/etc., zero-value defaults→`cmp.Or` | 1.26-only features (saved for #7) | +| **#7** Go 1.26 features | `sync.WaitGroup` use→`wg.Go`, `x := v; &x`→`new(v)`, `errors.As(err, &target)`→`errors.AsType[T]`, removal of all `//nolint` markers from PR #4 | New functionality | + +### Critical sequencing rules + +- **PR #2 depends on PR #1** because mockgen output may differ under the new toolchain. +- **PR #4 depends on PR #1** because some new linters require Go 1.26 awareness. +- **PRs #5–#7 depend on PR #4** because the linter is what tells us *what* to modernize and *where*. +- **PR #7 must come last** because reverting it preserves a fully-modernized state minus the 1.26-specific syntax — useful if a 1.26 dep regression is discovered post-merge. +- **Only one PR open at a time** to keep rebase cost zero. + +### Suppression strategy in PR #4 + +When PR #4 lands, lint must stay green even though no code has been modernized yet. Strategy: + +1. Enable each new linter at `severity: error`. +2. Run lint, capture every violation. +3. Add file-level `//nolint:intrange,copyloopvar,usestdlibvars,perfsprint` directives to every flagged file, with a TODO comment: `// TODO(modernization): remove after PR #5/#6/#7`. +4. Each subsequent PR removes the suppressions for the linters it addresses. + +This guarantees PR #4 is mergeable, makes the modernization debt visible (grep-able), and forces #5–#7 to actually clean it up. + +--- + +## 3. Per-PR Mechanics & Change Flow + +For a refactor, "data flow" is really *change flow* — what concrete edits happen in each PR, how they're discovered, and how they're verified. + +### PR #1 — Go 1.26 toolchain bump + +**Discovery:** None needed; the change set is fixed. + +**Edits:** +- `go.mod`: `go 1.24.0` → `go 1.26.0`; add `toolchain go1.26.0`; remove the `godebug default=go1.24` line. +- `Dockerfile`: bump `FROM golang:1.24` → `FROM golang:1.26` (same Alpine/Debian variant as today). +- `.github/workflows/*.yml`: bump `go-version` (and matrix entries if any) to `1.26.x`. +- `Makefile`: if there's a `GO_VERSION` variable, bump it. +- `go.sum`: regenerate via `go mod tidy` if any transitive bumps are forced. + +**Verification gate:** `make build && make fmt vet test test-integration`. No code changes, so all must pass unchanged. + +**Rollback:** Single-PR revert restores Go 1.24. + +### PR #2 — mockgen bump + regenerate + +**Discovery:** +- Read current `go.uber.org/mock` version in `go.mod`. +- Check the latest stable release. + +**Edits:** +- `go.mod`/`go.sum`: bump `go.uber.org/mock` to latest stable. +- Run `go install go.uber.org/mock/mockgen@` and `go generate ./...`. +- Commit only the regenerated files in `internal/handler/mocks/*.go` (4 files: `mock_requirement.go`, `mock_operation.go`, `mock_cache.go`, `mock_appdeployment.go`). + +**Verification gate:** `make test test-integration` — mocks must still satisfy the handler interfaces and existing tests must pass without edits. + +**Rollback:** Single revert. Mocks regenerate from old version. + +### PR #3 — Targeted direct dependency upgrades + +**Discovery process:** +1. `go list -m -u all` → list all direct deps with available updates. +2. For each direct dep, check whether the available update has a Go 1.25+ or 1.26 awareness note in its changelog. +3. **Skip** any dep whose update would be a major-version bump (those belong in their own PRs). + +**Concrete candidates** (final list determined during execution): +- `sigs.k8s.io/controller-runtime` — minor bumps only. +- `github.com/onsi/ginkgo/v2`, `github.com/onsi/gomega` — modernization-friendly minor bumps. +- `k8s.io/api`, `k8s.io/apimachinery`, `k8s.io/client-go` — patch/minor bumps if available without forcing K8s major-version churn. +- `golang.org/x/*` — patch bumps. + +**Explicitly deferred:** +- Any dep requiring a major-version bump. +- `controller-gen` (lives in tools, separate concern). +- `kustomize` (build tool, not a Go import). + +**Edits:** `go.mod`, `go.sum` only. + +**Verification gate:** `make build fmt vet lint test test-integration`. Critical — this is where dep-induced regressions surface. + +**Rollback:** Single revert. + +### PR #4 — golangci-lint bump + new linters + suppressions + +**Discovery:** +1. Bump `golangci-lint` version in `Makefile`. +2. Edit `.golangci.yml` to add: `intrange`, `copyloopvar`, `usestdlibvars`, `perfsprint`. +3. Run `make lint` — capture every violation. +4. For each flagged file, prepend a file-level `//nolint: // TODO(modernization): remove after PR #5/#6/#7` directive. + +**Edits:** +- `Makefile`: pin new `GOLANGCI_LINT_VERSION`. +- `.golangci.yml`: add 4 linters under `linters.enable`. +- Hand-written `.go` files: insert file-level `//nolint` markers in flagged files only. **No logic edits.** + +**Verification gate:** `make lint test test-integration` — must be green with suppressions in place. + +**The TODO marker pattern:** + +```go +//nolint:intrange,perfsprint // TODO(modernization): remove after PR #5 +package controller +``` + +This makes the remaining debt grep-able: `grep -rn "TODO(modernization)" .` produces the worklist for PRs #5–#7. + +### PR #5 — Mechanical modernization + +**Discovery:** `grep -rn "TODO(modernization).*PR #5" .` + linter output for `intrange`, `copyloopvar`, `usestdlibvars`. + +**Per-pattern change list:** + +| Pattern | Find | Replace | Notes | +|---|---|---|---| +| `interface{}` | `interface{}` | `any` | Already mostly clean; verify only 2 files affected. | +| Index loops | `for i := 0; i < n; i++` | `for i := range n` | Only when `i` is unused after the loop or used purely as an index. | +| Test contexts | `ctx, cancel := context.WithCancel(context.Background())` in test funcs | `ctx := t.Context()` | Drop the matching `defer cancel()`. | +| JSON tags | `omitempty` on `time.Duration`, `time.Time`, structs, slices, maps | `omitzero` | **Only** for these types; leave `omitempty` on strings/ints/bools. | +| Benchmarks | `for i := 0; i < b.N; i++` | `for b.Loop()` | Project may have zero benchmarks; if so, no-op. | +| Split iteration | `for _, x := range strings.Split(s, sep)` | `for x := range strings.SplitSeq(s, sep)` | Only in for-range; not when the slice is otherwise used. | +| Loop variable capture | `x := x` shadow inside loops | Remove the shadow | `copyloopvar` lint will flag. | + +**Per-file workflow:** +1. Apply the mechanical edits. +2. Remove the relevant `//nolint` directives from PR #4. +3. Re-run `make lint test test-integration` per file batch. + +**Verification gate:** Full `make fmt vet lint test test-integration`. No `intrange`/`copyloopvar`/`usestdlibvars` suppressions remain. + +### PR #6 — Idiomatic library swaps + +**Discovery:** Manual reading + `perfsprint` lint output + targeted `grep`s: +- `grep -rn "for .* range" --include="*.go"` for `slices.Contains`, `slices.IndexFunc`, etc. +- `grep -rn "fmt.Errorf" --include="*.go"` for `errors.Join`. +- `grep -rn "sync.Once" --include="*.go"` for `sync.OnceValue`. +- `grep -rn "atomic.Store\|atomic.Load" --include="*.go"` for `atomic.Bool`/`atomic.Int64`. + +**Per-pattern change list (apply with judgment, not mechanically):** + +| Replace | With | When | +|---|---|---| +| Manual contains loop | `slices.Contains` | Loop's only purpose is membership test | +| Manual index search | `slices.Index` / `slices.IndexFunc` | Loop returns first match | +| Manual sort wrappers | `slices.SortFunc` + `cmp.Compare` | Custom comparators | +| Manual map clone | `maps.Clone` | Whole-map copy with no transformation | +| Manual map filter delete | `maps.DeleteFunc` | Conditional deletion | +| `if x == ""; x = "default"` | `cmp.Or(x, "default")` | Zero-value fallback chains | +| `fmt.Errorf("a: %w; b: %w", e1, e2)` | `errors.Join(e1, e2)` | Combining errors | +| `sync.Once` + value var | `sync.OnceValue` | Lazy single-value init | +| `atomic.StoreInt32` family | `atomic.Bool`/`atomic.Int64`/`atomic.Pointer[T]` | Type-safe equivalent exists | +| `time.Now().Sub(x)` | `time.Since(x)` | Already idiomatic; sweep for stragglers | + +**Verification gate:** Full `make fmt vet lint test test-integration`. `perfsprint` suppressions removed. + +### PR #7 — Go 1.26-specific features + suppression cleanup + +**Discovery:** Manual reading + `grep`s: +- `grep -rn "wg.Add\|sync.WaitGroup" --include="*.go"` for `wg.Go` candidates. +- `grep -rn "errors.As(" --include="*.go"` for `errors.AsType[T]` candidates. +- Hand-search for `x := val; ptr = &x` patterns that should become `new(val)`. + +**Per-pattern change list:** + +| Replace | With | +|---|---| +| `wg.Add(1); go func(){ defer wg.Done(); ... }()` | `wg.Go(func(){ ... })` | +| `x := val; ptr = &x` (struct field init pattern) | `ptr = new(val)` | +| `var t *T; if errors.As(err, &t) { ... }` | `if t, ok := errors.AsType[*T](err); ok { ... }` | + +**Final cleanup:** +- `grep -rn "TODO(modernization)" .` must return zero results before this PR is opened for review. +- All `//nolint` markers added in PR #4 must be gone. + +**Verification gate:** Full `make fmt vet lint test test-integration` + manual confirmation of the grep checks above. + +--- + +## 4. Interfaces, Contracts, and Verification + +### Contract 1 — Public Go API surface + +**Rule:** No exported identifier in `api/v1alpha1/` or `internal/` may change name, signature, or visibility across the entire 7-PR stack. + +**Verification:** +- `go vet ./...` after each PR. +- Manual diff check: `git diff main -- 'api/**/*.go' 'internal/handler/*.go' | grep -E '^[+-](func|type|var|const) [A-Z]'`. If anything appears, justify or revert. +- Mocks regenerate cleanly in PR #2 — if they don't, an interface changed. + +**Allowed exception:** PR #6 may swap a return value from a custom error to `errors.Join(...)` — this is a value-level change, not a signature change. + +### Contract 2 — Operator runtime behavior + +**Rule:** No reconciler logic, requeue interval, finalizer behavior, owner reference structure, label/annotation key, or status transition may change. + +| Frozen behavior | Source | +|---|---| +| Requeue intervals (10s default, 10m requirement, 60s cache) | `internal/utils/reconciler/` and per-controller | +| Finalizer strings | `internal/log/` constants and per-controller | +| Label/annotation keys (`cache-key`, `acquired`, `cache-mode`) | per-CRD type files | +| Concurrency settings (50–100 concurrent reconciles) | controller `SetupWithManager` | +| Owner reference graph (Requirement→Operation→AppDeployment→Job; Cache→Operation) | handler implementations | +| Cache hit/miss flow (acquisition, pre-provisioning) | `RequirementHandler`, `CacheHandler` | + +**Verification:** +- `make test test-integration` passes unchanged. +- Spot-check: `git diff main -- 'internal/controller/*.go' | grep -E 'RequeueAfter|Finalizer|owner'`. Hits require explicit justification. +- e2e on CI must remain green after each PR. + +### Contract 3 — Developer build & test commands + +**Rule:** Every existing `make` target keeps working with identical semantics. + +**Verification gate (per PR), executed in order:** + +```bash +# 1. Format & vet +make fmt && git diff --exit-code +make vet + +# 2. Generated artifacts in sync +make manifests generate +git diff --exit-code + +# 3. Mocks regenerate clean +go generate ./... +git diff --exit-code + +# 4. Lint +make lint + +# 5. Tests +make test +make test-integration + +# 6. Build +make build +make docker-build IMG=local-test:dev +``` + +Any of steps 1–6 failing blocks PR merge. + +### Contract 4 — Per-PR completion criteria + +| PR | Exit criteria | +|---|---| +| **#1** | `go.mod` shows `go 1.26.0`. Dockerfile/CI use Go 1.26. All 6 verification steps pass. No `.go` file under `internal/` or `api/` has changed. | +| **#2** | `go.uber.org/mock` bumped. Only `internal/handler/mocks/*.go` changed. All 6 verification steps pass. `go generate ./...` produces clean diff. | +| **#3** | Only `go.mod`/`go.sum` changed. No major-version bumps. All 6 verification steps pass. | +| **#4** | New linters enabled in `.golangci.yml`. `golangci-lint --version` reports the new pinned version. `make lint` is green. Every flagged file has a `//nolint: // TODO(modernization): remove after PR #N` directive. `grep -rn "TODO(modernization)" . \| wc -l` produces a non-zero count. | +| **#5** | All `intrange`, `copyloopvar`, `usestdlibvars` suppressions added in PR #4 are removed. `grep -rn "TODO(modernization).*PR #5" . \| wc -l == 0`. All 6 verification steps pass. | +| **#6** | All `perfsprint` suppressions removed. `grep -rn "TODO(modernization).*PR #6" . \| wc -l == 0`. All 6 verification steps pass. | +| **#7** | `grep -rn "TODO(modernization)" . \| wc -l == 0`. `grep -rn "//nolint:intrange\|//nolint:copyloopvar\|//nolint:usestdlibvars\|//nolint:perfsprint" .` returns nothing. All 6 verification steps pass. | + +### Contract 5 — Bisect safety + +**Rule:** `git bisect` between any two commits in the merged history must work — every merge commit on `main` must be buildable and test-passing. + +**PR squash policy:** Squash on merge is fine for all PRs in this stack. Commit-level bisect within a PR is not required since the PR-level bisect is sufficient. + +--- + +## 5. Risks, Rollback, and Open Questions + +### Risks + +| # | Risk | Likelihood | Impact | Mitigation | +|---|---|---|---|---| +| **R1** | A direct dep in PR #3 has a hidden behavior change (e.g., controller-runtime patch tightens validation) | Medium | High — could break reconcile loops | Run `make test-integration` and `make test-e2e` on CI for PR #3. Roll back individual deps via `go mod edit -replace` if needed. | +| **R2** | A linter in PR #4 produces thousands of false positives | Medium | Medium — bloats PR #4 | Cap at the 4 named linters. If suppression count exceeds ~50 files, defer one of the linters to a follow-up PR. | +| **R3** | mockgen regeneration produces mocks incompatible with current Ginkgo setup | Low | High — blocks PR #2 | Pin to a known-good mockgen version. Test locally before opening PR. | +| **R4** | Go 1.26 stdlib behavior change breaks an assumption in handler logic | Low | Medium | Full `make test test-integration` must pass on PR #1. e2e on CI catches anything tests miss. | +| **R5** | Reviewer demands structural changes during PRs #5–#7, pulling scope back in | Medium | Medium — derails schedule | Explicit non-goals in Section 1. Refer reviewer to spec. Open follow-up issue rather than expanding PR. | +| **R6** | Stacked-PR rebases cause merge conflicts when an unrelated PR lands on `main` | Medium | Low | Only one stack PR open at a time. Conflicts will be tiny because each PR has tightly-scoped diff. | +| **R7** | `omitzero` migration in PR #5 changes JSON serialization in a way that breaks an external CRD-status consumer | Low | High — silent data drift | `omitzero` is *more* correct than `omitempty` for `time.Duration`/`time.Time`. Verify CRD status fixtures in tests. If a fixture depends on old `omitempty` behavior, treat as a real bug to fix. | +| **R8** | `errors.AsType[T]` doesn't behave identically to `errors.As(&target)` in some edge case | Low | Medium | Limit usage to patterns shown in `use-modern-go` skill. For unusual error-handling sites, leave `errors.As` in place. | +| **R9** | CI Docker image cache misses on Go 1.26 image | High | Low — annoyance only | Pre-warm the cache by pushing a no-op commit on a throwaway branch after PR #1 merges. | +| **R10** | A `//nolint` directive in PR #4 hides a real bug | Low | Medium | Suppressions are file-scoped and time-limited (removed in #5–#7). Bugs hidden by suppressions surface as suppressions come off. | + +### Rollback strategy + +| Scenario | Response | +|---|---| +| Production incident traced to a specific PR | `git revert `. No cascade — every PR's contract guarantees buildable state on its own. | +| Production incident, unclear which PR is to blame | Bisect by PR boundaries (7 candidates max). | +| Need to revert the entire modernization | Revert PRs in reverse order (#7, then #6, etc.). | +| Regret a single mid-stack feature | Revert just that PR; PRs after it may need a small rebase. | + +**Hard rule:** No PR in this stack may merge if the previous PR isn't already on `main`. + +### Open Questions (deferred to execution) + +| # | Question | When answered | Default if unresolved | +|---|---|---|---| +| **Q1** | Exact list of direct deps to bump in PR #3 | Beginning of PR #3 (`go list -m -u all`) | Patch updates only; defer minor bumps. | +| **Q2** | Specific `golangci-lint` version to pin in PR #4 | Beginning of PR #4 | Latest stable as of PR open date. | +| **Q3** | Specific `go.uber.org/mock` version to pin in PR #2 | Beginning of PR #2 | Latest stable, validated locally. | +| **Q4** | Are there any benchmarks in the codebase? | First file scan of PR #5 | If zero, omit `b.Loop()` from PR #5. | +| **Q5** | Does the project have a `tools.go` pinning developer tools? | PR #2 | If yes, version bumps go there. If no, ad-hoc `go install` is fine. | +| **Q6** | Does CI use a build matrix? | PR #1 | Single-version matrix is the safe default. | +| **Q7** | Are all 4 modernization linters supported by the chosen `golangci-lint` version? | PR #4 | Drop any unsupported linter; don't downgrade golangci-lint. | + +### Success criteria for the whole effort + +When all 7 PRs are merged and `main` is fully modernized, these statements must all be true: + +- [ ] `go.mod` declares `go 1.26.0` and `toolchain go1.26.0`. +- [ ] `Dockerfile` uses a Go 1.26 base image. +- [ ] CI runs on Go 1.26. +- [ ] `make fmt vet lint test test-integration` is green on `main`. +- [ ] `grep -rn "interface{}" --include="*.go"` returns zero hand-written hits. +- [ ] `grep -rn "TODO(modernization)" .` returns zero hits. +- [ ] No `//nolint` markers added by PR #4 remain. +- [ ] `.golangci.yml` enables `intrange`, `copyloopvar`, `usestdlibvars`, `perfsprint`. +- [ ] `go generate ./...` and `make manifests generate` produce zero diff on a clean checkout. +- [ ] e2e CI on `main` is green. +- [ ] No exported identifier in `api/v1alpha1/` or `internal/handler/` interfaces changed. +- [ ] No reconciler logic, requeue interval, finalizer, or owner-reference structure changed. diff --git a/go.mod b/go.mod index 090d523..66527a7 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/Azure/operation-cache-controller -go 1.24.0 +go 1.26.0 -godebug default=go1.24 +toolchain go1.26.3 require ( github.com/go-logr/logr v1.4.2