From a515f7f96f0cefe65a9db3a0973058835c387a50 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:13:23 +0800 Subject: [PATCH 01/33] feat(netresize): extend interface + chVMInfoConfig.Nets --- extend/netresize/netresize.go | 48 +++++++++++++++++++++++++++++++ hypervisor/cloudhypervisor/api.go | 1 + 2 files changed, 49 insertions(+) create mode 100644 extend/netresize/netresize.go diff --git a/extend/netresize/netresize.go b/extend/netresize/netresize.go new file mode 100644 index 00000000..735d3efb --- /dev/null +++ b/extend/netresize/netresize.go @@ -0,0 +1,48 @@ +// Package netresize is the runtime interface for resizing a VM's NIC count. +// Cocoon never touches the guest — the user must quiesce in-guest NIC state +// (driver unbind / NetworkManager / NDIS) before reducing the count. +package netresize + +import ( + "context" + "errors" + "fmt" +) + +var ErrUnsupportedBackend = errors.New("backend does not support net resize") + +// Spec is one resize request. +type Spec struct { + Target int + KeepHostOnRemove bool +} + +// NIC is one NIC's summary, returned in Result.Added / Result.Removed. +type NIC struct { + Index int `json:"index"` + TAP string `json:"tap"` + MAC string `json:"mac"` +} + +// Result reports the before/after count and the NICs touched by the call. +type Result struct { + Before int `json:"before"` + After int `json:"after"` + Added []NIC `json:"added,omitempty"` + Removed []NIC `json:"removed,omitempty"` +} + +// Resizer resizes the NIC count on a running VM. +type Resizer interface { + NetResize(ctx context.Context, vmRef string, spec Spec) (Result, error) +} + +// Normalize validates the spec. Returns an error if Target is negative. +// Named for parity with extend/fs.Spec.Normalize even though no defaulting +// is needed here. +func (s *Spec) Normalize() error { + if s.Target < 0 { + return fmt.Errorf("--nics must be non-negative, got %d", s.Target) + } + return nil +} diff --git a/hypervisor/cloudhypervisor/api.go b/hypervisor/cloudhypervisor/api.go index 8a997353..9703ee44 100644 --- a/hypervisor/cloudhypervisor/api.go +++ b/hypervisor/cloudhypervisor/api.go @@ -119,4 +119,5 @@ type chVMInfoConfig struct { Memory chMemory `json:"memory"` Fs []chFs `json:"fs,omitempty"` Devices []chDevice `json:"devices,omitempty"` + Nets []chNet `json:"net,omitempty"` } From 100a08c5775db711ad552ca7c33d9af6f0c77df1 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:22:11 +0800 Subject: [PATCH 02/33] refactor(network): replace Network.Config with Add/Remove Add takes variadic AddSpec (per-NIC index + optional Existing for recovery). Remove tears down individual NIC plumbing by index, preserves netns. Extracted cniDel helper shared by Add (rollback+recovery), Remove, delNICs. --- cmd/vm/lifecycle.go | 2 +- cmd/vm/run.go | 2 +- network/bridge/bridge_linux.go | 63 +++++++----- network/bridge/bridge_other.go | 8 +- network/cni/cni.go | 7 +- network/cni/create.go | 169 ++++++++++++++++++++++++--------- network/cni/create_darwin.go | 4 + network/cni/create_linux.go | 11 +++ network/network.go | 41 ++++++-- 9 files changed, 217 insertions(+), 90 deletions(-) diff --git a/cmd/vm/lifecycle.go b/cmd/vm/lifecycle.go index f1a52bcb..185fdc4b 100644 --- a/cmd/vm/lifecycle.go +++ b/cmd/vm/lifecycle.go @@ -386,7 +386,7 @@ func (h Handler) recoverNetwork(ctx context.Context, conf *config.Config, hyper continue } logger.Warnf(ctx, "network missing for VM %s, recovering", vm.ID) - if _, recoverErr := netProvider.Config(ctx, vm.ID, len(vm.NetworkConfigs), &vm.Config, vm.NetworkConfigs...); recoverErr != nil { + if _, recoverErr := netProvider.Add(ctx, vm.ID, &vm.Config, network.AddRecover(vm.NetworkConfigs)...); recoverErr != nil { logger.Warnf(ctx, "recover network for VM %s: %v (start will fail)", vm.ID, recoverErr) } } diff --git a/cmd/vm/run.go b/cmd/vm/run.go index b9419fa9..8c4b955d 100644 --- a/cmd/vm/run.go +++ b/cmd/vm/run.go @@ -480,7 +480,7 @@ func initNetwork(ctx context.Context, conf *config.Config, vmID string, nics int // The network layer derives TAP queues from vmCfg.CPU. origCPU := vmCfg.CPU vmCfg.CPU = queues - configs, err := netProvider.Config(ctx, vmID, nics, vmCfg) + configs, err := netProvider.Add(ctx, vmID, vmCfg, network.AddRange(0, nics)...) vmCfg.CPU = origCPU if err != nil { return nil, nil, fmt.Errorf("configure network: %w", err) diff --git a/network/bridge/bridge_linux.go b/network/bridge/bridge_linux.go index 15ca556e..7998f675 100644 --- a/network/bridge/bridge_linux.go +++ b/network/bridge/bridge_linux.go @@ -67,53 +67,49 @@ func (b *Bridge) Verify(_ context.Context, vmID string) error { return nil } -// Config creates TAP devices and adds them to the bridge. -func (b *Bridge) Config(ctx context.Context, vmID string, numNICs int, vmCfg *types.VMConfig, existing ...*types.NetworkConfig) ([]*types.NetworkConfig, error) { - logger := log.WithFunc("bridge.Config") +// Add allocates TAP devices on the bridge for the given specs. +func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...network.AddSpec) ([]*types.NetworkConfig, error) { + if len(specs) == 0 { + return nil, nil + } + logger := log.WithFunc("bridge.Add") br, err := netlink.LinkByIndex(b.bridgeIdx) if err != nil { return nil, fmt.Errorf("find bridge: %w", err) } - var configs []*types.NetworkConfig - for i := range numNICs { - name := tapName(vmID, i) - - var mac string - if i < len(existing) && existing[i] != nil { - mac = existing[i].MAC - } else { - mac = generateMAC() + configs := make([]*types.NetworkConfig, 0, len(specs)) + for _, spec := range specs { + name := tapName(vmID, spec.Index) + mac := generateMAC() + if spec.Existing != nil { + mac = spec.Existing.MAC } - queues := network.NetNumQueues(vmCfg.CPU) - if err := createTAP(name, queues); err != nil { - return nil, fmt.Errorf("create tap %s: %w", name, err) + if cErr := createTAP(name, queues); cErr != nil { + return nil, fmt.Errorf("create tap %s: %w", name, cErr) } - tap, err := netlink.LinkByName(name) - if err != nil { - return nil, fmt.Errorf("find tap %s: %w", name, err) + tap, lErr := netlink.LinkByName(name) + if lErr != nil { + return nil, fmt.Errorf("find tap %s: %w", name, lErr) } - if err := netlink.LinkSetMaster(tap, br); err != nil { + if mErr := netlink.LinkSetMaster(tap, br); mErr != nil { _ = netlink.LinkDel(tap) - return nil, fmt.Errorf("add %s to %s: %w", name, b.bridgeDev, err) + return nil, fmt.Errorf("add %s to %s: %w", name, b.bridgeDev, mErr) } - // Disable FDB source MAC learning per-packet overhead. _ = netlink.LinkSetLearning(tap, false) - if mtu := br.Attrs().MTU; mtu > 0 { _ = netlink.LinkSetMTU(tap, mtu) } - _ = network.TuneTAP(tap) - if err := netlink.LinkSetUp(tap); err != nil { + if uErr := netlink.LinkSetUp(tap); uErr != nil { _ = netlink.LinkDel(tap) - return nil, fmt.Errorf("set %s up: %w", name, err) + return nil, fmt.Errorf("set %s up: %w", name, uErr) } configs = append(configs, &types.NetworkConfig{ @@ -124,11 +120,26 @@ func (b *Bridge) Config(ctx context.Context, vmID string, numNICs int, vmCfg *ty Backend: typ, BridgeDev: b.bridgeDev, }) - logger.Debugf(ctx, "NIC %d: tap=%s mac=%s bridge=%s", i, name, mac, b.bridgeDev) + logger.Debugf(ctx, "NIC %d: tap=%s mac=%s bridge=%s", spec.Index, name, mac, b.bridgeDev) } return configs, nil } +// Remove deletes the TAP devices for the given indices. +func (b *Bridge) Remove(_ context.Context, vmID string, indices ...int) error { + for _, i := range indices { + name := tapName(vmID, i) + link, err := netlink.LinkByName(name) + if err != nil { + return fmt.Errorf("find tap %s: %w", name, err) + } + if err := netlink.LinkDel(link); err != nil { + return fmt.Errorf("delete tap %s: %w", name, err) + } + } + return nil +} + // Delete removes TAP devices for the given VMs. func (b *Bridge) Delete(_ context.Context, vmIDs []string) ([]string, error) { return CleanupTAPs(vmIDs), nil diff --git a/network/bridge/bridge_other.go b/network/bridge/bridge_other.go index fb4ba4ec..031cb2e6 100644 --- a/network/bridge/bridge_other.go +++ b/network/bridge/bridge_other.go @@ -9,6 +9,7 @@ import ( "github.com/cocoonstack/cocoon/config" "github.com/cocoonstack/cocoon/gc" + "github.com/cocoonstack/cocoon/network" "github.com/cocoonstack/cocoon/types" ) @@ -28,11 +29,14 @@ func (b *Bridge) Type() string { return "bridge" } // Verify is not supported. func (b *Bridge) Verify(_ context.Context, _ string) error { return errUnsupported } -// Config is not supported. -func (b *Bridge) Config(_ context.Context, _ string, _ int, _ *types.VMConfig, _ ...*types.NetworkConfig) ([]*types.NetworkConfig, error) { +// Add is not supported. +func (b *Bridge) Add(_ context.Context, _ string, _ *types.VMConfig, _ ...network.AddSpec) ([]*types.NetworkConfig, error) { return nil, errUnsupported } +// Remove is not supported. +func (b *Bridge) Remove(_ context.Context, _ string, _ ...int) error { return errUnsupported } + // Delete is not supported. func (b *Bridge) Delete(_ context.Context, _ []string) ([]string, error) { return nil, errUnsupported } diff --git a/network/cni/cni.go b/network/cni/cni.go index bcf4cd7e..2a4775a9 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -180,12 +180,7 @@ func (c *CNI) delNICs(ctx context.Context, vmID, nsPath string, records []networ logger.Warnf(ctx, "conflist %q not found for CNI DEL %s/%s: %v", rec.Type, vmID, rec.IfName, err) continue } - rt := &libcni.RuntimeConf{ - ContainerID: vmID, - NetNS: nsPath, - IfName: rec.IfName, - } - if err := c.cniConf.DelNetworkList(ctx, cl, rt); err != nil { + if err := c.cniDel(ctx, cl, vmID, nsPath, rec.IfName); err != nil { logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, rec.IfName, err) } } diff --git a/network/cni/create.go b/network/cni/create.go index 34d7cc87..06fc6962 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -2,7 +2,10 @@ package cni import ( "context" + "errors" "fmt" + "io/fs" + "os" "github.com/containernetworking/cni/libcni" cnitypes "github.com/containernetworking/cni/pkg/types" @@ -14,83 +17,85 @@ import ( "github.com/cocoonstack/cocoon/utils" ) -// Config creates the netns, runs CNI ADD, and wires each NIC to a tap via TC. -func (c *CNI) Config(ctx context.Context, vmID string, numNICs int, vmCfg *types.VMConfig, existing ...*types.NetworkConfig) (configs []*types.NetworkConfig, retErr error) { +// Add creates the netns (if absent) and allocates each NIC's CNI plumbing. +func (c *CNI) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...network.AddSpec) (configs []*types.NetworkConfig, retErr error) { if c.cniConf == nil { return nil, fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) } + if len(specs) == 0 { + return nil, nil + } confList, err := c.confListByName(vmCfg.Network) if err != nil { return nil, err } vmCfg.Network = confList.Name - logger := log.WithFunc("cni.Config") + logger := log.WithFunc("cni.Add") nsName := netnsName(vmID) nsPath := netnsPath(vmID) - if err := createNetns(nsName); err != nil { - return nil, fmt.Errorf("create netns %s: %w", nsName, err) + createdNetns, err := ensureNetns(nsName, nsPath) + if err != nil { + return nil, fmt.Errorf("ensure netns %s: %w", nsName, err) } - var addedIFs []string + addedIFs := make([]string, 0, len(specs)) defer func() { if retErr == nil { return } for _, ifn := range addedIFs { - rt := &libcni.RuntimeConf{ - ContainerID: vmID, - NetNS: nsPath, - IfName: ifn, - } - if delErr := c.cniConf.DelNetworkList(ctx, confList, rt); delErr != nil { + if delErr := c.cniDel(ctx, confList, vmID, nsPath, ifn); delErr != nil { logger.Warnf(ctx, "rollback CNI DEL %s/%s: %v", vmID, ifn, delErr) } } - _ = deleteNetns(ctx, nsName) - }() - - for i := range numNICs { - ifName := fmt.Sprintf("eth%d", i) - tapName := tapNameForVM(vmID, i) - - rt := &libcni.RuntimeConf{ - ContainerID: vmID, - NetNS: nsPath, - IfName: ifName, + if createdNetns { + _ = deleteNetns(ctx, nsName) } + }() - if i < len(existing) && existing[i] != nil { - if delErr := c.cniConf.DelNetworkList(ctx, confList, rt); delErr != nil { + type freshNIC struct { + index int + cfg *types.NetworkConfig + } + configs = make([]*types.NetworkConfig, 0, len(specs)) + fresh := make([]freshNIC, 0, len(specs)) + for _, spec := range specs { + ifName := fmt.Sprintf("eth%d", spec.Index) + tapName := tapNameForVM(vmID, spec.Index) + + rt := &libcni.RuntimeConf{ContainerID: vmID, NetNS: nsPath, IfName: ifName} + if spec.Existing != nil { + if delErr := c.cniDel(ctx, confList, vmID, nsPath, ifName); delErr != nil { logger.Warnf(ctx, "pre-recovery CNI DEL %s/%s: %v (continuing)", vmID, ifName, delErr) } - if existing[i].Network != nil && existing[i].Network.IP != "" { - rt.Args = [][2]string{{"IgnoreUnknown", "1"}, {"IP", existing[i].Network.IP}} + if spec.Existing.Network != nil && spec.Existing.Network.IP != "" { + rt.Args = [][2]string{{"IgnoreUnknown", "1"}, {"IP", spec.Existing.Network.IP}} } } - cniResult, err := c.cniConf.AddNetworkList(ctx, confList, rt) - if err != nil { - return nil, fmt.Errorf("cni add %s/%s: %w", vmID, ifName, err) + cniResult, addErr := c.cniConf.AddNetworkList(ctx, confList, rt) + if addErr != nil { + return nil, fmt.Errorf("cni add %s/%s: %w", vmID, ifName, addErr) } addedIFs = append(addedIFs, ifName) - netInfo, err := extractNetworkInfo(cniResult) - if err != nil { - return nil, fmt.Errorf("parse CNI result: %w", err) + netInfo, parseErr := extractNetworkInfo(cniResult) + if parseErr != nil { + return nil, fmt.Errorf("parse CNI result: %w", parseErr) } var overrideMAC string - if i < len(existing) && existing[i] != nil { - overrideMAC = existing[i].MAC + if spec.Existing != nil { + overrideMAC = spec.Existing.MAC } mac, setupErr := setupTCRedirect(nsPath, ifName, tapName, network.NetNumQueues(vmCfg.CPU), overrideMAC) if setupErr != nil { return nil, fmt.Errorf("setup tc-redirect %s: %w", vmID, setupErr) } - configs = append(configs, &types.NetworkConfig{ + cfg := &types.NetworkConfig{ TAP: tapName, MAC: mac, NumQueues: network.NetNumQueues(vmCfg.CPU), @@ -98,7 +103,11 @@ func (c *CNI) Config(ctx context.Context, vmID string, numNICs int, vmCfg *types Backend: typ, NetnsPath: nsPath, Network: netInfo, - }) + } + configs = append(configs, cfg) + if spec.Existing == nil { + fresh = append(fresh, freshNIC{index: spec.Index, cfg: cfg}) + } var logIP, logGW string if netInfo != nil { @@ -106,36 +115,104 @@ func (c *CNI) Config(ctx context.Context, vmID string, numNICs int, vmCfg *types logGW = netInfo.Gateway } logger.Debugf(ctx, "NIC %d: %s ip=%s gw=%s tap=%s mac=%s", - i, ifName, logIP, logGW, tapName, mac) - } - - if len(existing) > 0 { - return configs, nil + spec.Index, ifName, logIP, logGW, tapName, mac) } return configs, c.store.Update(ctx, func(idx *networkIndex) error { - for i, cfg := range configs { + for _, f := range fresh { netID := utils.GenerateID() var net types.Network - if cfg.Network != nil { - net = *cfg.Network + if f.cfg.Network != nil { + net = *f.cfg.Network } idx.Networks[netID] = &networkRecord{ ID: netID, Type: confList.Name, Network: net, VMID: vmID, - IfName: fmt.Sprintf("eth%d", i), + IfName: fmt.Sprintf("eth%d", f.index), } } return nil }) } +// Remove tears down NIC plumbing for the given indices; preserves the netns. +func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { + if len(indices) == 0 { + return nil + } + if c.cniConf == nil { + return fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) + } + nsPath := netnsPath(vmID) + logger := log.WithFunc("cni.Remove") + + var records []networkRecord + if err := c.store.With(ctx, func(idx *networkIndex) error { + records = idx.byVMID(vmID) + return nil + }); err != nil { + return fmt.Errorf("read network index: %w", err) + } + byIfName := make(map[string]networkRecord, len(records)) + for _, r := range records { + byIfName[r.IfName] = r + } + + idsToDelete := make([]string, 0, len(indices)) + for _, i := range indices { + ifName := fmt.Sprintf("eth%d", i) + rec, ok := byIfName[ifName] + if !ok { + return fmt.Errorf("nic %d (%s): no record", i, ifName) + } + confList, err := c.confListByName(rec.Type) + if err != nil { + return fmt.Errorf("nic %d: %w", i, err) + } + if delErr := c.cniDel(ctx, confList, vmID, nsPath, ifName); delErr != nil { + logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, ifName, delErr) + } + if err := deleteTAPInNetns(nsPath, tapNameForVM(vmID, i)); err != nil { + return fmt.Errorf("delete tap %s: %w", tapNameForVM(vmID, i), err) + } + idsToDelete = append(idsToDelete, rec.ID) + } + + return c.store.Update(ctx, func(idx *networkIndex) error { + for _, id := range idsToDelete { + delete(idx.Networks, id) + } + return nil + }) +} + +// cniDel runs CNI DEL for one NIC; shared by Add (recovery + rollback) and Remove. +func (c *CNI) cniDel(ctx context.Context, confList *libcni.NetworkConfigList, vmID, nsPath, ifName string) error { + rt := &libcni.RuntimeConf{ContainerID: vmID, NetNS: nsPath, IfName: ifName} + return c.cniConf.DelNetworkList(ctx, confList, rt) +} + func tapNameForVM(vmID string, nic int) string { return fmt.Sprintf("tap%s-%d", network.VMIDPrefix(vmID), nic) } +// ensureNetns creates the netns when /var/run/netns/ is absent. +// The bool reports whether this call did the creation, so rollback only +// tears down a netns we just made. +func ensureNetns(name, nsPath string) (bool, error) { + if _, err := os.Stat(nsPath); err == nil { + return false, nil + } else if !errors.Is(err, fs.ErrNotExist) { + return false, err + } + if err := createNetns(name); err != nil { + return false, err + } + return true, nil +} + // extractNetworkInfo converts a CNI ADD result into types.Network. func extractNetworkInfo(result cnitypes.Result) (*types.Network, error) { newResult, err := current.NewResultFromResult(result) diff --git a/network/cni/create_darwin.go b/network/cni/create_darwin.go index 9fd783ea..380e7411 100644 --- a/network/cni/create_darwin.go +++ b/network/cni/create_darwin.go @@ -18,3 +18,7 @@ func deleteNetns(_ context.Context, _ string) error { func setupTCRedirect(_, _, _ string, _ int, _ string) (string, error) { return "", errNotSupported } + +func deleteTAPInNetns(_, _ string) error { + return errNotSupported +} diff --git a/network/cni/create_linux.go b/network/cni/create_linux.go index 2ad228f8..39800604 100644 --- a/network/cni/create_linux.go +++ b/network/cni/create_linux.go @@ -52,6 +52,17 @@ func deleteNetns(ctx context.Context, name string) error { }) } +// deleteTAPInNetns deletes a named TAP device inside target netns. +func deleteTAPInNetns(nsPath, tapName string) error { + return cns.WithNetNSPath(nsPath, func(_ cns.NetNS) error { + link, err := netlink.LinkByName(tapName) + if err != nil { + return fmt.Errorf("find %s: %w", tapName, err) + } + return netlink.LinkDel(link) + }) +} + // setupTCRedirect wires ifName <-> tapName inside target netns, returns MAC. func setupTCRedirect(nsPath, ifName, tapName string, queues int, overrideMAC string) (string, error) { var mac string diff --git a/network/network.go b/network/network.go index e9c5d726..293b8398 100644 --- a/network/network.go +++ b/network/network.go @@ -13,21 +13,46 @@ var ( ErrNotConfigured = errors.New("network provider not configured") ) -// Network defines the interface for a network provider (CNI, etc.). +// AddSpec is one NIC's add request; Existing != nil reuses MAC/IP for recovery. +type AddSpec struct { + Index int + Existing *types.NetworkConfig +} + +// Network defines the interface for a network provider (CNI, bridge, ...). type Network interface { Type() string - // Verify checks whether the network namespace for a VM exists. - // Returns nil if the netns is present, an error otherwise. + // Verify checks whether the network namespace / TAPs for a VM exist. Verify(ctx context.Context, vmID string) error - // Config creates network namespace, bridge, and tap for a VM. - // When existing configs are provided (recovery after host reboot), - // the netns and tap devices are recreated using the persisted MAC addresses. - // NOTE: vmCfg.Network may be mutated to record the resolved conflist name. - Config(ctx context.Context, vmID string, numNICs int, vmCfg *types.VMConfig, existing ...*types.NetworkConfig) ([]*types.NetworkConfig, error) + + // Add allocates NIC host plumbing for the given specs; idempotent re: netns. + Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...AddSpec) ([]*types.NetworkConfig, error) + + // Remove tears down NIC host plumbing for the given indices; preserves netns. + Remove(ctx context.Context, vmID string, indices ...int) error + Delete(context.Context, []string) ([]string, error) Inspect(context.Context, string) (*types.Network, error) List(context.Context) ([]*types.Network, error) RegisterGC(*gc.Orchestrator) } + +// AddRange returns specs for a contiguous range of fresh NICs. +func AddRange(from, count int) []AddSpec { + out := make([]AddSpec, count) + for i := range out { + out[i] = AddSpec{Index: from + i} + } + return out +} + +// AddRecover returns specs for re-creating existing NICs at slots 0..len-1. +func AddRecover(existing []*types.NetworkConfig) []AddSpec { + out := make([]AddSpec, len(existing)) + for i, e := range existing { + out[i] = AddSpec{Index: i, Existing: e} + } + return out +} From 5c65f0c14e3d7eef7794f12a82cfa3aa63c19858 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:29:15 +0800 Subject: [PATCH 03/33] refactor(network): dedup Delete/Remove via shared tearDown helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CNI: tearDownNICs(records, deleteTAP, bestEffort) → ids; both Delete and Remove go through it. deleteRecords sweeps DB by id in one Update. delNICs stays for gc.Collect (different lock context). Bridge: tearDownTAPs(indices, bestEffort) shared by Remove and CleanupTAPs. --- network/bridge/bridge_linux.go | 49 +++++++++++--------- network/cni/cni.go | 81 ++++++++++++++++++++++------------ network/cni/create.go | 33 +++----------- 3 files changed, 89 insertions(+), 74 deletions(-) diff --git a/network/bridge/bridge_linux.go b/network/bridge/bridge_linux.go index 7998f675..6d7cb7a8 100644 --- a/network/bridge/bridge_linux.go +++ b/network/bridge/bridge_linux.go @@ -127,17 +127,7 @@ func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, sp // Remove deletes the TAP devices for the given indices. func (b *Bridge) Remove(_ context.Context, vmID string, indices ...int) error { - for _, i := range indices { - name := tapName(vmID, i) - link, err := netlink.LinkByName(name) - if err != nil { - return fmt.Errorf("find tap %s: %w", name, err) - } - if err := netlink.LinkDel(link); err != nil { - return fmt.Errorf("delete tap %s: %w", name, err) - } - } - return nil + return tearDownTAPs(vmID, indices, false) } // Delete removes TAP devices for the given VMs. @@ -160,25 +150,44 @@ func (b *Bridge) RegisterGC(orch *gc.Orchestrator) { gc.Register(orch, GCModule(b.conf.RootDir)) } -// CleanupTAPs removes bridge TAP devices for the given VM IDs. -// It does not require a Bridge instance and is safe to call -// even when no bridge TAPs exist (no-op per VM). +// CleanupTAPs probes and removes bridge TAP devices for the given VM IDs. +// No-op per VM if none exist; safe without a Bridge instance. func CleanupTAPs(vmIDs []string) []string { - var cleaned []string + cleaned := make([]string, 0, len(vmIDs)) for _, vmID := range vmIDs { + var indices []int for i := 0; ; i++ { - name := tapName(vmID, i) - l, err := netlink.LinkByName(name) - if err != nil { - break // no more TAPs for this VM + if _, err := netlink.LinkByName(tapName(vmID, i)); err != nil { + break } - _ = netlink.LinkDel(l) + indices = append(indices, i) } + _ = tearDownTAPs(vmID, indices, true) cleaned = append(cleaned, vmID) } return cleaned } +func tearDownTAPs(vmID string, indices []int, bestEffort bool) error { + for _, i := range indices { + name := tapName(vmID, i) + link, err := netlink.LinkByName(name) + if err != nil { + if bestEffort { + continue + } + return fmt.Errorf("find tap %s: %w", name, err) + } + if err := netlink.LinkDel(link); err != nil { + if bestEffort { + continue + } + return fmt.Errorf("delete tap %s: %w", name, err) + } + } + return nil +} + func createTAP(name string, numQueues int) error { // queue_pairs = num_queues / 2 (TX+RX pair per queue). // Multi-queue requires queue_pairs > 1. diff --git a/network/cni/cni.go b/network/cni/cni.go index 2a4775a9..a85104e2 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -113,13 +113,7 @@ func (c *CNI) List(ctx context.Context) ([]*types.Network, error) { }) } -// Delete removes all network resources for the given VM IDs: -// 1. CNI DEL for each NIC (releases IP from IPAM, removes veth pair). -// 2. Remove the named netns (kernel cleans up bridge + tap automatically). -// 3. Remove network records from the DB. -// -// Best-effort: failing to clean one VM does not block others. -// Returns the VM IDs that were fully cleaned. +// Delete tears down all NICs for each VM and removes the netns. Best-effort. func (c *CNI) Delete(ctx context.Context, vmIDs []string) ([]string, error) { result := utils.ForEach(ctx, vmIDs, func(ctx context.Context, vmID string) error { return c.deleteVM(ctx, vmID) @@ -127,9 +121,7 @@ func (c *CNI) Delete(ctx context.Context, vmIDs []string) ([]string, error) { return result.Succeeded, result.Err() } -// deleteVM cleans up all network resources for a single VM. func (c *CNI) deleteVM(ctx context.Context, vmID string) error { - // Collect value-copies of records for this VM. var records []networkRecord if err := c.store.With(ctx, func(idx *networkIndex) error { records = idx.byVMID(vmID) @@ -137,38 +129,59 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { }); err != nil { return fmt.Errorf("read network index: %w", err) } - - // Nothing to clean — VM had no network or was already cleaned. if len(records) == 0 { return nil } - nsPath := netnsPath(vmID) - - // CNI DEL for each NIC — releases IPs from IPAM and removes veth pairs. - // Best-effort: log failures but continue. Netns deletion cleans up - // devices anyway; CNI DEL is primarily for IPAM bookkeeping. - c.delNICs(ctx, vmID, nsPath, records) - - // deleteNetns retries briefly to handle async fd cleanup after process kill. + ids, _ := c.tearDownNICs(ctx, vmID, nsPath, records, false, true) nsName := netnsName(vmID) if err := deleteNetns(ctx, nsName); err != nil && !errors.Is(err, fs.ErrNotExist) { return fmt.Errorf("remove netns %s: %w", nsPath, err) } + return c.deleteRecords(ctx, ids) +} - return c.store.Update(ctx, func(idx *networkIndex) error { - for id, rec := range idx.Networks { - if rec != nil && rec.VMID == vmID { - delete(idx.Networks, id) +// tearDownNICs runs CNI DEL (and optionally TAP delete) for each record. +// bestEffort=true logs errors and continues; false returns on the first. +// Returns the record IDs successfully torn down so the caller can sweep them +// from the index in one Update. +func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []networkRecord, deleteTAP, bestEffort bool) ([]string, error) { + if c.cniConf == nil { + if bestEffort { + return nil, nil + } + return nil, fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) + } + logger := log.WithFunc("cni.tearDownNICs") + ids := make([]string, 0, len(records)) + for _, rec := range records { + cl, err := c.confListByName(rec.Type) + if err != nil { + if !bestEffort { + return ids, err } + logger.Warnf(ctx, "conflist %q not found for CNI DEL %s/%s: %v", rec.Type, vmID, rec.IfName, err) + continue } - return nil - }) + if err := c.cniDel(ctx, cl, vmID, nsPath, rec.IfName); err != nil { + logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, rec.IfName, err) + } + if deleteTAP { + var i int + if _, err := fmt.Sscanf(rec.IfName, "eth%d", &i); err == nil { + if err := deleteTAPInNetns(nsPath, tapNameForVM(vmID, i)); err != nil && !bestEffort { + return ids, fmt.Errorf("delete tap %s: %w", tapNameForVM(vmID, i), err) + } + } + } + ids = append(ids, rec.ID) + } + return ids, nil } -// delNICs performs best-effort CNI DEL for each NIC record. -// Failures are logged but never returned — netns deletion cleans up -// devices anyway; CNI DEL is primarily for IPAM bookkeeping. +// delNICs is the per-record CNI DEL loop without store access; used by the +// lockless gc.Collect path. The store-aware Delete/Remove paths go through +// tearDownNICs. func (c *CNI) delNICs(ctx context.Context, vmID, nsPath string, records []networkRecord) { if c.cniConf == nil { return @@ -186,6 +199,18 @@ func (c *CNI) delNICs(ctx context.Context, vmID, nsPath string, records []networ } } +func (c *CNI) deleteRecords(ctx context.Context, ids []string) error { + if len(ids) == 0 { + return nil + } + return c.store.Update(ctx, func(idx *networkIndex) error { + for _, id := range ids { + delete(idx.Networks, id) + } + return nil + }) +} + // confListByName resolves a conflist by name. // Empty name returns the default (first alphabetically). func (c *CNI) confListByName(name string) (*libcni.NetworkConfigList, error) { diff --git a/network/cni/create.go b/network/cni/create.go index 06fc6962..b3a57ebc 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -142,12 +142,6 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { if len(indices) == 0 { return nil } - if c.cniConf == nil { - return fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) - } - nsPath := netnsPath(vmID) - logger := log.WithFunc("cni.Remove") - var records []networkRecord if err := c.store.With(ctx, func(idx *networkIndex) error { records = idx.byVMID(vmID) @@ -159,33 +153,20 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { for _, r := range records { byIfName[r.IfName] = r } - - idsToDelete := make([]string, 0, len(indices)) + picked := make([]networkRecord, 0, len(indices)) for _, i := range indices { ifName := fmt.Sprintf("eth%d", i) rec, ok := byIfName[ifName] if !ok { return fmt.Errorf("nic %d (%s): no record", i, ifName) } - confList, err := c.confListByName(rec.Type) - if err != nil { - return fmt.Errorf("nic %d: %w", i, err) - } - if delErr := c.cniDel(ctx, confList, vmID, nsPath, ifName); delErr != nil { - logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, ifName, delErr) - } - if err := deleteTAPInNetns(nsPath, tapNameForVM(vmID, i)); err != nil { - return fmt.Errorf("delete tap %s: %w", tapNameForVM(vmID, i), err) - } - idsToDelete = append(idsToDelete, rec.ID) + picked = append(picked, rec) } - - return c.store.Update(ctx, func(idx *networkIndex) error { - for _, id := range idsToDelete { - delete(idx.Networks, id) - } - return nil - }) + ids, err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) + if err != nil { + return err + } + return c.deleteRecords(ctx, ids) } // cniDel runs CNI DEL for one NIC; shared by Add (recovery + rollback) and Remove. From 68a58b889adc8a89761fddc34164831a3a739210 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:36:39 +0800 Subject: [PATCH 04/33] feat(cloudhypervisor): implement netresize.Resizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NetResize dispatches to per-NIC add or LIFO remove. Add path: plumbing.Add → vm.add-net with deterministic cocoon-net- id → DB append; rolls back the current NIC's host plumbing if vm.add-net fails. Remove path: build MAC→id map from vm.info → vm.remove-device → optional plumbing.Remove (skipped via --keep-host-on-remove) → DB truncate. Persists per NIC so a mid-op failure leaves the record matching CH state. --- extend/netresize/netresize.go | 16 ++- hypervisor/cloudhypervisor/args.go | 7 ++ hypervisor/cloudhypervisor/extend.go | 10 +- hypervisor/cloudhypervisor/netresize.go | 124 ++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 8 deletions(-) create mode 100644 hypervisor/cloudhypervisor/netresize.go diff --git a/extend/netresize/netresize.go b/extend/netresize/netresize.go index 735d3efb..742b9b8b 100644 --- a/extend/netresize/netresize.go +++ b/extend/netresize/netresize.go @@ -7,6 +7,9 @@ import ( "context" "errors" "fmt" + + "github.com/cocoonstack/cocoon/network" + "github.com/cocoonstack/cocoon/types" ) var ErrUnsupportedBackend = errors.New("backend does not support net resize") @@ -32,14 +35,19 @@ type Result struct { Removed []NIC `json:"removed,omitempty"` } +// Plumbing is the host-side network operations NetResize delegates to. +// network.Network satisfies this implicitly. +type Plumbing interface { + Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...network.AddSpec) ([]*types.NetworkConfig, error) + Remove(ctx context.Context, vmID string, indices ...int) error +} + // Resizer resizes the NIC count on a running VM. type Resizer interface { - NetResize(ctx context.Context, vmRef string, spec Spec) (Result, error) + NetResize(ctx context.Context, vmRef string, spec Spec, plumbing Plumbing) (Result, error) } -// Normalize validates the spec. Returns an error if Target is negative. -// Named for parity with extend/fs.Spec.Normalize even though no defaulting -// is needed here. +// Normalize validates the spec. func (s *Spec) Normalize() error { if s.Target < 0 { return fmt.Errorf("--nics must be non-negative, got %d", s.Target) diff --git a/hypervisor/cloudhypervisor/args.go b/hypervisor/cloudhypervisor/args.go index c31de8ef..723d4b37 100644 --- a/hypervisor/cloudhypervisor/args.go +++ b/hypervisor/cloudhypervisor/args.go @@ -17,6 +17,8 @@ const ( // defaultDiskQueueSize is the virtio-blk queue depth per device. defaultDiskQueueSize = 512 cidataFile = "cidata.img" + + cocoonNetIDPrefix = "cocoon-net-" ) // kvBuilder accumulates key=value CLI fragments. @@ -179,6 +181,11 @@ func networkConfigToNet(nc *types.NetworkConfig) chNet { } } +// cocoonNetID is the deterministic CH device id for a cocoon-managed NIC. +func cocoonNetID(mac string) string { + return cocoonNetIDPrefix + strings.ReplaceAll(mac, ":", "") +} + func storageConfigToDisk(storageConfig *types.StorageConfig, cpuCount, diskQueueSize int, noDirectIO bool) chDisk { if diskQueueSize <= 0 { diskQueueSize = defaultDiskQueueSize diff --git a/hypervisor/cloudhypervisor/extend.go b/hypervisor/cloudhypervisor/extend.go index f1e37ff4..61cdf795 100644 --- a/hypervisor/cloudhypervisor/extend.go +++ b/hypervisor/cloudhypervisor/extend.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/cocoonstack/cocoon/extend/fs" + "github.com/cocoonstack/cocoon/extend/netresize" "github.com/cocoonstack/cocoon/extend/vfio" "github.com/cocoonstack/cocoon/hypervisor" "github.com/cocoonstack/cocoon/types" @@ -17,10 +18,11 @@ import ( ) var ( - _ fs.Attacher = (*CloudHypervisor)(nil) - _ fs.Lister = (*CloudHypervisor)(nil) - _ vfio.Attacher = (*CloudHypervisor)(nil) - _ vfio.Lister = (*CloudHypervisor)(nil) + _ fs.Attacher = (*CloudHypervisor)(nil) + _ fs.Lister = (*CloudHypervisor)(nil) + _ vfio.Attacher = (*CloudHypervisor)(nil) + _ vfio.Lister = (*CloudHypervisor)(nil) + _ netresize.Resizer = (*CloudHypervisor)(nil) ) // FsAttach hot-plugs a vhost-user-fs device onto a running CH VM. diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go new file mode 100644 index 00000000..e410ed5d --- /dev/null +++ b/hypervisor/cloudhypervisor/netresize.go @@ -0,0 +1,124 @@ +package cloudhypervisor + +import ( + "context" + "fmt" + "net/http" + "strings" + + "github.com/projecteru2/core/log" + + "github.com/cocoonstack/cocoon/extend/netresize" + "github.com/cocoonstack/cocoon/hypervisor" + "github.com/cocoonstack/cocoon/network" + "github.com/cocoonstack/cocoon/types" +) + +// NetResize brings the VM's NIC count to spec.Target. +func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec netresize.Spec, plumbing netresize.Plumbing) (netresize.Result, error) { + if err := spec.Normalize(); err != nil { + return netresize.Result{}, err + } + hc, info, err := ch.inspectRunning(ctx, vmRef) + if err != nil { + return netresize.Result{}, err + } + vmID, err := ch.ResolveRef(ctx, vmRef) + if err != nil { + return netresize.Result{}, err + } + rec, err := ch.LoadRecord(ctx, vmID) + if err != nil { + return netresize.Result{}, err + } + current := len(rec.NetworkConfigs) + res := netresize.Result{Before: current, After: current} + switch { + case spec.Target == current: + return res, nil + case spec.Target > current: + return ch.netResizeAdd(ctx, hc, vmID, &rec.Config, plumbing, current, spec.Target, res) + default: + return ch.netResizeRemove(ctx, hc, info, vmID, rec.NetworkConfigs, plumbing, current, spec.Target, spec.KeepHostOnRemove, res) + } +} + +func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vmID string, vmCfg *types.VMConfig, plumbing netresize.Plumbing, from, target int, res netresize.Result) (netresize.Result, error) { + logger := log.WithFunc("cloudhypervisor.NetResize.add") + for i := from; i < target; i++ { + ncs, err := plumbing.Add(ctx, vmID, vmCfg, network.AddSpec{Index: i}) + if err != nil { + return res, fmt.Errorf("nic %d host plumbing: %w", i, err) + } + if len(ncs) != 1 || ncs[0] == nil { + return res, fmt.Errorf("nic %d: plumbing returned %d configs", i, len(ncs)) + } + nc := ncs[0] + chN := networkConfigToNet(nc) + chN.ID = cocoonNetID(nc.MAC) + if err := addNetVM(ctx, hc, chN); err != nil { + if rmErr := plumbing.Remove(ctx, vmID, i); rmErr != nil { + logger.Warnf(ctx, "rollback host plumbing for nic %d: %v", i, rmErr) + } + return res, fmt.Errorf("vm.add-net nic %d: %w", i, err) + } + if err := ch.appendNetworkConfig(ctx, vmID, nc); err != nil { + return res, fmt.Errorf("persist nic %d: %w", i, err) + } + res.Added = append(res.Added, netresize.NIC{Index: i, TAP: nc.TAP, MAC: nc.MAC}) + res.After = i + 1 + } + return res, nil +} + +func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, info *chVMInfoResponse, vmID string, ncs []*types.NetworkConfig, plumbing netresize.Plumbing, current, target int, keepHost bool, res netresize.Result) (netresize.Result, error) { + macToID := make(map[string]string, len(info.Config.Nets)) + for _, n := range info.Config.Nets { + macToID[strings.ToLower(n.MAC)] = n.ID + } + for i := current - 1; i >= target; i-- { + nc := ncs[i] + chID := macToID[strings.ToLower(nc.MAC)] + if chID == "" { + return res, fmt.Errorf("nic %d MAC %s: no live device", i, nc.MAC) + } + if err := removeDeviceVM(ctx, hc, chID); err != nil { + return res, fmt.Errorf("vm.remove-device nic %d (%s): %w", i, chID, err) + } + if !keepHost { + if err := plumbing.Remove(ctx, vmID, i); err != nil { + return res, fmt.Errorf("nic %d host plumbing: %w", i, err) + } + } + if err := ch.truncateNetworkConfigs(ctx, vmID, i); err != nil { + return res, fmt.Errorf("persist remove nic %d: %w", i, err) + } + res.Removed = append(res.Removed, netresize.NIC{Index: i, TAP: nc.TAP, MAC: nc.MAC}) + res.After = i + } + return res, nil +} + +func (ch *CloudHypervisor) appendNetworkConfig(ctx context.Context, vmID string, nc *types.NetworkConfig) error { + return ch.DB.Update(ctx, func(idx *hypervisor.VMIndex) error { + r := idx.VMs[vmID] + if r == nil { + return hypervisor.ErrNotFound + } + r.NetworkConfigs = append(r.NetworkConfigs, nc) + return nil + }) +} + +func (ch *CloudHypervisor) truncateNetworkConfigs(ctx context.Context, vmID string, length int) error { + return ch.DB.Update(ctx, func(idx *hypervisor.VMIndex) error { + r := idx.VMs[vmID] + if r == nil { + return hypervisor.ErrNotFound + } + if length < len(r.NetworkConfigs) { + r.NetworkConfigs = r.NetworkConfigs[:length] + } + return nil + }) +} From 1e23e1a5917699982406945210083df61781618d Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:43:58 +0800 Subject: [PATCH 05/33] feat(vm): cocoon vm net --nics N CLI for resizing a running VM's NIC count. Wires the resolved Resizer (currently CH only) with the matching host plumbing. Refactors resolveAttacher to also return conf + hypervisor.Hypervisor so NetResize can call Inspect to discover the active backend without re-resolving. Hoists "bridge" / "cni" backend literals to types.BackendBridge / types.BackendCNI so cmd, network/cni, and network/bridge share one identifier. --- cmd/vm/attach.go | 24 +++++++------- cmd/vm/commands.go | 16 ++++++++++ cmd/vm/lifecycle.go | 2 +- cmd/vm/netresize.go | 57 ++++++++++++++++++++++++++++++++++ network/bridge/bridge_linux.go | 2 +- network/cni/create.go | 2 +- types/network.go | 6 ++++ 7 files changed, 95 insertions(+), 14 deletions(-) create mode 100644 cmd/vm/netresize.go diff --git a/cmd/vm/attach.go b/cmd/vm/attach.go index 8cc1d03d..7b824b2e 100644 --- a/cmd/vm/attach.go +++ b/cmd/vm/attach.go @@ -9,13 +9,14 @@ import ( "github.com/spf13/cobra" cmdcore "github.com/cocoonstack/cocoon/cmd/core" + "github.com/cocoonstack/cocoon/config" "github.com/cocoonstack/cocoon/extend/fs" "github.com/cocoonstack/cocoon/extend/vfio" "github.com/cocoonstack/cocoon/hypervisor" ) func (h Handler) FsAttach(cmd *cobra.Command, args []string) error { - ctx, a, err := resolveAttacher[fs.Attacher](h, cmd, args, "fs attach", fs.ErrUnsupportedBackend) + ctx, _, _, a, err := resolveAttacher[fs.Attacher](h, cmd, args, "fs attach", fs.ErrUnsupportedBackend) if err != nil { return err } @@ -35,7 +36,7 @@ func (h Handler) FsAttach(cmd *cobra.Command, args []string) error { } func (h Handler) FsDetach(cmd *cobra.Command, args []string) error { - ctx, a, err := resolveAttacher[fs.Attacher](h, cmd, args, "fs detach", fs.ErrUnsupportedBackend) + ctx, _, _, a, err := resolveAttacher[fs.Attacher](h, cmd, args, "fs detach", fs.ErrUnsupportedBackend) if err != nil { return err } @@ -51,7 +52,7 @@ func (h Handler) FsDetach(cmd *cobra.Command, args []string) error { } func (h Handler) DeviceAttach(cmd *cobra.Command, args []string) error { - ctx, a, err := resolveAttacher[vfio.Attacher](h, cmd, args, "device attach", vfio.ErrUnsupportedBackend) + ctx, _, _, a, err := resolveAttacher[vfio.Attacher](h, cmd, args, "device attach", vfio.ErrUnsupportedBackend) if err != nil { return err } @@ -69,7 +70,7 @@ func (h Handler) DeviceAttach(cmd *cobra.Command, args []string) error { } func (h Handler) DeviceDetach(cmd *cobra.Command, args []string) error { - ctx, a, err := resolveAttacher[vfio.Attacher](h, cmd, args, "device detach", vfio.ErrUnsupportedBackend) + ctx, _, _, a, err := resolveAttacher[vfio.Attacher](h, cmd, args, "device detach", vfio.ErrUnsupportedBackend) if err != nil { return err } @@ -85,23 +86,24 @@ func (h Handler) DeviceDetach(cmd *cobra.Command, args []string) error { } // resolveAttacher resolves args[0] to a hypervisor implementing A -// (fs.Attacher / vfio.Attacher). op ("fs attach", "device detach") prefixes -// both error wraps so callers see the operation, not just the type-assert. -func resolveAttacher[A any](h Handler, cmd *cobra.Command, args []string, op string, errUnsupported error) (context.Context, A, error) { +// (fs.Attacher / vfio.Attacher / netresize.Resizer). op prefixes both error +// wraps so callers see the operation. Returns the resolved hypervisor too +// so callers can issue further generic ops (e.g. Inspect) without re-finding. +func resolveAttacher[A any](h Handler, cmd *cobra.Command, args []string, op string, errUnsupported error) (context.Context, *config.Config, hypervisor.Hypervisor, A, error) { var zero A ctx, conf, err := h.Init(cmd) if err != nil { - return ctx, zero, err + return ctx, nil, nil, zero, err } hyper, err := cmdcore.FindHypervisor(ctx, conf, args[0]) if err != nil { - return ctx, zero, fmt.Errorf("%s: %w", op, err) + return ctx, conf, nil, zero, fmt.Errorf("%s: %w", op, err) } a, ok := hyper.(A) if !ok { - return ctx, zero, fmt.Errorf("%s: backend %s: %w", op, hyper.Type(), errUnsupported) + return ctx, conf, hyper, zero, fmt.Errorf("%s: backend %s: %w", op, hyper.Type(), errUnsupported) } - return ctx, a, nil + return ctx, conf, hyper, a, nil } // classifyAttachErr surfaces ErrNotRunning more clearly than the generic wrap. diff --git a/cmd/vm/commands.go b/cmd/vm/commands.go index b5809f1a..d5af9900 100644 --- a/cmd/vm/commands.go +++ b/cmd/vm/commands.go @@ -28,6 +28,7 @@ type Actions interface { FsDetach(cmd *cobra.Command, args []string) error DeviceAttach(cmd *cobra.Command, args []string) error DeviceDetach(cmd *cobra.Command, args []string) error + NetResize(cmd *cobra.Command, args []string) error } // Command builds the "vm" parent command with all subcommands. @@ -188,10 +189,25 @@ func Command(h Actions) *cobra.Command { statusCmd, buildFsCommand(h), buildDeviceCommand(h), + buildNetCommand(h), ) return vmCmd } +func buildNetCommand(h Actions) *cobra.Command { + cmd := &cobra.Command{ + Use: "net VM", + Short: "Resize a running VM's NIC count (CH only); quiesce in-guest NIC state before reducing", + Args: cobra.ExactArgs(1), + RunE: h.NetResize, + } + cmd.Flags().Int("nics", -1, "target NIC count (required, >= 0)") + cmd.Flags().Bool("keep-host-on-remove", false, "skip host TAP/veth teardown after vm.remove-device") + _ = cmd.MarkFlagRequired("nics") + cmdcore.AddOutputFlag(cmd) + return cmd +} + func buildFsCommand(h Actions) *cobra.Command { parent := &cobra.Command{ Use: "fs", diff --git a/cmd/vm/lifecycle.go b/cmd/vm/lifecycle.go index 185fdc4b..e4149e63 100644 --- a/cmd/vm/lifecycle.go +++ b/cmd/vm/lifecycle.go @@ -399,7 +399,7 @@ func providerForVM(conf *config.Config, cniProvider network.Network, bridgeCache } // All NICs on a VM share the same backend. cfg := configs[0] - if cfg.Backend == "bridge" { + if cfg.Backend == types.BackendBridge { if cfg.BridgeDev == "" { return nil, fmt.Errorf("bridge backend but no bridge device persisted") } diff --git a/cmd/vm/netresize.go b/cmd/vm/netresize.go new file mode 100644 index 00000000..d59a0874 --- /dev/null +++ b/cmd/vm/netresize.go @@ -0,0 +1,57 @@ +package vm + +import ( + "fmt" + + "github.com/projecteru2/core/log" + "github.com/spf13/cobra" + + cmdcore "github.com/cocoonstack/cocoon/cmd/core" + "github.com/cocoonstack/cocoon/config" + "github.com/cocoonstack/cocoon/extend/netresize" + "github.com/cocoonstack/cocoon/network" + "github.com/cocoonstack/cocoon/types" +) + +func (h Handler) NetResize(cmd *cobra.Command, args []string) error { + ctx, conf, hyper, resizer, err := resolveAttacher[netresize.Resizer](h, cmd, args, "vm net", netresize.ErrUnsupportedBackend) + if err != nil { + return err + } + vm, err := hyper.Inspect(ctx, args[0]) + if err != nil { + return fmt.Errorf("vm net: %w", err) + } + plumbing, err := plumbingForVM(conf, vm.NetworkConfigs) + if err != nil { + return fmt.Errorf("vm net: %w", err) + } + target, _ := cmd.Flags().GetInt("nics") + keepHost, _ := cmd.Flags().GetBool("keep-host-on-remove") + res, err := resizer.NetResize(ctx, args[0], netresize.Spec{Target: target, KeepHostOnRemove: keepHost}, plumbing) + if err != nil { + return classifyAttachErr(err) + } + if done, jsonErr := cmdcore.MaybeOutputJSON(cmd, res); done { + return jsonErr + } + log.WithFunc("cmd.vm.net").Infof(ctx, "resized %s: before=%d after=%d added=%d removed=%d", + args[0], res.Before, res.After, len(res.Added), len(res.Removed)) + return nil +} + +// plumbingForVM picks the network provider matching the VM's existing NICs. +// Requires at least one NIC: VMConfig has no bridge hint, so a zero-NIC VM +// can't be resized up without losing the original provider choice. +func plumbingForVM(conf *config.Config, configs []*types.NetworkConfig) (network.Network, error) { + if len(configs) == 0 { + return nil, fmt.Errorf("vm has zero NICs; resize up is not supported (start the VM with at least one NIC)") + } + if configs[0].Backend == types.BackendBridge { + if configs[0].BridgeDev == "" { + return nil, fmt.Errorf("bridge backend without bridge device") + } + return cmdcore.InitBridgeNetwork(conf, configs[0].BridgeDev) + } + return cmdcore.InitNetwork(conf) +} diff --git a/network/bridge/bridge_linux.go b/network/bridge/bridge_linux.go index 6d7cb7a8..349290ee 100644 --- a/network/bridge/bridge_linux.go +++ b/network/bridge/bridge_linux.go @@ -117,7 +117,7 @@ func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, sp MAC: mac, NumQueues: queues, QueueSize: network.ResolveQueueSize(vmCfg.QueueSize), - Backend: typ, + Backend: types.BackendBridge, BridgeDev: b.bridgeDev, }) logger.Debugf(ctx, "NIC %d: tap=%s mac=%s bridge=%s", spec.Index, name, mac, b.bridgeDev) diff --git a/network/cni/create.go b/network/cni/create.go index b3a57ebc..76f8673f 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -100,7 +100,7 @@ func (c *CNI) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs MAC: mac, NumQueues: network.NetNumQueues(vmCfg.CPU), QueueSize: network.ResolveQueueSize(vmCfg.QueueSize), - Backend: typ, + Backend: types.BackendCNI, NetnsPath: nsPath, Network: netInfo, } diff --git a/types/network.go b/types/network.go index 399d9d77..d7b6ffc1 100644 --- a/types/network.go +++ b/types/network.go @@ -1,5 +1,11 @@ package types +// Network backend identifiers stored in NetworkConfig.Backend. +const ( + BackendCNI = "cni" + BackendBridge = "bridge" +) + // NetworkConfig describes a single NIC attached to a VM. type NetworkConfig struct { TAP string `json:"tap"` From 20578bedb1b81e3cf46089f515bebabfdc98bf3f Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:45:15 +0800 Subject: [PATCH 06/33] fix(clone): verify each NIC removed before re-adding in hotSwapNets After the remove-device loop, re-fetch vm.info and warn for any old ID still present in info.Config.Nets. The warning lets us trace bad clone states without failing the flow (the eject pending state is a CH-side condition the user accepted). Also gitignore /scratch/ for local test scripts. --- .gitignore | 3 +++ hypervisor/cloudhypervisor/clone.go | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/.gitignore b/.gitignore index 93e85512..d0202079 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,6 @@ tmp/* .claude/* cocoon-test + +# Local scratch / test scripts (not for upstream) +/scratch/ diff --git a/hypervisor/cloudhypervisor/clone.go b/hypervisor/cloudhypervisor/clone.go index 3e04786f..f132bbcc 100644 --- a/hypervisor/cloudhypervisor/clone.go +++ b/hypervisor/cloudhypervisor/clone.go @@ -310,6 +310,7 @@ func buildStateReplacements(chCfg *chVMConfig, storageConfigs []*types.StorageCo // adds fresh ones. Must run between vm.restore and vm.resume (VM paused). func hotSwapNets(ctx context.Context, hc *http.Client, oldNets []chNet, networkConfigs []*types.NetworkConfig) error { logger := log.WithFunc("cloudhypervisor.hotSwapNets") + removedIDs := make([]string, 0, len(oldNets)) for _, oldNet := range oldNets { if oldNet.ID == "" { continue @@ -317,8 +318,24 @@ func hotSwapNets(ctx context.Context, hc *http.Client, oldNets []chNet, networkC if err := removeDeviceVM(ctx, hc, oldNet.ID); err != nil { return fmt.Errorf("remove net device %s: %w", oldNet.ID, err) } + removedIDs = append(removedIDs, oldNet.ID) logger.Infof(ctx, "removed snapshot NIC %s (old MAC %s)", oldNet.ID, oldNet.MAC) } + if len(removedIDs) > 0 { + info, err := getVMInfo(ctx, hc) + if err != nil { + return fmt.Errorf("verify post-remove vm.info: %w", err) + } + live := make(map[string]struct{}, len(info.Config.Nets)) + for _, n := range info.Config.Nets { + live[n.ID] = struct{}{} + } + for _, id := range removedIDs { + if _, still := live[id]; still { + logger.Warnf(ctx, "NIC %s still in vm.info after vm.remove-device — eject pending in CH", id) + } + } + } for i, nc := range networkConfigs { newNet := networkConfigToNet(nc) if err := addNetVM(ctx, hc, newNet); err != nil { From 01187b3f97490dbeb8bff2b8f093514f644b4c7b Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:46:50 +0800 Subject: [PATCH 07/33] docs: cocoon vm net --nics N --- KNOWN_ISSUES.md | 13 +++++++++++++ README.md | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md index dc5014e7..90bfea32 100644 --- a/KNOWN_ISSUES.md +++ b/KNOWN_ISSUES.md @@ -242,6 +242,19 @@ Mitigations for production users: Control-plane traffic from cocoon-managed hosts (vk-cocoon, `cocoon vm exec`) goes through cocoon-agent over vsock and never depends on SSH credentials. +## NIC hot-remove leaves the PCI slot pending on Cloud Hypervisor + +`cocoon vm net --nics N` tears down host TAP/veth/CNI plumbing immediately after `vm.remove-device` returns. CH only frees the PCI slot (and unregisters the device's ioeventfds) when the guest writes to the ACPI hot-plug controller (B0EJ) in response to the SCI raised by `remove-device`. If the guest is stopped, paused, or its NIC driver is wedged, the slot stays `Allocated` until the guest cooperates. + +Cocoon does not wait for B0EJ — there is no reliable signal from the CH HTTP API. The user accepts: + +- The guest may continue to reference a NIC whose host plumbing is gone, hanging the in-guest driver. +- The pending eject may surface later as `Cannot register ioevent: File exists` when CH next tries to reuse that slot (e.g. a subsequent hot-add). + +Quiesce the guest NIC (ip link set down + NetworkManager remove on Linux; Disable-PnpDevice + driver unbind on Windows) before reducing the count. Or pass `--keep-host-on-remove` to defer the host teardown. + +Firecracker is not supported: FC has no NIC hot-plug / hot-unplug API. + ## Android cocoon-agent service may be blocked by SELinux `os-image/android/{14.0,15.0}` install the cocoon-agent binary at `/system/bin/cocoon-agent` and register it via `/system/etc/init/cocoon-agent.rc`. Android's SELinux policies don't ship with a domain for cocoon-agent, so the service may run in `init`'s domain or be denied outright depending on the redroid build. diff --git a/README.md b/README.md index 9d9ff369..3f0a7136 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,7 @@ cocoon │ ├── device │ │ ├── attach [flags] VM Attach a VFIO PCI device (CH only) │ │ └── detach [flags] VM Detach a VFIO PCI device by --id +│ ├── net [flags] VM Resize NIC count on a running VM (CH only) │ └── debug [flags] IMAGE Generate hypervisor launch command (dry run) ├── snapshot │ ├── save [flags] VM Create a snapshot from a running VM @@ -520,6 +521,25 @@ cocoon vm device detach my-vm --id mygpu `cocoon vm inspect VM` includes an `attached_devices` field for running VMs that surfaces every attached vhost-user-fs share and VFIO device, read live from CH `vm.info`. The field is omitted for stopped VMs. +## NIC Hot-Resize (Cloud Hypervisor only) + +`cocoon vm net --nics N VM` brings the running VM's NIC count to `N`. Adds allocate new TAP/CNI/bridge plumbing on the host and hot-plug a fresh NIC into the guest; removes pop NICs from the tail (LIFO) via `vm.remove-device` and tear down the host plumbing. + +```bash +# Add a second NIC (or any number). +cocoon vm net my-vm --nics 2 + +# Remove a NIC (LIFO from the tail). +cocoon vm net my-vm --nics 1 + +# Keep host plumbing in place after vm.remove-device. +cocoon vm net my-vm --nics 1 --keep-host-on-remove +``` + +Cocoon manages **host-side** plumbing only. CH's `vm.remove-device` marks the slot for ejection but the actual eject only happens when the guest cooperates via ACPI (B0EJ write). The host TAP / veth / CNI lease are torn down immediately after the API call regardless. Quiesce in-guest NIC state (driver unbind, NetworkManager removal, Windows NDIS halt) **before** reducing the count, or the in-guest driver will reference plumbing that no longer exists. Use `--keep-host-on-remove` if you want to defer host teardown. + +A VM started with zero NICs cannot be resized up (the VM record carries no provider hint). Start with at least one NIC if you plan to resize. + ## Windows Support Cocoon supports Windows guests via the `--windows` flag: From 8067417c01c410df834123d89bd9b6a4896996c8 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 14:59:15 +0800 Subject: [PATCH 08/33] revert(clone): drop post-remove vm.info check in hotSwapNets Signal isn't useful in the paused-clone window: vm.info reflects device_tree which only updates when guest writes B0EJ via SCI; during pause B0EJ never fires, so the check would always warn. The genuine remove-failure case (unknown id) already errors out of removeDeviceVM. --- hypervisor/cloudhypervisor/clone.go | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/hypervisor/cloudhypervisor/clone.go b/hypervisor/cloudhypervisor/clone.go index f132bbcc..3e04786f 100644 --- a/hypervisor/cloudhypervisor/clone.go +++ b/hypervisor/cloudhypervisor/clone.go @@ -310,7 +310,6 @@ func buildStateReplacements(chCfg *chVMConfig, storageConfigs []*types.StorageCo // adds fresh ones. Must run between vm.restore and vm.resume (VM paused). func hotSwapNets(ctx context.Context, hc *http.Client, oldNets []chNet, networkConfigs []*types.NetworkConfig) error { logger := log.WithFunc("cloudhypervisor.hotSwapNets") - removedIDs := make([]string, 0, len(oldNets)) for _, oldNet := range oldNets { if oldNet.ID == "" { continue @@ -318,24 +317,8 @@ func hotSwapNets(ctx context.Context, hc *http.Client, oldNets []chNet, networkC if err := removeDeviceVM(ctx, hc, oldNet.ID); err != nil { return fmt.Errorf("remove net device %s: %w", oldNet.ID, err) } - removedIDs = append(removedIDs, oldNet.ID) logger.Infof(ctx, "removed snapshot NIC %s (old MAC %s)", oldNet.ID, oldNet.MAC) } - if len(removedIDs) > 0 { - info, err := getVMInfo(ctx, hc) - if err != nil { - return fmt.Errorf("verify post-remove vm.info: %w", err) - } - live := make(map[string]struct{}, len(info.Config.Nets)) - for _, n := range info.Config.Nets { - live[n.ID] = struct{}{} - } - for _, id := range removedIDs { - if _, still := live[id]; still { - logger.Warnf(ctx, "NIC %s still in vm.info after vm.remove-device — eject pending in CH", id) - } - } - } for i, nc := range networkConfigs { newNet := networkConfigToNet(nc) if err := addNetVM(ctx, hc, newNet); err != nil { From bebc11c858d733f18b26251a51bd0b16749f2166 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:01:59 +0800 Subject: [PATCH 09/33] remove(netresize): drop --keep-host-on-remove flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flag had no proper lifecycle: host TAP/veth/CNI would leak, then a later resize-up at the same index would collide. Closing it cleanly needs orphan tracking (pending-cleanup state on the VM record). Out of scope for v1 — drop the flag entirely and always tear down host plumbing after vm.remove-device. Bring it back later with full pending-state tracking if a real user needs it. --- KNOWN_ISSUES.md | 2 +- README.md | 5 +---- cmd/vm/commands.go | 1 - cmd/vm/netresize.go | 3 +-- extend/netresize/netresize.go | 3 +-- hypervisor/cloudhypervisor/netresize.go | 10 ++++------ 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md index 90bfea32..7a43e031 100644 --- a/KNOWN_ISSUES.md +++ b/KNOWN_ISSUES.md @@ -251,7 +251,7 @@ Cocoon does not wait for B0EJ — there is no reliable signal from the CH HTTP A - The guest may continue to reference a NIC whose host plumbing is gone, hanging the in-guest driver. - The pending eject may surface later as `Cannot register ioevent: File exists` when CH next tries to reuse that slot (e.g. a subsequent hot-add). -Quiesce the guest NIC (ip link set down + NetworkManager remove on Linux; Disable-PnpDevice + driver unbind on Windows) before reducing the count. Or pass `--keep-host-on-remove` to defer the host teardown. +Quiesce the guest NIC (ip link set down + NetworkManager remove on Linux; Disable-PnpDevice + driver unbind on Windows) before reducing the count. Firecracker is not supported: FC has no NIC hot-plug / hot-unplug API. diff --git a/README.md b/README.md index 3f0a7136..12bcaad8 100644 --- a/README.md +++ b/README.md @@ -531,12 +531,9 @@ cocoon vm net my-vm --nics 2 # Remove a NIC (LIFO from the tail). cocoon vm net my-vm --nics 1 - -# Keep host plumbing in place after vm.remove-device. -cocoon vm net my-vm --nics 1 --keep-host-on-remove ``` -Cocoon manages **host-side** plumbing only. CH's `vm.remove-device` marks the slot for ejection but the actual eject only happens when the guest cooperates via ACPI (B0EJ write). The host TAP / veth / CNI lease are torn down immediately after the API call regardless. Quiesce in-guest NIC state (driver unbind, NetworkManager removal, Windows NDIS halt) **before** reducing the count, or the in-guest driver will reference plumbing that no longer exists. Use `--keep-host-on-remove` if you want to defer host teardown. +Cocoon manages **host-side** plumbing only. CH's `vm.remove-device` marks the slot for ejection but the actual eject only happens when the guest cooperates via ACPI (B0EJ write). The host TAP / veth / CNI lease are torn down immediately after the API call regardless. Quiesce in-guest NIC state (driver unbind, NetworkManager removal, Windows NDIS halt) **before** reducing the count, or the in-guest driver will reference plumbing that no longer exists. A VM started with zero NICs cannot be resized up (the VM record carries no provider hint). Start with at least one NIC if you plan to resize. diff --git a/cmd/vm/commands.go b/cmd/vm/commands.go index d5af9900..036b0dac 100644 --- a/cmd/vm/commands.go +++ b/cmd/vm/commands.go @@ -202,7 +202,6 @@ func buildNetCommand(h Actions) *cobra.Command { RunE: h.NetResize, } cmd.Flags().Int("nics", -1, "target NIC count (required, >= 0)") - cmd.Flags().Bool("keep-host-on-remove", false, "skip host TAP/veth teardown after vm.remove-device") _ = cmd.MarkFlagRequired("nics") cmdcore.AddOutputFlag(cmd) return cmd diff --git a/cmd/vm/netresize.go b/cmd/vm/netresize.go index d59a0874..9a8e69ce 100644 --- a/cmd/vm/netresize.go +++ b/cmd/vm/netresize.go @@ -27,8 +27,7 @@ func (h Handler) NetResize(cmd *cobra.Command, args []string) error { return fmt.Errorf("vm net: %w", err) } target, _ := cmd.Flags().GetInt("nics") - keepHost, _ := cmd.Flags().GetBool("keep-host-on-remove") - res, err := resizer.NetResize(ctx, args[0], netresize.Spec{Target: target, KeepHostOnRemove: keepHost}, plumbing) + res, err := resizer.NetResize(ctx, args[0], netresize.Spec{Target: target}, plumbing) if err != nil { return classifyAttachErr(err) } diff --git a/extend/netresize/netresize.go b/extend/netresize/netresize.go index 742b9b8b..41e9bff7 100644 --- a/extend/netresize/netresize.go +++ b/extend/netresize/netresize.go @@ -16,8 +16,7 @@ var ErrUnsupportedBackend = errors.New("backend does not support net resize") // Spec is one resize request. type Spec struct { - Target int - KeepHostOnRemove bool + Target int } // NIC is one NIC's summary, returned in Result.Added / Result.Removed. diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index e410ed5d..3cdf1e0d 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -39,7 +39,7 @@ func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec net case spec.Target > current: return ch.netResizeAdd(ctx, hc, vmID, &rec.Config, plumbing, current, spec.Target, res) default: - return ch.netResizeRemove(ctx, hc, info, vmID, rec.NetworkConfigs, plumbing, current, spec.Target, spec.KeepHostOnRemove, res) + return ch.netResizeRemove(ctx, hc, info, vmID, rec.NetworkConfigs, plumbing, current, spec.Target, res) } } @@ -71,7 +71,7 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm return res, nil } -func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, info *chVMInfoResponse, vmID string, ncs []*types.NetworkConfig, plumbing netresize.Plumbing, current, target int, keepHost bool, res netresize.Result) (netresize.Result, error) { +func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, info *chVMInfoResponse, vmID string, ncs []*types.NetworkConfig, plumbing netresize.Plumbing, current, target int, res netresize.Result) (netresize.Result, error) { macToID := make(map[string]string, len(info.Config.Nets)) for _, n := range info.Config.Nets { macToID[strings.ToLower(n.MAC)] = n.ID @@ -85,10 +85,8 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, if err := removeDeviceVM(ctx, hc, chID); err != nil { return res, fmt.Errorf("vm.remove-device nic %d (%s): %w", i, chID, err) } - if !keepHost { - if err := plumbing.Remove(ctx, vmID, i); err != nil { - return res, fmt.Errorf("nic %d host plumbing: %w", i, err) - } + if err := plumbing.Remove(ctx, vmID, i); err != nil { + return res, fmt.Errorf("nic %d host plumbing: %w", i, err) } if err := ch.truncateNetworkConfigs(ctx, vmID, i); err != nil { return res, fmt.Errorf("persist remove nic %d: %w", i, err) From c95d79621e4fd6bbbbfada1fbe07b1db38135065 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:35:28 +0800 Subject: [PATCH 10/33] refactor(cloudhypervisor): single inspect bootstrap + consolidate provider lookup - runningVMClientWithRecord exposes (hc, vmID, rec) so NetResize avoids the duplicate ResolveRef+LoadRecord that inspectRunning already triggered. - plumbingForVM delegates to providerForVM (now lazy-inits CNI when nil), eliminating the duplicated backend switch / BridgeDev check. - hotSwapNets sets the deterministic cocoonNetID(MAC) so clone-added NICs match netresize's id convention (both still resolved by MAC, but consistent in vm.info output). - Trim netresize package comment; the operational caveats already live in README + KNOWN_ISSUES. --- cmd/vm/lifecycle.go | 8 +++++--- cmd/vm/netresize.go | 12 +++--------- extend/netresize/netresize.go | 2 -- hypervisor/cloudhypervisor/clone.go | 1 + hypervisor/cloudhypervisor/extend.go | 19 +++++++++++++------ 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/cmd/vm/lifecycle.go b/cmd/vm/lifecycle.go index e4149e63..00136d4f 100644 --- a/cmd/vm/lifecycle.go +++ b/cmd/vm/lifecycle.go @@ -393,6 +393,8 @@ func (h Handler) recoverNetwork(ctx context.Context, conf *config.Config, hyper } // providerForVM selects network provider from persisted NetworkConfig. +// cniProvider may be nil — lazy-inited from conf when configs need CNI. +// bridgeCache must be non-nil; resolved entries are inserted for reuse. func providerForVM(conf *config.Config, cniProvider network.Network, bridgeCache map[string]network.Network, configs []*types.NetworkConfig) (network.Network, error) { if len(configs) == 0 { return nil, fmt.Errorf("no network configs") @@ -414,10 +416,10 @@ func providerForVM(conf *config.Config, cniProvider network.Network, bridgeCache return p, nil } // "cni" or empty (backward compat). - if cniProvider == nil { - return nil, fmt.Errorf("cni provider not available") + if cniProvider != nil { + return cniProvider, nil } - return cniProvider, nil + return cmdcore.InitNetwork(conf) } func batchRoutedCmd(ctx context.Context, cmd *cobra.Command, name, pastTense string, routed map[hypervisor.Hypervisor][]string, fn func(hypervisor.Hypervisor, []string) ([]string, error)) error { diff --git a/cmd/vm/netresize.go b/cmd/vm/netresize.go index 9a8e69ce..fd0a6008 100644 --- a/cmd/vm/netresize.go +++ b/cmd/vm/netresize.go @@ -40,17 +40,11 @@ func (h Handler) NetResize(cmd *cobra.Command, args []string) error { } // plumbingForVM picks the network provider matching the VM's existing NICs. -// Requires at least one NIC: VMConfig has no bridge hint, so a zero-NIC VM -// can't be resized up without losing the original provider choice. +// VMConfig has no bridge hint, so a zero-NIC VM can't be resized up without +// losing the original provider choice. func plumbingForVM(conf *config.Config, configs []*types.NetworkConfig) (network.Network, error) { if len(configs) == 0 { return nil, fmt.Errorf("vm has zero NICs; resize up is not supported (start the VM with at least one NIC)") } - if configs[0].Backend == types.BackendBridge { - if configs[0].BridgeDev == "" { - return nil, fmt.Errorf("bridge backend without bridge device") - } - return cmdcore.InitBridgeNetwork(conf, configs[0].BridgeDev) - } - return cmdcore.InitNetwork(conf) + return providerForVM(conf, nil, map[string]network.Network{}, configs) } diff --git a/extend/netresize/netresize.go b/extend/netresize/netresize.go index 41e9bff7..3d032606 100644 --- a/extend/netresize/netresize.go +++ b/extend/netresize/netresize.go @@ -1,6 +1,4 @@ // Package netresize is the runtime interface for resizing a VM's NIC count. -// Cocoon never touches the guest — the user must quiesce in-guest NIC state -// (driver unbind / NetworkManager / NDIS) before reducing the count. package netresize import ( diff --git a/hypervisor/cloudhypervisor/clone.go b/hypervisor/cloudhypervisor/clone.go index 3e04786f..5092c6cc 100644 --- a/hypervisor/cloudhypervisor/clone.go +++ b/hypervisor/cloudhypervisor/clone.go @@ -321,6 +321,7 @@ func hotSwapNets(ctx context.Context, hc *http.Client, oldNets []chNet, networkC } for i, nc := range networkConfigs { newNet := networkConfigToNet(nc) + newNet.ID = cocoonNetID(nc.MAC) if err := addNetVM(ctx, hc, newNet); err != nil { return fmt.Errorf("add net device %d/%d (MAC %s, TAP %s): %w", i+1, len(networkConfigs), nc.MAC, nc.TAP, err) diff --git a/hypervisor/cloudhypervisor/extend.go b/hypervisor/cloudhypervisor/extend.go index 61cdf795..8f17e51a 100644 --- a/hypervisor/cloudhypervisor/extend.go +++ b/hypervisor/cloudhypervisor/extend.go @@ -216,26 +216,33 @@ func (ch *CloudHypervisor) detachWith( // alive (PID file + cmdline match — same gate as Backend.WithRunningVM), // and returns an http.Client connected to its CH API socket. func (ch *CloudHypervisor) runningVMClient(ctx context.Context, vmRef string) (*http.Client, error) { + hc, _, _, err := ch.runningVMClientWithRecord(ctx, vmRef) + return hc, err +} + +// runningVMClientWithRecord is runningVMClient + (vmID, *VMRecord) so callers +// that need the persisted record (e.g. netresize) avoid a second resolve+load. +func (ch *CloudHypervisor) runningVMClientWithRecord(ctx context.Context, vmRef string) (*http.Client, string, hypervisor.VMRecord, error) { vmID, err := ch.ResolveRef(ctx, vmRef) if err != nil { - return nil, err + return nil, "", hypervisor.VMRecord{}, err } rec, err := ch.LoadRecord(ctx, vmID) if err != nil { - return nil, err + return nil, "", hypervisor.VMRecord{}, err } if rec.State != types.VMStateRunning { - return nil, fmt.Errorf("vm %s is %s: %w", vmID, rec.State, hypervisor.ErrNotRunning) + return nil, "", hypervisor.VMRecord{}, fmt.Errorf("vm %s is %s: %w", vmID, rec.State, hypervisor.ErrNotRunning) } sockPath := hypervisor.SocketPath(rec.RunDir) pid, pidErr := utils.ReadPIDFile(ch.PIDFilePath(rec.RunDir)) if pidErr != nil { - return nil, fmt.Errorf("vm %s read pidfile: %w: %w", vmID, pidErr, hypervisor.ErrNotRunning) + return nil, "", hypervisor.VMRecord{}, fmt.Errorf("vm %s read pidfile: %w: %w", vmID, pidErr, hypervisor.ErrNotRunning) } if !utils.VerifyProcessCmdline(pid, ch.conf.BinaryName(), sockPath) { - return nil, fmt.Errorf("vm %s pid %d not %s: %w", vmID, pid, ch.conf.BinaryName(), hypervisor.ErrNotRunning) + return nil, "", hypervisor.VMRecord{}, fmt.Errorf("vm %s pid %d not %s: %w", vmID, pid, ch.conf.BinaryName(), hypervisor.ErrNotRunning) } - return utils.NewSocketHTTPClient(sockPath), nil + return utils.NewSocketHTTPClient(sockPath), vmID, rec, nil } // listWith is the shared skeleton for inspect-time enumeration. From a90a5d50812eb2f010d22089ddd68b1b9f5dc332 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:35:36 +0800 Subject: [PATCH 11/33] fix(netresize): rollback CH device + always truncate on partial failure - netResizeAdd: if appendNetworkConfig fails after addNetVM succeeds, roll back vm.remove-device + host plumbing. Without this, the live NIC with cocoon-net- id stays in CH while cocoon's record drops, and the next resize collides on the deterministic id. - netResizeRemove: once removeDeviceVM succeeds the CH eject is irrevocable, so truncate the cocoon record even when plumbing.Remove fails (leaked TAP/CNI is gc'd later). Skipping truncate left the next resize unable to resolve the stale NIC's MAC against vm.info. --- hypervisor/cloudhypervisor/netresize.go | 28 +++++++++++++++++-------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 3cdf1e0d..74e3b17a 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -19,15 +19,11 @@ func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec net if err := spec.Normalize(); err != nil { return netresize.Result{}, err } - hc, info, err := ch.inspectRunning(ctx, vmRef) + hc, vmID, rec, err := ch.runningVMClientWithRecord(ctx, vmRef) if err != nil { return netresize.Result{}, err } - vmID, err := ch.ResolveRef(ctx, vmRef) - if err != nil { - return netresize.Result{}, err - } - rec, err := ch.LoadRecord(ctx, vmID) + info, err := getVMInfo(ctx, hc) if err != nil { return netresize.Result{}, err } @@ -63,6 +59,14 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm return res, fmt.Errorf("vm.add-net nic %d: %w", i, err) } if err := ch.appendNetworkConfig(ctx, vmID, nc); err != nil { + // CH already accepted the device; without rollback, the next + // resize would mis-count or collide on the deterministic id. + if rmErr := removeDeviceVM(ctx, hc, chN.ID); rmErr != nil { + logger.Warnf(ctx, "rollback vm.remove-device %s after persist failure: %v", chN.ID, rmErr) + } + if rmErr := plumbing.Remove(ctx, vmID, i); rmErr != nil { + logger.Warnf(ctx, "rollback host plumbing for nic %d: %v", i, rmErr) + } return res, fmt.Errorf("persist nic %d: %w", i, err) } res.Added = append(res.Added, netresize.NIC{Index: i, TAP: nc.TAP, MAC: nc.MAC}) @@ -72,6 +76,7 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm } func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, info *chVMInfoResponse, vmID string, ncs []*types.NetworkConfig, plumbing netresize.Plumbing, current, target int, res netresize.Result) (netresize.Result, error) { + logger := log.WithFunc("cloudhypervisor.NetResize.remove") macToID := make(map[string]string, len(info.Config.Nets)) for _, n := range info.Config.Nets { macToID[strings.ToLower(n.MAC)] = n.ID @@ -85,12 +90,17 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, if err := removeDeviceVM(ctx, hc, chID); err != nil { return res, fmt.Errorf("vm.remove-device nic %d (%s): %w", i, chID, err) } - if err := plumbing.Remove(ctx, vmID, i); err != nil { - return res, fmt.Errorf("nic %d host plumbing: %w", i, err) - } + // CH accepted the eject; cocoon must drop the record even if host + // plumbing teardown leaks. Skipping truncate would block convergence + // — the next resize would re-read the stale NIC and fail MAC lookup. + plumbingErr := plumbing.Remove(ctx, vmID, i) if err := ch.truncateNetworkConfigs(ctx, vmID, i); err != nil { + logger.Errorf(ctx, err, "persistence diverged from CH for vm %s nic %d (%s): live device removed, cocoon record retained", vmID, i, chID) return res, fmt.Errorf("persist remove nic %d: %w", i, err) } + if plumbingErr != nil { + logger.Warnf(ctx, "host plumbing leaked for vm %s nic %d (gc will reclaim): %v", vmID, i, plumbingErr) + } res.Removed = append(res.Removed, netresize.NIC{Index: i, TAP: nc.TAP, MAC: nc.MAC}) res.After = i } From 0333437fc075b89585b41776787cd584a1e4c0d0 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:35:43 +0800 Subject: [PATCH 12/33] fix(network): propagate teardown errors and rollback bridge.Add partials MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - cni.tearDownNICs: when cniConf is nil and bestEffort, return all record ids so deleteVM still sweeps the DB instead of stranding them. In the non-bestEffort path (Remove), CNI DEL / TAP delete failures now propagate — silently logging would leak IPAM and report a misleading success. - bridge.Add: track successfully-created TAPs and tear them down on any later failure. Reachable from boot (AddRange(0, nics)) where a partial failure previously leaked the earlier-created TAPs on the bridge. --- network/bridge/bridge_linux.go | 15 ++++++++++---- network/cni/cni.go | 36 +++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/network/bridge/bridge_linux.go b/network/bridge/bridge_linux.go index 349290ee..d8962521 100644 --- a/network/bridge/bridge_linux.go +++ b/network/bridge/bridge_linux.go @@ -68,7 +68,7 @@ func (b *Bridge) Verify(_ context.Context, vmID string) error { } // Add allocates TAP devices on the bridge for the given specs. -func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...network.AddSpec) ([]*types.NetworkConfig, error) { +func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...network.AddSpec) (configs []*types.NetworkConfig, retErr error) { if len(specs) == 0 { return nil, nil } @@ -79,7 +79,15 @@ func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, sp return nil, fmt.Errorf("find bridge: %w", err) } - configs := make([]*types.NetworkConfig, 0, len(specs)) + var added []int + defer func() { + if retErr == nil || len(added) == 0 { + return + } + _ = tearDownTAPs(vmID, added, true) + }() + + configs = make([]*types.NetworkConfig, 0, len(specs)) for _, spec := range specs { name := tapName(vmID, spec.Index) mac := generateMAC() @@ -90,6 +98,7 @@ func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, sp if cErr := createTAP(name, queues); cErr != nil { return nil, fmt.Errorf("create tap %s: %w", name, cErr) } + added = append(added, spec.Index) tap, lErr := netlink.LinkByName(name) if lErr != nil { @@ -97,7 +106,6 @@ func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, sp } if mErr := netlink.LinkSetMaster(tap, br); mErr != nil { - _ = netlink.LinkDel(tap) return nil, fmt.Errorf("add %s to %s: %w", name, b.bridgeDev, mErr) } @@ -108,7 +116,6 @@ func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, sp _ = network.TuneTAP(tap) if uErr := netlink.LinkSetUp(tap); uErr != nil { - _ = netlink.LinkDel(tap) return nil, fmt.Errorf("set %s up: %w", name, uErr) } diff --git a/network/cni/cni.go b/network/cni/cni.go index a85104e2..e77237a4 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -142,17 +142,25 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { } // tearDownNICs runs CNI DEL (and optionally TAP delete) for each record. -// bestEffort=true logs errors and continues; false returns on the first. -// Returns the record IDs successfully torn down so the caller can sweep them -// from the index in one Update. +// bestEffort=true logs errors and always appends the id (caller still sweeps +// the DB; the netns deletion that follows reclaims kernel-side state anyway). +// bestEffort=false returns on the first CNI/TAP failure so the caller can +// surface "remove" intent failures without dropping live DB records. +// Returns the record IDs eligible for index removal. func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []networkRecord, deleteTAP, bestEffort bool) ([]string, error) { + logger := log.WithFunc("cni.tearDownNICs") if c.cniConf == nil { - if bestEffort { - return nil, nil + if !bestEffort { + return nil, fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) } - return nil, fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) + // No CNI configured: nothing to DEL, but the DB records must still + // be reclaimable. Return every id so the caller can sweep them. + ids := make([]string, 0, len(records)) + for _, rec := range records { + ids = append(ids, rec.ID) + } + return ids, nil } - logger := log.WithFunc("cni.tearDownNICs") ids := make([]string, 0, len(records)) for _, rec := range records { cl, err := c.confListByName(rec.Type) @@ -164,14 +172,20 @@ func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []n continue } if err := c.cniDel(ctx, cl, vmID, nsPath, rec.IfName); err != nil { + if !bestEffort { + return ids, fmt.Errorf("cni del %s/%s: %w", vmID, rec.IfName, err) + } logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, rec.IfName, err) } if deleteTAP { - var i int - if _, err := fmt.Sscanf(rec.IfName, "eth%d", &i); err == nil { - if err := deleteTAPInNetns(nsPath, tapNameForVM(vmID, i)); err != nil && !bestEffort { - return ids, fmt.Errorf("delete tap %s: %w", tapNameForVM(vmID, i), err) + var idx int + if _, scanErr := fmt.Sscanf(rec.IfName, "eth%d", &idx); scanErr != nil { + logger.Warnf(ctx, "parse ifname %q for %s: %v (skip tap delete)", rec.IfName, vmID, scanErr) + } else if delErr := deleteTAPInNetns(nsPath, tapNameForVM(vmID, idx)); delErr != nil { + if !bestEffort { + return ids, fmt.Errorf("delete tap %s: %w", tapNameForVM(vmID, idx), delErr) } + logger.Warnf(ctx, "delete tap %s in netns %s: %v", tapNameForVM(vmID, idx), nsPath, delErr) } } ids = append(ids, rec.ID) From f5cf1cee46cd3f9331ad0528df0e7155994b363c Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:35:47 +0800 Subject: [PATCH 13/33] docs(readme): rephrase NIC hot-resize add/remove summary --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 12bcaad8..aa2e68ba 100644 --- a/README.md +++ b/README.md @@ -523,7 +523,7 @@ cocoon vm device detach my-vm --id mygpu ## NIC Hot-Resize (Cloud Hypervisor only) -`cocoon vm net --nics N VM` brings the running VM's NIC count to `N`. Adds allocate new TAP/CNI/bridge plumbing on the host and hot-plug a fresh NIC into the guest; removes pop NICs from the tail (LIFO) via `vm.remove-device` and tear down the host plumbing. +`cocoon vm net --nics N VM` brings the running VM's NIC count to `N`. To add NICs, cocoon allocates new host TAP/CNI/bridge plumbing and hot-plugs a fresh NIC into the guest. To remove NICs, it pops from the tail (LIFO) via `vm.remove-device` and tears down the host plumbing. ```bash # Add a second NIC (or any number). From 1dc67a7d6bf03b9335e7c1459d2d241457e435f0 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:44:39 +0800 Subject: [PATCH 14/33] docs(netresize): correct leak-reclaim hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "gc will reclaim" is misleading — running-VM mid-state leaks aren't in the GC orphan scan; only stop+restart sweeps them. Adjust the warn message accordingly. --- hypervisor/cloudhypervisor/netresize.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 74e3b17a..87851add 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -99,7 +99,7 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, return res, fmt.Errorf("persist remove nic %d: %w", i, err) } if plumbingErr != nil { - logger.Warnf(ctx, "host plumbing leaked for vm %s nic %d (gc will reclaim): %v", vmID, i, plumbingErr) + logger.Warnf(ctx, "host plumbing leaked for vm %s nic %d (stop+restart will reclaim): %v", vmID, i, plumbingErr) } res.Removed = append(res.Removed, netresize.NIC{Index: i, TAP: nc.TAP, MAC: nc.MAC}) res.After = i From 7c97cbf54b2a3598b2e7b4bc8933ddcb62359b17 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:52:31 +0800 Subject: [PATCH 15/33] fix: CNI Add rollback TAP cleanup + skip vm.info on add-only resize - CNI Add: rollback defer now deletes TAPs left behind by setupTCRedirect when the netns was pre-existing (createdNetns=false). Without this, a partial failure inside the persistent netns leaked tap-, and the next retry hit "file exists" in createTAP. - NetResize: defer getVMInfo to the remove branch only; the add path doesn't need vm.info, so an add resize no longer depends on it. --- hypervisor/cloudhypervisor/netresize.go | 21 ++++++++++----------- network/cni/create.go | 18 +++++++++++------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 87851add..919cf69a 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -14,7 +14,6 @@ import ( "github.com/cocoonstack/cocoon/types" ) -// NetResize brings the VM's NIC count to spec.Target. func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec netresize.Spec, plumbing netresize.Plumbing) (netresize.Result, error) { if err := spec.Normalize(); err != nil { return netresize.Result{}, err @@ -23,10 +22,6 @@ func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec net if err != nil { return netresize.Result{}, err } - info, err := getVMInfo(ctx, hc) - if err != nil { - return netresize.Result{}, err - } current := len(rec.NetworkConfigs) res := netresize.Result{Before: current, After: current} switch { @@ -35,6 +30,10 @@ func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec net case spec.Target > current: return ch.netResizeAdd(ctx, hc, vmID, &rec.Config, plumbing, current, spec.Target, res) default: + info, infoErr := getVMInfo(ctx, hc) + if infoErr != nil { + return res, infoErr + } return ch.netResizeRemove(ctx, hc, info, vmID, rec.NetworkConfigs, plumbing, current, spec.Target, res) } } @@ -59,8 +58,7 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm return res, fmt.Errorf("vm.add-net nic %d: %w", i, err) } if err := ch.appendNetworkConfig(ctx, vmID, nc); err != nil { - // CH already accepted the device; without rollback, the next - // resize would mis-count or collide on the deterministic id. + // without rollback the deterministic id collides on the next resize. if rmErr := removeDeviceVM(ctx, hc, chN.ID); rmErr != nil { logger.Warnf(ctx, "rollback vm.remove-device %s after persist failure: %v", chN.ID, rmErr) } @@ -90,16 +88,17 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, if err := removeDeviceVM(ctx, hc, chID); err != nil { return res, fmt.Errorf("vm.remove-device nic %d (%s): %w", i, chID, err) } - // CH accepted the eject; cocoon must drop the record even if host - // plumbing teardown leaks. Skipping truncate would block convergence - // — the next resize would re-read the stale NIC and fail MAC lookup. + // CH eject is irrevocable; truncate even if plumbing leaks, else the + // next resize re-reads the stale NIC and fails MAC lookup. plumbingErr := plumbing.Remove(ctx, vmID, i) if err := ch.truncateNetworkConfigs(ctx, vmID, i); err != nil { logger.Errorf(ctx, err, "persistence diverged from CH for vm %s nic %d (%s): live device removed, cocoon record retained", vmID, i, chID) return res, fmt.Errorf("persist remove nic %d: %w", i, err) } if plumbingErr != nil { - logger.Warnf(ctx, "host plumbing leaked for vm %s nic %d (stop+restart will reclaim): %v", vmID, i, plumbingErr) + msg := fmt.Sprintf("nic %d (%s) host plumbing leaked, stop+restart will reclaim: %v", i, chID, plumbingErr) + logger.Warn(ctx, msg) + res.Warnings = append(res.Warnings, msg) } res.Removed = append(res.Removed, netresize.NIC{Index: i, TAP: nc.TAP, MAC: nc.MAC}) res.After = i diff --git a/network/cni/create.go b/network/cni/create.go index 76f8673f..7f7efdbf 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -40,15 +40,22 @@ func (c *CNI) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs return nil, fmt.Errorf("ensure netns %s: %w", nsName, err) } - addedIFs := make([]string, 0, len(specs)) + addedIdx := make([]int, 0, len(specs)) defer func() { if retErr == nil { return } - for _, ifn := range addedIFs { + for _, i := range addedIdx { + ifn := fmt.Sprintf("eth%d", i) if delErr := c.cniDel(ctx, confList, vmID, nsPath, ifn); delErr != nil { logger.Warnf(ctx, "rollback CNI DEL %s/%s: %v", vmID, ifn, delErr) } + // setupTCRedirect creates the TAP; it would leak if the netns persists. + if !createdNetns { + if delErr := deleteTAPInNetns(nsPath, tapNameForVM(vmID, i)); delErr != nil { + logger.Warnf(ctx, "rollback tap delete %s: %v", tapNameForVM(vmID, i), delErr) + } + } } if createdNetns { _ = deleteNetns(ctx, nsName) @@ -79,7 +86,7 @@ func (c *CNI) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs if addErr != nil { return nil, fmt.Errorf("cni add %s/%s: %w", vmID, ifName, addErr) } - addedIFs = append(addedIFs, ifName) + addedIdx = append(addedIdx, spec.Index) netInfo, parseErr := extractNetworkInfo(cniResult) if parseErr != nil { @@ -169,7 +176,6 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { return c.deleteRecords(ctx, ids) } -// cniDel runs CNI DEL for one NIC; shared by Add (recovery + rollback) and Remove. func (c *CNI) cniDel(ctx context.Context, confList *libcni.NetworkConfigList, vmID, nsPath, ifName string) error { rt := &libcni.RuntimeConf{ContainerID: vmID, NetNS: nsPath, IfName: ifName} return c.cniConf.DelNetworkList(ctx, confList, rt) @@ -179,9 +185,7 @@ func tapNameForVM(vmID string, nic int) string { return fmt.Sprintf("tap%s-%d", network.VMIDPrefix(vmID), nic) } -// ensureNetns creates the netns when /var/run/netns/ is absent. -// The bool reports whether this call did the creation, so rollback only -// tears down a netns we just made. +// ensureNetns creates the netns if missing; bool reports whether this call did the creation. func ensureNetns(name, nsPath string) (bool, error) { if _, err := os.Stat(nsPath); err == nil { return false, nil From 6b1f164337c7f1d2449b253427386fa3fbc5dece Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:53:03 +0800 Subject: [PATCH 16/33] feat(netresize): surface non-fatal warnings in Result Result gains Warnings []string. netResizeRemove appends a leak message when plumbing.Remove fails after CH accepted the eject; the CLI logs each warning and JSON output carries them. Without this, host plumbing leaks were only visible in the daemon log. --- cmd/vm/netresize.go | 11 +++++++---- extend/netresize/netresize.go | 18 ++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/cmd/vm/netresize.go b/cmd/vm/netresize.go index fd0a6008..b9845040 100644 --- a/cmd/vm/netresize.go +++ b/cmd/vm/netresize.go @@ -34,14 +34,17 @@ func (h Handler) NetResize(cmd *cobra.Command, args []string) error { if done, jsonErr := cmdcore.MaybeOutputJSON(cmd, res); done { return jsonErr } - log.WithFunc("cmd.vm.net").Infof(ctx, "resized %s: before=%d after=%d added=%d removed=%d", + logger := log.WithFunc("cmd.vm.net") + logger.Infof(ctx, "resized %s: before=%d after=%d added=%d removed=%d", args[0], res.Before, res.After, len(res.Added), len(res.Removed)) + for _, w := range res.Warnings { + logger.Warnf(ctx, "%s: %s", args[0], w) + } return nil } -// plumbingForVM picks the network provider matching the VM's existing NICs. -// VMConfig has no bridge hint, so a zero-NIC VM can't be resized up without -// losing the original provider choice. +// plumbingForVM picks the provider matching the VM's existing NICs; zero NICs +// is fatal because VMConfig carries no bridge hint to recover the choice. func plumbingForVM(conf *config.Config, configs []*types.NetworkConfig) (network.Network, error) { if len(configs) == 0 { return nil, fmt.Errorf("vm has zero NICs; resize up is not supported (start the VM with at least one NIC)") diff --git a/extend/netresize/netresize.go b/extend/netresize/netresize.go index 3d032606..b7d0299b 100644 --- a/extend/netresize/netresize.go +++ b/extend/netresize/netresize.go @@ -17,34 +17,32 @@ type Spec struct { Target int } -// NIC is one NIC's summary, returned in Result.Added / Result.Removed. type NIC struct { Index int `json:"index"` TAP string `json:"tap"` MAC string `json:"mac"` } -// Result reports the before/after count and the NICs touched by the call. +// Result reports the before/after count and NICs touched. Warnings surface +// non-fatal divergence (e.g. host plumbing leak after a successful CH eject). type Result struct { - Before int `json:"before"` - After int `json:"after"` - Added []NIC `json:"added,omitempty"` - Removed []NIC `json:"removed,omitempty"` + Before int `json:"before"` + After int `json:"after"` + Added []NIC `json:"added,omitempty"` + Removed []NIC `json:"removed,omitempty"` + Warnings []string `json:"warnings,omitempty"` } -// Plumbing is the host-side network operations NetResize delegates to. -// network.Network satisfies this implicitly. +// Plumbing is the host-side network ops NetResize delegates to; network.Network satisfies it. type Plumbing interface { Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...network.AddSpec) ([]*types.NetworkConfig, error) Remove(ctx context.Context, vmID string, indices ...int) error } -// Resizer resizes the NIC count on a running VM. type Resizer interface { NetResize(ctx context.Context, vmRef string, spec Spec, plumbing Plumbing) (Result, error) } -// Normalize validates the spec. func (s *Spec) Normalize() error { if s.Target < 0 { return fmt.Errorf("--nics must be non-negative, got %d", s.Target) From ca0534c00343417023f9ae5c806c5dc26ff29a96 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 15:53:11 +0800 Subject: [PATCH 17/33] chore: trim verbose comments across vm-net-resize branch --- cmd/vm/attach.go | 6 ++---- cmd/vm/lifecycle.go | 5 ++--- hypervisor/cloudhypervisor/extend.go | 3 +-- network/cni/cni.go | 21 ++++++--------------- network/network.go | 11 ----------- 5 files changed, 11 insertions(+), 35 deletions(-) diff --git a/cmd/vm/attach.go b/cmd/vm/attach.go index 7b824b2e..5d04d5f7 100644 --- a/cmd/vm/attach.go +++ b/cmd/vm/attach.go @@ -85,10 +85,8 @@ func (h Handler) DeviceDetach(cmd *cobra.Command, args []string) error { return nil } -// resolveAttacher resolves args[0] to a hypervisor implementing A -// (fs.Attacher / vfio.Attacher / netresize.Resizer). op prefixes both error -// wraps so callers see the operation. Returns the resolved hypervisor too -// so callers can issue further generic ops (e.g. Inspect) without re-finding. +// resolveAttacher resolves args[0] to a hypervisor implementing A; returns the +// hypervisor too so callers can issue further generic ops without re-finding. func resolveAttacher[A any](h Handler, cmd *cobra.Command, args []string, op string, errUnsupported error) (context.Context, *config.Config, hypervisor.Hypervisor, A, error) { var zero A ctx, conf, err := h.Init(cmd) diff --git a/cmd/vm/lifecycle.go b/cmd/vm/lifecycle.go index 00136d4f..d7a39800 100644 --- a/cmd/vm/lifecycle.go +++ b/cmd/vm/lifecycle.go @@ -392,9 +392,8 @@ func (h Handler) recoverNetwork(ctx context.Context, conf *config.Config, hyper } } -// providerForVM selects network provider from persisted NetworkConfig. -// cniProvider may be nil — lazy-inited from conf when configs need CNI. -// bridgeCache must be non-nil; resolved entries are inserted for reuse. +// providerForVM picks the network provider from persisted NetworkConfig; +// cniProvider may be nil (lazy-init); bridgeCache must be non-nil. func providerForVM(conf *config.Config, cniProvider network.Network, bridgeCache map[string]network.Network, configs []*types.NetworkConfig) (network.Network, error) { if len(configs) == 0 { return nil, fmt.Errorf("no network configs") diff --git a/hypervisor/cloudhypervisor/extend.go b/hypervisor/cloudhypervisor/extend.go index 8f17e51a..229825b0 100644 --- a/hypervisor/cloudhypervisor/extend.go +++ b/hypervisor/cloudhypervisor/extend.go @@ -220,8 +220,7 @@ func (ch *CloudHypervisor) runningVMClient(ctx context.Context, vmRef string) (* return hc, err } -// runningVMClientWithRecord is runningVMClient + (vmID, *VMRecord) so callers -// that need the persisted record (e.g. netresize) avoid a second resolve+load. +// runningVMClientWithRecord is runningVMClient + the resolved vmID and record. func (ch *CloudHypervisor) runningVMClientWithRecord(ctx context.Context, vmRef string) (*http.Client, string, hypervisor.VMRecord, error) { vmID, err := ch.ResolveRef(ctx, vmRef) if err != nil { diff --git a/network/cni/cni.go b/network/cni/cni.go index e77237a4..e6008baf 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -38,10 +38,8 @@ type CNI struct { cniConf *libcni.CNIConfig } -// New creates a CNI network provider. -// CNI conflist loading is best-effort at creation time; if no conflist is -// available (e.g. no network needed), Delete/Inspect/List still work. -// Config() will fail if the conflist is not loaded. +// New creates a CNI provider; conflist loading is best-effort so Delete/Inspect/List +// still work when none are available — Add fails in that case. func New(conf *config.Config) (*CNI, error) { if conf == nil { return nil, fmt.Errorf("config is nil") @@ -141,20 +139,15 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { return c.deleteRecords(ctx, ids) } -// tearDownNICs runs CNI DEL (and optionally TAP delete) for each record. -// bestEffort=true logs errors and always appends the id (caller still sweeps -// the DB; the netns deletion that follows reclaims kernel-side state anyway). -// bestEffort=false returns on the first CNI/TAP failure so the caller can -// surface "remove" intent failures without dropping live DB records. -// Returns the record IDs eligible for index removal. +// tearDownNICs runs CNI DEL (+ optional TAP delete) per record. bestEffort +// logs and continues; otherwise the first CNI/TAP failure aborts and only +// fully-torn-down records are returned for DB sweep. func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []networkRecord, deleteTAP, bestEffort bool) ([]string, error) { logger := log.WithFunc("cni.tearDownNICs") if c.cniConf == nil { if !bestEffort { return nil, fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) } - // No CNI configured: nothing to DEL, but the DB records must still - // be reclaimable. Return every id so the caller can sweep them. ids := make([]string, 0, len(records)) for _, rec := range records { ids = append(ids, rec.ID) @@ -193,9 +186,7 @@ func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []n return ids, nil } -// delNICs is the per-record CNI DEL loop without store access; used by the -// lockless gc.Collect path. The store-aware Delete/Remove paths go through -// tearDownNICs. +// delNICs is the lockless CNI DEL loop for gc.Collect; store-aware paths use tearDownNICs. func (c *CNI) delNICs(ctx context.Context, vmID, nsPath string, records []networkRecord) { if c.cniConf == nil { return diff --git a/network/network.go b/network/network.go index 293b8398..2eb23c0c 100644 --- a/network/network.go +++ b/network/network.go @@ -19,27 +19,17 @@ type AddSpec struct { Existing *types.NetworkConfig } -// Network defines the interface for a network provider (CNI, bridge, ...). type Network interface { Type() string - - // Verify checks whether the network namespace / TAPs for a VM exist. Verify(ctx context.Context, vmID string) error - - // Add allocates NIC host plumbing for the given specs; idempotent re: netns. Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs ...AddSpec) ([]*types.NetworkConfig, error) - - // Remove tears down NIC host plumbing for the given indices; preserves netns. Remove(ctx context.Context, vmID string, indices ...int) error - Delete(context.Context, []string) ([]string, error) Inspect(context.Context, string) (*types.Network, error) List(context.Context) ([]*types.Network, error) - RegisterGC(*gc.Orchestrator) } -// AddRange returns specs for a contiguous range of fresh NICs. func AddRange(from, count int) []AddSpec { out := make([]AddSpec, count) for i := range out { @@ -48,7 +38,6 @@ func AddRange(from, count int) []AddSpec { return out } -// AddRecover returns specs for re-creating existing NICs at slots 0..len-1. func AddRecover(existing []*types.NetworkConfig) []AddSpec { out := make([]AddSpec, len(existing)) for i, e := range existing { From d1cd8c7e3c985b6764833a71c408fd50a65b1583 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:01:21 +0800 Subject: [PATCH 18/33] fix(cni): always attempt deleteNetns in deleteVM After CNI.Remove was introduced for hot-resize, a VM resized down to 0 NICs ends up with empty DB records but its netns intact (Remove preserves the netns by design). vm rm then short-circuited on len(records)==0 and left /var/run/netns/cocoon- behind until gc ran. Drop the early return so deleteNetns runs unconditionally (idempotent: ignores ENOENT). --- network/cni/cni.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/network/cni/cni.go b/network/cni/cni.go index e6008baf..7893375a 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -127,9 +127,8 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { }); err != nil { return fmt.Errorf("read network index: %w", err) } - if len(records) == 0 { - return nil - } + // Run unconditionally — a VM resized to 0 NICs has no records but still + // owns its netns (Remove preserves it by design). nsPath := netnsPath(vmID) ids, _ := c.tearDownNICs(ctx, vmID, nsPath, records, false, true) nsName := netnsName(vmID) From 63649d70facc60d900dffafef8d209aba33e6643 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:15:16 +0800 Subject: [PATCH 19/33] fix(cni): sweep partial records when Remove aborts mid-loop tearDownNICs returns ids of fully-torn-down records even when it aborts on a later failure. Remove was returning err early without calling deleteRecords, leaving the cleaned NICs' DB rows orphaned (CNI DEL'd + TAP deleted, but record stays). Sweep them regardless so next operation sees a clean DB. --- network/cni/create.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/network/cni/create.go b/network/cni/create.go index 7f7efdbf..c05cb9d3 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -170,10 +170,12 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { picked = append(picked, rec) } ids, err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) - if err != nil { - return err + // Always sweep fully-torn-down records, even when tearDownNICs aborted on + // a later NIC; otherwise the cleaned ones leak DB rows until vm delete. + if delErr := c.deleteRecords(ctx, ids); delErr != nil && err == nil { + err = delErr } - return c.deleteRecords(ctx, ids) + return err } func (c *CNI) cniDel(ctx context.Context, confList *libcni.NetworkConfigList, vmID, nsPath, ifName string) error { From feb7a43e2848812afddfd2ad3aafa1758b5ec175 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:16:11 +0800 Subject: [PATCH 20/33] refactor(cni): drop delNICs, route gc.Collect through tearDownNICs delNICs was a strict subset of tearDownNICs(deleteTAP=false, bestEffort=true). tearDownNICs doesn't touch the store, so it's safe to call from gc.Collect which holds the flock itself via WriteRaw/ReadRaw. --- network/cni/cni.go | 18 ------------------ network/cni/gc.go | 2 +- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/network/cni/cni.go b/network/cni/cni.go index 7893375a..34fe9255 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -185,24 +185,6 @@ func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []n return ids, nil } -// delNICs is the lockless CNI DEL loop for gc.Collect; store-aware paths use tearDownNICs. -func (c *CNI) delNICs(ctx context.Context, vmID, nsPath string, records []networkRecord) { - if c.cniConf == nil { - return - } - logger := log.WithFunc("cni.delNICs") - for _, rec := range records { - cl, err := c.confListByName(rec.Type) - if err != nil { - logger.Warnf(ctx, "conflist %q not found for CNI DEL %s/%s: %v", rec.Type, vmID, rec.IfName, err) - continue - } - if err := c.cniDel(ctx, cl, vmID, nsPath, rec.IfName); err != nil { - logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, rec.IfName, err) - } - } -} - func (c *CNI) deleteRecords(ctx context.Context, ids []string) error { if len(ids) == 0 { return nil diff --git a/network/cni/gc.go b/network/cni/gc.go index 0bcd0c47..aafa3680 100644 --- a/network/cni/gc.go +++ b/network/cni/gc.go @@ -76,7 +76,7 @@ func (c *CNI) GCModule() gc.Module[cniSnapshot] { } // 2. CNI DEL per NIC — best-effort IPAM release. - c.delNICs(ctx, vmID, netnsPath(vmID), records) + _, _ = c.tearDownNICs(ctx, vmID, netnsPath(vmID), records, false, true) // 3. Remove the named netns (with retry for async kernel fd cleanup). nsName := netnsName(vmID) From 836f47eb5a5988123542e3025d7bad781cbbc9a1 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:17:31 +0800 Subject: [PATCH 21/33] refactor(cloudhypervisor): share addCocoonNIC across resize-up and clone Both netResizeAdd and hotSwapNets do networkConfigToNet + cocoonNetID(MAC) + addNetVM. Extract into a single helper that returns the assigned id for caller-side rollback. Future changes to the cocoon-net- convention now live in one place. --- hypervisor/cloudhypervisor/clone.go | 4 +--- hypervisor/cloudhypervisor/helper.go | 12 ++++++++++++ hypervisor/cloudhypervisor/netresize.go | 9 ++++----- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/hypervisor/cloudhypervisor/clone.go b/hypervisor/cloudhypervisor/clone.go index 5092c6cc..51807d49 100644 --- a/hypervisor/cloudhypervisor/clone.go +++ b/hypervisor/cloudhypervisor/clone.go @@ -320,9 +320,7 @@ func hotSwapNets(ctx context.Context, hc *http.Client, oldNets []chNet, networkC logger.Infof(ctx, "removed snapshot NIC %s (old MAC %s)", oldNet.ID, oldNet.MAC) } for i, nc := range networkConfigs { - newNet := networkConfigToNet(nc) - newNet.ID = cocoonNetID(nc.MAC) - if err := addNetVM(ctx, hc, newNet); err != nil { + if _, err := addCocoonNIC(ctx, hc, nc); err != nil { return fmt.Errorf("add net device %d/%d (MAC %s, TAP %s): %w", i+1, len(networkConfigs), nc.MAC, nc.TAP, err) } diff --git a/hypervisor/cloudhypervisor/helper.go b/hypervisor/cloudhypervisor/helper.go index 59f9643a..52de164f 100644 --- a/hypervisor/cloudhypervisor/helper.go +++ b/hypervisor/cloudhypervisor/helper.go @@ -184,6 +184,18 @@ func addNetVM(ctx context.Context, hc *http.Client, net chNet) error { return vmPutJSON(ctx, hc, "vm.add-net", "add-net request", net, http.StatusOK, http.StatusNoContent) } +// addCocoonNIC posts vm.add-net with the deterministic cocoon-net- id so +// resize-up and clone hot-swap converge on the same id convention. Returns the +// id for caller-side rollback. +func addCocoonNIC(ctx context.Context, hc *http.Client, nc *types.NetworkConfig) (string, error) { + chN := networkConfigToNet(nc) + chN.ID = cocoonNetID(nc.MAC) + if err := addNetVM(ctx, hc, chN); err != nil { + return "", err + } + return chN.ID, nil +} + // getVMInfo fetches vm.info; cocoon uses it to detect tag/id conflicts // before hot-add and to surface attached devices through inspect. func getVMInfo(ctx context.Context, hc *http.Client) (*chVMInfoResponse, error) { diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 919cf69a..44ead598 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -49,9 +49,8 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm return res, fmt.Errorf("nic %d: plumbing returned %d configs", i, len(ncs)) } nc := ncs[0] - chN := networkConfigToNet(nc) - chN.ID = cocoonNetID(nc.MAC) - if err := addNetVM(ctx, hc, chN); err != nil { + chID, err := addCocoonNIC(ctx, hc, nc) + if err != nil { if rmErr := plumbing.Remove(ctx, vmID, i); rmErr != nil { logger.Warnf(ctx, "rollback host plumbing for nic %d: %v", i, rmErr) } @@ -59,8 +58,8 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm } if err := ch.appendNetworkConfig(ctx, vmID, nc); err != nil { // without rollback the deterministic id collides on the next resize. - if rmErr := removeDeviceVM(ctx, hc, chN.ID); rmErr != nil { - logger.Warnf(ctx, "rollback vm.remove-device %s after persist failure: %v", chN.ID, rmErr) + if rmErr := removeDeviceVM(ctx, hc, chID); rmErr != nil { + logger.Warnf(ctx, "rollback vm.remove-device %s after persist failure: %v", chID, rmErr) } if rmErr := plumbing.Remove(ctx, vmID, i); rmErr != nil { logger.Warnf(ctx, "rollback host plumbing for nic %d: %v", i, rmErr) From b5ddeb838349a410634473e09c2a3b5520b1a02e Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:19:04 +0800 Subject: [PATCH 22/33] docs: restore 1-line godoc on exported netresize/network items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sibling style (extend/fs, extend/vfio, hypervisor/cloudhypervisor/extend.go) has godoc on every exported item. Branch had dropped some during the comment trim pass; restore short one-liners on Resizer, NIC, Normalize, ErrUnsupportedBackend, NetResize, Network, AddRange, AddRecover. Also switch netresize DB update helpers to idx.GetRecord — the existing idiom in hypervisor/clone.go FinalizeClone and snapshot.go. --- extend/netresize/netresize.go | 4 ++++ hypervisor/cloudhypervisor/netresize.go | 13 +++++++------ network/network.go | 3 +++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/extend/netresize/netresize.go b/extend/netresize/netresize.go index b7d0299b..8c38a780 100644 --- a/extend/netresize/netresize.go +++ b/extend/netresize/netresize.go @@ -10,6 +10,7 @@ import ( "github.com/cocoonstack/cocoon/types" ) +// ErrUnsupportedBackend signals the resolved hypervisor cannot resize NICs. var ErrUnsupportedBackend = errors.New("backend does not support net resize") // Spec is one resize request. @@ -17,6 +18,7 @@ type Spec struct { Target int } +// NIC is one NIC summary surfaced through Result.Added / Result.Removed. type NIC struct { Index int `json:"index"` TAP string `json:"tap"` @@ -39,10 +41,12 @@ type Plumbing interface { Remove(ctx context.Context, vmID string, indices ...int) error } +// Resizer resizes a running VM's NIC count. type Resizer interface { NetResize(ctx context.Context, vmRef string, spec Spec, plumbing Plumbing) (Result, error) } +// Normalize validates the spec. func (s *Spec) Normalize() error { if s.Target < 0 { return fmt.Errorf("--nics must be non-negative, got %d", s.Target) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 44ead598..41b66351 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -14,6 +14,7 @@ import ( "github.com/cocoonstack/cocoon/types" ) +// NetResize brings the VM's NIC count to spec.Target on a running CH VM. func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec netresize.Spec, plumbing netresize.Plumbing) (netresize.Result, error) { if err := spec.Normalize(); err != nil { return netresize.Result{}, err @@ -107,9 +108,9 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, func (ch *CloudHypervisor) appendNetworkConfig(ctx context.Context, vmID string, nc *types.NetworkConfig) error { return ch.DB.Update(ctx, func(idx *hypervisor.VMIndex) error { - r := idx.VMs[vmID] - if r == nil { - return hypervisor.ErrNotFound + r, err := idx.GetRecord(vmID) + if err != nil { + return err } r.NetworkConfigs = append(r.NetworkConfigs, nc) return nil @@ -118,9 +119,9 @@ func (ch *CloudHypervisor) appendNetworkConfig(ctx context.Context, vmID string, func (ch *CloudHypervisor) truncateNetworkConfigs(ctx context.Context, vmID string, length int) error { return ch.DB.Update(ctx, func(idx *hypervisor.VMIndex) error { - r := idx.VMs[vmID] - if r == nil { - return hypervisor.ErrNotFound + r, err := idx.GetRecord(vmID) + if err != nil { + return err } if length < len(r.NetworkConfigs) { r.NetworkConfigs = r.NetworkConfigs[:length] diff --git a/network/network.go b/network/network.go index 2eb23c0c..ef4b9d24 100644 --- a/network/network.go +++ b/network/network.go @@ -19,6 +19,7 @@ type AddSpec struct { Existing *types.NetworkConfig } +// Network is the per-VM host-side networking provider (CNI, bridge, ...). type Network interface { Type() string Verify(ctx context.Context, vmID string) error @@ -30,6 +31,7 @@ type Network interface { RegisterGC(*gc.Orchestrator) } +// AddRange builds AddSpecs for a contiguous block of fresh NIC indices. func AddRange(from, count int) []AddSpec { out := make([]AddSpec, count) for i := range out { @@ -38,6 +40,7 @@ func AddRange(from, count int) []AddSpec { return out } +// AddRecover builds AddSpecs for re-creating existing NICs (post-reboot recovery). func AddRecover(existing []*types.NetworkConfig) []AddSpec { out := make([]AddSpec, len(existing)) for i, e := range existing { From e882433bc6457776875406767f0ee081a86aefde Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:20:04 +0800 Subject: [PATCH 23/33] perf: pre-size Result.Added/Removed and bridge added slices Bounds are known at function entry (target-from for add, current-target for remove, len(specs) for bridge rollback). Avoids the grow-and-copy churn. --- hypervisor/cloudhypervisor/netresize.go | 2 ++ network/bridge/bridge_linux.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 41b66351..9eb74955 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -41,6 +41,7 @@ func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec net func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vmID string, vmCfg *types.VMConfig, plumbing netresize.Plumbing, from, target int, res netresize.Result) (netresize.Result, error) { logger := log.WithFunc("cloudhypervisor.NetResize.add") + res.Added = make([]netresize.NIC, 0, target-from) for i := from; i < target; i++ { ncs, err := plumbing.Add(ctx, vmID, vmCfg, network.AddSpec{Index: i}) if err != nil { @@ -75,6 +76,7 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, info *chVMInfoResponse, vmID string, ncs []*types.NetworkConfig, plumbing netresize.Plumbing, current, target int, res netresize.Result) (netresize.Result, error) { logger := log.WithFunc("cloudhypervisor.NetResize.remove") + res.Removed = make([]netresize.NIC, 0, current-target) macToID := make(map[string]string, len(info.Config.Nets)) for _, n := range info.Config.Nets { macToID[strings.ToLower(n.MAC)] = n.ID diff --git a/network/bridge/bridge_linux.go b/network/bridge/bridge_linux.go index d8962521..6cab195d 100644 --- a/network/bridge/bridge_linux.go +++ b/network/bridge/bridge_linux.go @@ -79,7 +79,7 @@ func (b *Bridge) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, sp return nil, fmt.Errorf("find bridge: %w", err) } - var added []int + added := make([]int, 0, len(specs)) defer func() { if retErr == nil || len(added) == 0 { return From 609fc4dede9dca1e198a327bf825d483e1babe42 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:23:44 +0800 Subject: [PATCH 24/33] docs(netresize): point leak warning at the actual reclaim path vm stop/start does not run network teardown; only cocoon vm rm (via Network.Delete) or gc reclaim the host plumbing. Replace the misleading 'stop+restart will reclaim' hint. --- hypervisor/cloudhypervisor/netresize.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 9eb74955..50b65b4c 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -98,7 +98,7 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, return res, fmt.Errorf("persist remove nic %d: %w", i, err) } if plumbingErr != nil { - msg := fmt.Sprintf("nic %d (%s) host plumbing leaked, stop+restart will reclaim: %v", i, chID, plumbingErr) + msg := fmt.Sprintf("nic %d (%s) host plumbing leaked, cocoon vm rm or gc will reclaim: %v", i, chID, plumbingErr) logger.Warn(ctx, msg) res.Warnings = append(res.Warnings, msg) } From e4ad25b232af1471a68bfc61a0c15f14a0039bd1 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:23:49 +0800 Subject: [PATCH 25/33] chore: tighten a few inline/godoc comments --- hypervisor/cloudhypervisor/helper.go | 5 ++--- network/cni/cni.go | 3 +-- network/cni/create.go | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/hypervisor/cloudhypervisor/helper.go b/hypervisor/cloudhypervisor/helper.go index 52de164f..ed90034f 100644 --- a/hypervisor/cloudhypervisor/helper.go +++ b/hypervisor/cloudhypervisor/helper.go @@ -184,9 +184,8 @@ func addNetVM(ctx context.Context, hc *http.Client, net chNet) error { return vmPutJSON(ctx, hc, "vm.add-net", "add-net request", net, http.StatusOK, http.StatusNoContent) } -// addCocoonNIC posts vm.add-net with the deterministic cocoon-net- id so -// resize-up and clone hot-swap converge on the same id convention. Returns the -// id for caller-side rollback. +// addCocoonNIC posts vm.add-net with the deterministic cocoon-net- id +// shared by resize-up and clone hot-swap; returns the id for rollback. func addCocoonNIC(ctx context.Context, hc *http.Client, nc *types.NetworkConfig) (string, error) { chN := networkConfigToNet(nc) chN.ID = cocoonNetID(nc.MAC) diff --git a/network/cni/cni.go b/network/cni/cni.go index 34fe9255..471980ce 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -127,8 +127,7 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { }); err != nil { return fmt.Errorf("read network index: %w", err) } - // Run unconditionally — a VM resized to 0 NICs has no records but still - // owns its netns (Remove preserves it by design). + // Run even when records is empty: a VM resized to 0 NICs still owns its netns. nsPath := netnsPath(vmID) ids, _ := c.tearDownNICs(ctx, vmID, nsPath, records, false, true) nsName := netnsName(vmID) diff --git a/network/cni/create.go b/network/cni/create.go index c05cb9d3..6cca42e0 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -170,8 +170,7 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { picked = append(picked, rec) } ids, err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) - // Always sweep fully-torn-down records, even when tearDownNICs aborted on - // a later NIC; otherwise the cleaned ones leak DB rows until vm delete. + // Sweep partially-torn-down records even on abort, else they leak DB rows. if delErr := c.deleteRecords(ctx, ids); delErr != nil && err == nil { err = delErr } From e52dc03387da7739def584c146dd9712266f9736 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 16:55:58 +0800 Subject: [PATCH 26/33] fix(cni): converge Remove on CNI DEL / TAP failure tearDownNICs now attempts every cleanup step per record (CNI DEL + TAP delete) instead of bailing on the first failure, so the leaked TAP no longer blocks a later resize-up at the same index. CNI.Remove sweeps DB records for every picked index regardless of teardown outcome, matching netResizeRemove's "CH eject is irrevocable, truncate even if plumbing leaks" invariant. The teardown error still propagates so the caller can surface the IPAM leak via Result.Warnings. --- network/cni/cni.go | 32 ++++++++++++++++++-------------- network/cni/create.go | 10 +++++++--- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/network/cni/cni.go b/network/cni/cni.go index 471980ce..81f59265 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -137,9 +137,9 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { return c.deleteRecords(ctx, ids) } -// tearDownNICs runs CNI DEL (+ optional TAP delete) per record. bestEffort -// logs and continues; otherwise the first CNI/TAP failure aborts and only -// fully-torn-down records are returned for DB sweep. +// tearDownNICs attempts CNI DEL (+ optional TAP delete) for every record. +// Returns ids of fully-clean records and the first error; bestEffort=false +// stops after the first failed record (post-cleanup attempt). func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []networkRecord, deleteTAP, bestEffort bool) ([]string, error) { logger := log.WithFunc("cni.tearDownNICs") if c.cniConf == nil { @@ -154,32 +154,36 @@ func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []n } ids := make([]string, 0, len(records)) for _, rec := range records { + var recErr error cl, err := c.confListByName(rec.Type) if err != nil { - if !bestEffort { - return ids, err - } + recErr = err logger.Warnf(ctx, "conflist %q not found for CNI DEL %s/%s: %v", rec.Type, vmID, rec.IfName, err) - continue } - if err := c.cniDel(ctx, cl, vmID, nsPath, rec.IfName); err != nil { - if !bestEffort { - return ids, fmt.Errorf("cni del %s/%s: %w", vmID, rec.IfName, err) + if cl != nil { + if err := c.cniDel(ctx, cl, vmID, nsPath, rec.IfName); err != nil { + if recErr == nil { + recErr = fmt.Errorf("cni del %s/%s: %w", vmID, rec.IfName, err) + } + logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, rec.IfName, err) } - logger.Warnf(ctx, "CNI DEL %s/%s: %v", vmID, rec.IfName, err) } if deleteTAP { var idx int if _, scanErr := fmt.Sscanf(rec.IfName, "eth%d", &idx); scanErr != nil { logger.Warnf(ctx, "parse ifname %q for %s: %v (skip tap delete)", rec.IfName, vmID, scanErr) } else if delErr := deleteTAPInNetns(nsPath, tapNameForVM(vmID, idx)); delErr != nil { - if !bestEffort { - return ids, fmt.Errorf("delete tap %s: %w", tapNameForVM(vmID, idx), delErr) + if recErr == nil { + recErr = fmt.Errorf("delete tap %s: %w", tapNameForVM(vmID, idx), delErr) } logger.Warnf(ctx, "delete tap %s in netns %s: %v", tapNameForVM(vmID, idx), nsPath, delErr) } } - ids = append(ids, rec.ID) + if recErr == nil { + ids = append(ids, rec.ID) + } else if !bestEffort { + return ids, recErr + } } return ids, nil } diff --git a/network/cni/create.go b/network/cni/create.go index 6cca42e0..45948521 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -145,6 +145,9 @@ func (c *CNI) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs } // Remove tears down NIC plumbing for the given indices; preserves the netns. +// DB records for picked indices are always swept so a later resize-up at the +// same index does not see a stale eth entry; CNI/TAP failures still +// propagate as the returned error. func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { if len(indices) == 0 { return nil @@ -161,6 +164,7 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { byIfName[r.IfName] = r } picked := make([]networkRecord, 0, len(indices)) + pickedIDs := make([]string, 0, len(indices)) for _, i := range indices { ifName := fmt.Sprintf("eth%d", i) rec, ok := byIfName[ifName] @@ -168,10 +172,10 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { return fmt.Errorf("nic %d (%s): no record", i, ifName) } picked = append(picked, rec) + pickedIDs = append(pickedIDs, rec.ID) } - ids, err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) - // Sweep partially-torn-down records even on abort, else they leak DB rows. - if delErr := c.deleteRecords(ctx, ids); delErr != nil && err == nil { + _, err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) + if delErr := c.deleteRecords(ctx, pickedIDs); delErr != nil && err == nil { err = delErr } return err From 5ee5837031ffe98979009e5d4d8ec7e37ed8ab1a Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 17:06:52 +0800 Subject: [PATCH 27/33] fix(cni): join Remove teardown and delete-records errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit errors.Join surfaces a deleteRecords failure even when tearDownNICs also failed — the two indicate different problems (host teardown vs DB divergence) and shouldn't mask each other. --- network/cni/create.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/network/cni/create.go b/network/cni/create.go index 45948521..5a102262 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -175,10 +175,7 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { pickedIDs = append(pickedIDs, rec.ID) } _, err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) - if delErr := c.deleteRecords(ctx, pickedIDs); delErr != nil && err == nil { - err = delErr - } - return err + return errors.Join(err, c.deleteRecords(ctx, pickedIDs)) } func (c *CNI) cniDel(ctx context.Context, confList *libcni.NetworkConfigList, vmID, nsPath, ifName string) error { From d5ae0276c603db911081fcd7ccada7c82d926c09 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 17:29:52 +0800 Subject: [PATCH 28/33] fix(cni): always sweep DB records in deleteVM regardless of teardown deleteVM previously relied on tearDownNICs' returned ids, so a record whose persisted conflist name no longer resolves was logged-and-skipped inside tearDownNICs and never deleted from the DB. Sweep all records of this VM after teardown instead. Drops the unused []string return from tearDownNICs since CNI.Remove also pre-collects pickedIDs. --- network/cni/cni.go | 31 ++++++++++++++----------------- network/cni/create.go | 2 +- network/cni/gc.go | 2 +- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/network/cni/cni.go b/network/cni/cni.go index 81f59265..a067f3bb 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -129,30 +129,29 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { } // Run even when records is empty: a VM resized to 0 NICs still owns its netns. nsPath := netnsPath(vmID) - ids, _ := c.tearDownNICs(ctx, vmID, nsPath, records, false, true) + _ = c.tearDownNICs(ctx, vmID, nsPath, records, false, true) nsName := netnsName(vmID) if err := deleteNetns(ctx, nsName); err != nil && !errors.Is(err, fs.ErrNotExist) { return fmt.Errorf("remove netns %s: %w", nsPath, err) } - return c.deleteRecords(ctx, ids) + allIDs := make([]string, 0, len(records)) + for _, rec := range records { + allIDs = append(allIDs, rec.ID) + } + return c.deleteRecords(ctx, allIDs) } // tearDownNICs attempts CNI DEL (+ optional TAP delete) for every record. -// Returns ids of fully-clean records and the first error; bestEffort=false -// stops after the first failed record (post-cleanup attempt). -func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []networkRecord, deleteTAP, bestEffort bool) ([]string, error) { +// Returns the first error; bestEffort=false stops after the first failed +// record (post-cleanup attempt). Callers own DB-record sweep separately. +func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []networkRecord, deleteTAP, bestEffort bool) error { logger := log.WithFunc("cni.tearDownNICs") if c.cniConf == nil { if !bestEffort { - return nil, fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) - } - ids := make([]string, 0, len(records)) - for _, rec := range records { - ids = append(ids, rec.ID) + return fmt.Errorf("%w: no conflist found in %s", network.ErrNotConfigured, c.conf.CNIConfDir) } - return ids, nil + return nil } - ids := make([]string, 0, len(records)) for _, rec := range records { var recErr error cl, err := c.confListByName(rec.Type) @@ -179,13 +178,11 @@ func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []n logger.Warnf(ctx, "delete tap %s in netns %s: %v", tapNameForVM(vmID, idx), nsPath, delErr) } } - if recErr == nil { - ids = append(ids, rec.ID) - } else if !bestEffort { - return ids, recErr + if recErr != nil && !bestEffort { + return recErr } } - return ids, nil + return nil } func (c *CNI) deleteRecords(ctx context.Context, ids []string) error { diff --git a/network/cni/create.go b/network/cni/create.go index 5a102262..694b0c36 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -174,7 +174,7 @@ func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { picked = append(picked, rec) pickedIDs = append(pickedIDs, rec.ID) } - _, err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) + err := c.tearDownNICs(ctx, vmID, netnsPath(vmID), picked, true, false) return errors.Join(err, c.deleteRecords(ctx, pickedIDs)) } diff --git a/network/cni/gc.go b/network/cni/gc.go index aafa3680..d96937d6 100644 --- a/network/cni/gc.go +++ b/network/cni/gc.go @@ -76,7 +76,7 @@ func (c *CNI) GCModule() gc.Module[cniSnapshot] { } // 2. CNI DEL per NIC — best-effort IPAM release. - _, _ = c.tearDownNICs(ctx, vmID, netnsPath(vmID), records, false, true) + _ = c.tearDownNICs(ctx, vmID, netnsPath(vmID), records, false, true) // 3. Remove the named netns (with retry for async kernel fd cleanup). nsName := netnsName(vmID) From edb767d7c4b188b7a7263d9d2c76208f885a15e0 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 17:46:56 +0800 Subject: [PATCH 29/33] fix: nil-guard NetworkConfig derefs in addCocoonNIC and netResizeRemove Per project rule "never dereference pointers without nil check". Both call sites do nc.MAC; a nil entry in rec.NetworkConfigs would panic. Surface a clear error instead. --- hypervisor/cloudhypervisor/helper.go | 3 +++ hypervisor/cloudhypervisor/netresize.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/hypervisor/cloudhypervisor/helper.go b/hypervisor/cloudhypervisor/helper.go index ed90034f..7a3c34e9 100644 --- a/hypervisor/cloudhypervisor/helper.go +++ b/hypervisor/cloudhypervisor/helper.go @@ -187,6 +187,9 @@ func addNetVM(ctx context.Context, hc *http.Client, net chNet) error { // addCocoonNIC posts vm.add-net with the deterministic cocoon-net- id // shared by resize-up and clone hot-swap; returns the id for rollback. func addCocoonNIC(ctx context.Context, hc *http.Client, nc *types.NetworkConfig) (string, error) { + if nc == nil { + return "", fmt.Errorf("addCocoonNIC: nil network config") + } chN := networkConfigToNet(nc) chN.ID = cocoonNetID(nc.MAC) if err := addNetVM(ctx, hc, chN); err != nil { diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 50b65b4c..00cdd6c4 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -83,6 +83,9 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, } for i := current - 1; i >= target; i-- { nc := ncs[i] + if nc == nil { + return res, fmt.Errorf("nic %d: nil network config", i) + } chID := macToID[strings.ToLower(nc.MAC)] if chID == "" { return res, fmt.Errorf("nic %d MAC %s: no live device", i, nc.MAC) From a3d41cc8b2ade1fa7532d6ff1dc0c9fb7deda173 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 18:32:16 +0800 Subject: [PATCH 30/33] fix(netresize): wait for guest B0EJ before host plumbing teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After vm.remove-device CH only flags pending eject; the device stays in device_tree until the guest writes B0EJ via ACPI/SCI. Pause/snapshot iterate device_tree and error out with "receiving on a closed channel" (VMM-thread response channel collapse) when called too soon. Poll vm.info's device_tree for absence of the chID up to 15 s before teardown. Linux acks in < 1 s; Windows can take a few seconds. The wait is scoped to netresize remove — clone's hotSwapNets runs paused (guest can't ACK) and we accept the existing CH limitation there. --- hypervisor/cloudhypervisor/api.go | 5 ++++- hypervisor/cloudhypervisor/helper.go | 17 +++++++++++++++++ hypervisor/cloudhypervisor/netresize.go | 10 ++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hypervisor/cloudhypervisor/api.go b/hypervisor/cloudhypervisor/api.go index 9703ee44..a03dee2e 100644 --- a/hypervisor/cloudhypervisor/api.go +++ b/hypervisor/cloudhypervisor/api.go @@ -1,5 +1,7 @@ package cloudhypervisor +import "encoding/json" + type chVMConfig struct { // Optional — pointer + omitempty (nil → omitted from JSON). Payload *chPayload `json:"payload,omitempty"` @@ -110,7 +112,8 @@ type chPciDeviceInfo struct { } type chVMInfoResponse struct { - Config chVMInfoConfig `json:"config"` + Config chVMInfoConfig `json:"config"` + DeviceTree map[string]json.RawMessage `json:"device_tree,omitempty"` } type chVMInfoConfig struct { diff --git a/hypervisor/cloudhypervisor/helper.go b/hypervisor/cloudhypervisor/helper.go index 7a3c34e9..1abff46c 100644 --- a/hypervisor/cloudhypervisor/helper.go +++ b/hypervisor/cloudhypervisor/helper.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/projecteru2/core/log" @@ -180,6 +181,22 @@ func removeDeviceVM(ctx context.Context, hc *http.Client, deviceID string) error return vmPutJSON(ctx, hc, "vm.remove-device", "remove-device request", map[string]string{"id": deviceID}) } +// waitDeviceEjected blocks until id is gone from CH's device_tree. CH's +// vm.remove-device only flags pending eject and mutates vm_config; the actual +// device_tree removal happens when the guest writes B0EJ via ACPI/SCI. Pause +// / snapshot iterate device_tree, so they error out if called before the +// guest acks. Linux acks within < 1 s; Windows can take several seconds. +func waitDeviceEjected(ctx context.Context, hc *http.Client, deviceID string, timeout time.Duration) error { + return utils.WaitFor(ctx, timeout, 100*time.Millisecond, func() (bool, error) { + info, err := getVMInfo(ctx, hc) + if err != nil { + return false, err + } + _, present := info.DeviceTree[deviceID] + return !present, nil + }) +} + func addNetVM(ctx context.Context, hc *http.Client, net chNet) error { return vmPutJSON(ctx, hc, "vm.add-net", "add-net request", net, http.StatusOK, http.StatusNoContent) } diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 00cdd6c4..af378a02 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -5,6 +5,7 @@ import ( "fmt" "net/http" "strings" + "time" "github.com/projecteru2/core/log" @@ -14,6 +15,10 @@ import ( "github.com/cocoonstack/cocoon/types" ) +// ejectWaitTimeout caps how long we block on guest B0EJ between vm.remove-device +// and the host plumbing teardown. Linux acks in < 1 s; Windows can take a few. +const ejectWaitTimeout = 15 * time.Second + // NetResize brings the VM's NIC count to spec.Target on a running CH VM. func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec netresize.Spec, plumbing netresize.Plumbing) (netresize.Result, error) { if err := spec.Normalize(); err != nil { @@ -93,6 +98,11 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, if err := removeDeviceVM(ctx, hc, chID); err != nil { return res, fmt.Errorf("vm.remove-device nic %d (%s): %w", i, chID, err) } + // CH only flags pending eject — wait for the guest to ACK via B0EJ so + // the device_tree entry is gone before pause/snapshot iterate it. + if err := waitDeviceEjected(ctx, hc, chID, ejectWaitTimeout); err != nil { + return res, fmt.Errorf("wait eject nic %d (%s): %w", i, chID, err) + } // CH eject is irrevocable; truncate even if plumbing leaks, else the // next resize re-reads the stale NIC and fails MAC lookup. plumbingErr := plumbing.Remove(ctx, vmID, i) From e642cce308f9c071c8b1fd8d4de1e6dd240e4d71 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 18:37:41 +0800 Subject: [PATCH 31/33] docs: note clone hot-swap can't wait for B0EJ (paused guest) --- KNOWN_ISSUES.md | 4 ++++ hypervisor/cloudhypervisor/helper.go | 6 +----- hypervisor/cloudhypervisor/netresize.go | 9 ++------- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md index 7a43e031..b7a8cfc9 100644 --- a/KNOWN_ISSUES.md +++ b/KNOWN_ISSUES.md @@ -255,6 +255,10 @@ Quiesce the guest NIC (ip link set down + NetworkManager remove on Linux; Disabl Firecracker is not supported: FC has no NIC hot-plug / hot-unplug API. +## NIC hot-remove during clone hot-swap cannot wait for guest B0EJ + +Plain `cocoon vm net --nics N` polls CH's `device_tree` after `vm.remove-device` until the guest ACKs B0EJ via ACPI/SCI (typically < 1 s on Linux, a few seconds on Windows). The clone path's `hotSwapNets` runs while the VM is paused, so the guest cannot process SCI — eject stays pending until resume. The clone path therefore returns without waiting and adds the fresh NICs against half-removed device-tree state. This is the long-standing CH limitation cocoon was designed around; the new wait only applies to the running-VM resize. + ## Android cocoon-agent service may be blocked by SELinux `os-image/android/{14.0,15.0}` install the cocoon-agent binary at `/system/bin/cocoon-agent` and register it via `/system/etc/init/cocoon-agent.rc`. Android's SELinux policies don't ship with a domain for cocoon-agent, so the service may run in `init`'s domain or be denied outright depending on the redroid build. diff --git a/hypervisor/cloudhypervisor/helper.go b/hypervisor/cloudhypervisor/helper.go index 1abff46c..6ebf1d62 100644 --- a/hypervisor/cloudhypervisor/helper.go +++ b/hypervisor/cloudhypervisor/helper.go @@ -181,11 +181,7 @@ func removeDeviceVM(ctx context.Context, hc *http.Client, deviceID string) error return vmPutJSON(ctx, hc, "vm.remove-device", "remove-device request", map[string]string{"id": deviceID}) } -// waitDeviceEjected blocks until id is gone from CH's device_tree. CH's -// vm.remove-device only flags pending eject and mutates vm_config; the actual -// device_tree removal happens when the guest writes B0EJ via ACPI/SCI. Pause -// / snapshot iterate device_tree, so they error out if called before the -// guest acks. Linux acks within < 1 s; Windows can take several seconds. +// waitDeviceEjected blocks until id is gone from CH's device_tree. func waitDeviceEjected(ctx context.Context, hc *http.Client, deviceID string, timeout time.Duration) error { return utils.WaitFor(ctx, timeout, 100*time.Millisecond, func() (bool, error) { info, err := getVMInfo(ctx, hc) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index af378a02..4107be49 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -15,9 +15,8 @@ import ( "github.com/cocoonstack/cocoon/types" ) -// ejectWaitTimeout caps how long we block on guest B0EJ between vm.remove-device -// and the host plumbing teardown. Linux acks in < 1 s; Windows can take a few. -const ejectWaitTimeout = 15 * time.Second +// ejectWaitTimeout caps how long we block on guest B0EJ between vm.remove-device and the host plumbing teardown. +const ejectWaitTimeout = 5 * time.Second // NetResize brings the VM's NIC count to spec.Target on a running CH VM. func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec netresize.Spec, plumbing netresize.Plumbing) (netresize.Result, error) { @@ -98,13 +97,9 @@ func (ch *CloudHypervisor) netResizeRemove(ctx context.Context, hc *http.Client, if err := removeDeviceVM(ctx, hc, chID); err != nil { return res, fmt.Errorf("vm.remove-device nic %d (%s): %w", i, chID, err) } - // CH only flags pending eject — wait for the guest to ACK via B0EJ so - // the device_tree entry is gone before pause/snapshot iterate it. if err := waitDeviceEjected(ctx, hc, chID, ejectWaitTimeout); err != nil { return res, fmt.Errorf("wait eject nic %d (%s): %w", i, chID, err) } - // CH eject is irrevocable; truncate even if plumbing leaks, else the - // next resize re-reads the stale NIC and fails MAC lookup. plumbingErr := plumbing.Remove(ctx, vmID, i) if err := ch.truncateNetworkConfigs(ctx, vmID, i); err != nil { logger.Errorf(ctx, err, "persistence diverged from CH for vm %s nic %d (%s): live device removed, cocoon record retained", vmID, i, chID) From 534a9f6f20ba81d08c27aa3edc5d6c16ab9c968c Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 19:41:10 +0800 Subject: [PATCH 32/33] fix(netresize): symmetric eject-wait in Add persist-fail rollback + 10s timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two findings from final closure review: 1. netResizeAdd's persist-failure rollback called removeDeviceVM then plumbing.Remove with no wait between. Symmetric to the resize-down race that a3d41cc fixed: a fast plumbing.Remove can yank the TAP while CH's virtio-net ioeventfd is still attached. Mirror the same waitDeviceEjected bracket so the rollback can't recreate the channel-closed condition. 2. 5 s ejectWaitTimeout was tight for Windows. Bump to 10 s — Linux still acks in < 1 s; Windows NDIS can take a few. --- hypervisor/cloudhypervisor/netresize.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index 4107be49..f001780e 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -15,8 +15,9 @@ import ( "github.com/cocoonstack/cocoon/types" ) -// ejectWaitTimeout caps how long we block on guest B0EJ between vm.remove-device and the host plumbing teardown. -const ejectWaitTimeout = 5 * time.Second +// ejectWaitTimeout caps how long we block on guest B0EJ between vm.remove-device +// and the host plumbing teardown. Linux acks in < 1 s; Windows can take a few. +const ejectWaitTimeout = 10 * time.Second // NetResize brings the VM's NIC count to spec.Target on a running CH VM. func (ch *CloudHypervisor) NetResize(ctx context.Context, vmRef string, spec netresize.Spec, plumbing netresize.Plumbing) (netresize.Result, error) { @@ -63,9 +64,12 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm return res, fmt.Errorf("vm.add-net nic %d: %w", i, err) } if err := ch.appendNetworkConfig(ctx, vmID, nc); err != nil { - // without rollback the deterministic id collides on the next resize. + // Symmetric with netResizeRemove: wait for guest B0EJ before tearing + // down host plumbing so we don't yank a TAP CH's ioeventfd still owns. if rmErr := removeDeviceVM(ctx, hc, chID); rmErr != nil { logger.Warnf(ctx, "rollback vm.remove-device %s after persist failure: %v", chID, rmErr) + } else if wErr := waitDeviceEjected(ctx, hc, chID, ejectWaitTimeout); wErr != nil { + logger.Warnf(ctx, "rollback wait eject %s after persist failure: %v", chID, wErr) } if rmErr := plumbing.Remove(ctx, vmID, i); rmErr != nil { logger.Warnf(ctx, "rollback host plumbing for nic %d: %v", i, rmErr) From 396295121ba38ef05de001b57b5760f4180e7c89 Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 19:47:25 +0800 Subject: [PATCH 33/33] chore: trim multi-line godocs introduced by this branch to 1 line Collapsed 8 multi-liners I added (resolveAttacher, plumbingForVM, providerForVM, Result, addCocoonNIC, ejectWaitTimeout, the symmetric- rollback inline, tearDownNICs, Remove) into single lines. Pre-existing multi-line comments left alone. --- cmd/vm/attach.go | 3 +-- cmd/vm/lifecycle.go | 3 +-- cmd/vm/netresize.go | 3 +-- extend/netresize/netresize.go | 3 +-- hypervisor/cloudhypervisor/helper.go | 3 +-- hypervisor/cloudhypervisor/netresize.go | 6 ++---- network/cni/cni.go | 4 +--- network/cni/create.go | 4 +--- 8 files changed, 9 insertions(+), 20 deletions(-) diff --git a/cmd/vm/attach.go b/cmd/vm/attach.go index 5d04d5f7..d96368a2 100644 --- a/cmd/vm/attach.go +++ b/cmd/vm/attach.go @@ -85,8 +85,7 @@ func (h Handler) DeviceDetach(cmd *cobra.Command, args []string) error { return nil } -// resolveAttacher resolves args[0] to a hypervisor implementing A; returns the -// hypervisor too so callers can issue further generic ops without re-finding. +// resolveAttacher resolves args[0] to a hypervisor implementing A; also returns conf+hyper for further ops. func resolveAttacher[A any](h Handler, cmd *cobra.Command, args []string, op string, errUnsupported error) (context.Context, *config.Config, hypervisor.Hypervisor, A, error) { var zero A ctx, conf, err := h.Init(cmd) diff --git a/cmd/vm/lifecycle.go b/cmd/vm/lifecycle.go index d7a39800..9e84aca3 100644 --- a/cmd/vm/lifecycle.go +++ b/cmd/vm/lifecycle.go @@ -392,8 +392,7 @@ func (h Handler) recoverNetwork(ctx context.Context, conf *config.Config, hyper } } -// providerForVM picks the network provider from persisted NetworkConfig; -// cniProvider may be nil (lazy-init); bridgeCache must be non-nil. +// providerForVM picks the network provider from persisted NetworkConfig; cniProvider may be nil (lazy-init), bridgeCache must be non-nil. func providerForVM(conf *config.Config, cniProvider network.Network, bridgeCache map[string]network.Network, configs []*types.NetworkConfig) (network.Network, error) { if len(configs) == 0 { return nil, fmt.Errorf("no network configs") diff --git a/cmd/vm/netresize.go b/cmd/vm/netresize.go index b9845040..aa1fab8a 100644 --- a/cmd/vm/netresize.go +++ b/cmd/vm/netresize.go @@ -43,8 +43,7 @@ func (h Handler) NetResize(cmd *cobra.Command, args []string) error { return nil } -// plumbingForVM picks the provider matching the VM's existing NICs; zero NICs -// is fatal because VMConfig carries no bridge hint to recover the choice. +// plumbingForVM picks the provider for the VM's existing NICs; fails on zero NICs (VMConfig has no bridge hint). func plumbingForVM(conf *config.Config, configs []*types.NetworkConfig) (network.Network, error) { if len(configs) == 0 { return nil, fmt.Errorf("vm has zero NICs; resize up is not supported (start the VM with at least one NIC)") diff --git a/extend/netresize/netresize.go b/extend/netresize/netresize.go index 8c38a780..4433d0ad 100644 --- a/extend/netresize/netresize.go +++ b/extend/netresize/netresize.go @@ -25,8 +25,7 @@ type NIC struct { MAC string `json:"mac"` } -// Result reports the before/after count and NICs touched. Warnings surface -// non-fatal divergence (e.g. host plumbing leak after a successful CH eject). +// Result reports the before/after count, NICs touched, and non-fatal Warnings. type Result struct { Before int `json:"before"` After int `json:"after"` diff --git a/hypervisor/cloudhypervisor/helper.go b/hypervisor/cloudhypervisor/helper.go index 6ebf1d62..f0cee3c8 100644 --- a/hypervisor/cloudhypervisor/helper.go +++ b/hypervisor/cloudhypervisor/helper.go @@ -197,8 +197,7 @@ func addNetVM(ctx context.Context, hc *http.Client, net chNet) error { return vmPutJSON(ctx, hc, "vm.add-net", "add-net request", net, http.StatusOK, http.StatusNoContent) } -// addCocoonNIC posts vm.add-net with the deterministic cocoon-net- id -// shared by resize-up and clone hot-swap; returns the id for rollback. +// addCocoonNIC posts vm.add-net with the deterministic cocoon-net- id; returns id for rollback. func addCocoonNIC(ctx context.Context, hc *http.Client, nc *types.NetworkConfig) (string, error) { if nc == nil { return "", fmt.Errorf("addCocoonNIC: nil network config") diff --git a/hypervisor/cloudhypervisor/netresize.go b/hypervisor/cloudhypervisor/netresize.go index f001780e..09649d8c 100644 --- a/hypervisor/cloudhypervisor/netresize.go +++ b/hypervisor/cloudhypervisor/netresize.go @@ -15,8 +15,7 @@ import ( "github.com/cocoonstack/cocoon/types" ) -// ejectWaitTimeout caps how long we block on guest B0EJ between vm.remove-device -// and the host plumbing teardown. Linux acks in < 1 s; Windows can take a few. +// ejectWaitTimeout bounds the wait for guest B0EJ; Linux acks < 1 s, Windows can take a few. const ejectWaitTimeout = 10 * time.Second // NetResize brings the VM's NIC count to spec.Target on a running CH VM. @@ -64,8 +63,7 @@ func (ch *CloudHypervisor) netResizeAdd(ctx context.Context, hc *http.Client, vm return res, fmt.Errorf("vm.add-net nic %d: %w", i, err) } if err := ch.appendNetworkConfig(ctx, vmID, nc); err != nil { - // Symmetric with netResizeRemove: wait for guest B0EJ before tearing - // down host plumbing so we don't yank a TAP CH's ioeventfd still owns. + // Wait for B0EJ before host teardown (symmetric with netResizeRemove). if rmErr := removeDeviceVM(ctx, hc, chID); rmErr != nil { logger.Warnf(ctx, "rollback vm.remove-device %s after persist failure: %v", chID, rmErr) } else if wErr := waitDeviceEjected(ctx, hc, chID, ejectWaitTimeout); wErr != nil { diff --git a/network/cni/cni.go b/network/cni/cni.go index a067f3bb..e7e5df9b 100644 --- a/network/cni/cni.go +++ b/network/cni/cni.go @@ -141,9 +141,7 @@ func (c *CNI) deleteVM(ctx context.Context, vmID string) error { return c.deleteRecords(ctx, allIDs) } -// tearDownNICs attempts CNI DEL (+ optional TAP delete) for every record. -// Returns the first error; bestEffort=false stops after the first failed -// record (post-cleanup attempt). Callers own DB-record sweep separately. +// tearDownNICs runs CNI DEL (+ optional TAP delete) per record; bestEffort=false stops at first failure. Caller sweeps DB. func (c *CNI) tearDownNICs(ctx context.Context, vmID, nsPath string, records []networkRecord, deleteTAP, bestEffort bool) error { logger := log.WithFunc("cni.tearDownNICs") if c.cniConf == nil { diff --git a/network/cni/create.go b/network/cni/create.go index 694b0c36..08056c69 100644 --- a/network/cni/create.go +++ b/network/cni/create.go @@ -145,9 +145,7 @@ func (c *CNI) Add(ctx context.Context, vmID string, vmCfg *types.VMConfig, specs } // Remove tears down NIC plumbing for the given indices; preserves the netns. -// DB records for picked indices are always swept so a later resize-up at the -// same index does not see a stale eth entry; CNI/TAP failures still -// propagate as the returned error. +// Always sweeps DB records for picked indices so resize-up to the same index isn't stale; CNI/TAP errors still propagate. func (c *CNI) Remove(ctx context.Context, vmID string, indices ...int) error { if len(indices) == 0 { return nil