From 4dbc79d60ed57505df4d3469611bafe7bf243fcc Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 22:30:03 +0800 Subject: [PATCH 1/2] feat(vm): cocoon vm clone --nics N to override snapshot NIC count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clone has always inherited NIC count from the snapshot. With the new runtime net resize, a VM resized down to 0 NICs then snapshot'd produces a clone that can't recover networking later: CH started in host netns because NetworkConfigs was empty, so a subsequent vm net adds NICs in the per-VM netns CH never entered. Mirror cocoon vm run's --nics: passing --nics N at clone time forces N NICs at clone start, putting CH into the per-VM netns from the start and keeping the rest of the lifecycle aligned with --nics ≥ 1 path. Omitting --nics (the default) preserves inheritance from snapshot. Adjust the vm net error to point users at the clone override when they hit the zero-NIC resize-up wall. --- cmd/vm/commands.go | 1 + cmd/vm/netresize.go | 4 ++-- cmd/vm/run.go | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cmd/vm/commands.go b/cmd/vm/commands.go index 036b0dac..10e4f919 100644 --- a/cmd/vm/commands.go +++ b/cmd/vm/commands.go @@ -293,6 +293,7 @@ func addVMFlags(cmd *cobra.Command) { func addCloneFlags(cmd *cobra.Command) { cmd.Flags().String("name", "", "VM name (default: cocoon-clone-)") + cmd.Flags().Int("nics", 0, "override NIC count (omit to inherit from snapshot)") cmd.Flags().Int("queue-size", 0, "virtio-net ring depth per queue (0 = inherit from snapshot)") //nolint:mnd cmd.Flags().Int("disk-queue-size", 0, "virtio-blk ring depth per device (0 = inherit from snapshot)") //nolint:mnd cmd.Flags().String("network", "", "CNI conflist name (empty = inherit from source VM)") diff --git a/cmd/vm/netresize.go b/cmd/vm/netresize.go index aa1fab8a..59677389 100644 --- a/cmd/vm/netresize.go +++ b/cmd/vm/netresize.go @@ -43,10 +43,10 @@ func (h Handler) NetResize(cmd 
*cobra.Command, args []string) error { return nil } -// plumbingForVM picks the provider for the VM's existing NICs; fails on zero NICs (VMConfig has no bridge hint). +// plumbingForVM picks the provider matching the VM's existing NICs; zero NICs is fatal (use `vm clone --nics N` instead). func plumbingForVM(conf *config.Config, configs []*types.NetworkConfig) (network.Network, error) { if len(configs) == 0 { - return nil, fmt.Errorf("vm has zero NICs; resize up is not supported (start the VM with at least one NIC)") + return nil, fmt.Errorf("zero NICs; resize up not supported (use `vm clone --nics N` instead)") } return providerForVM(conf, nil, map[string]network.Network{}, configs) } diff --git a/cmd/vm/run.go b/cmd/vm/run.go index 8c4b955d..0ee91005 100644 --- a/cmd/vm/run.go +++ b/cmd/vm/run.go @@ -351,7 +351,11 @@ func (h Handler) prepareClone(ctx context.Context, cmd *cobra.Command, conf *con } bridgeDev, _ := cmd.Flags().GetString("bridge") - netProvider, networkConfigs, err := initNetwork(ctx, conf, vmID, cfg.NICs, vmCfg, tapQueues(vmCfg.CPU, conf.UseFirecracker), bridgeDev) + nics := cfg.NICs + if cmd.Flags().Changed("nics") { + nics, _ = cmd.Flags().GetInt("nics") + } + netProvider, networkConfigs, err := initNetwork(ctx, conf, vmID, nics, vmCfg, tapQueues(vmCfg.CPU, conf.UseFirecracker), bridgeDev) if err != nil { return nil, "", nil, nil, err } From 061a965471e7ec4ab26efacb97fa4030a4755ebd Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 12 May 2026 23:54:43 +0800 Subject: [PATCH 2/2] docs(readme): document clone --nics override + network backend precedence --- README.md | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index aa2e68ba..10c3153f 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,7 @@ Applies to `cocoon vm clone`: | Flag | Default | Description | | ----------- | ------------------------ | ------------------------------------------------------- | | `--name` | `cocoon-clone-` 
| VM name | +| `--nics` | inherit from snapshot | Override NIC count at clone time; lets a 0-NIC snapshot clone with networking (CH hot-swaps NICs after restore) | | `--queue-size` | `0` (inherit) | Virtio-net ring depth per queue (0 = inherit from snapshot) | | `--disk-queue-size` | `0` (inherit) | Virtio-blk ring depth per device (0 = inherit from snapshot; CH only) | | `--network` | empty (inherit) | CNI conflist name (empty = inherit from source VM) | @@ -215,10 +216,23 @@ Applies to `cocoon vm clone`: | `--pull` | `false` | Auto-pull base image if not found locally (for cross-node clone) | | `--from-dir` | empty | Clone from a snapshot directory (must contain `snapshot.json`); mutually exclusive with positional `SNAPSHOT` | -CPU, memory, storage, and NIC count all inherit from the snapshot — both -hypervisors restore the guest from the snapshot's binary device state, so -those values are fixed at snapshot time. Use `cocoon vm run` to create a -fresh VM with different resources. +CPU, memory, and storage all inherit from the snapshot — both hypervisors +restore the guest from the snapshot's binary device state, so those values +are fixed at snapshot time. NIC count inherits by default but `--nics N` +overrides it (CH only) by hot-swapping the snapshot's NICs for a fresh set +right after restore. Use `cocoon vm run` to create a fresh VM with different +CPU/memory/storage. + +**Network backend** is decided per clone (the snapshot does not persist a +bridge device). Precedence: + +1. `--bridge X` → bridge backend with bridge device `X`. +2. `--network Y` (no `--bridge`) → CNI backend with conflist `Y`. +3. neither → CNI backend, conflist inherited from the snapshot's recorded + `vmCfg.Network` (empty = CNI default). + +A bridge-backed source snapshot cloned without `--bridge` silently defaults +to CNI. Pass `--bridge X` at clone time to keep bridge mode. ### Restore Flags @@ -535,7 +549,7 @@ cocoon vm net my-vm --nics 1 Cocoon manages **host-side** plumbing only. 
CH's `vm.remove-device` marks the slot for ejection but the actual eject only happens when the guest cooperates via ACPI (B0EJ write). The host TAP / veth / CNI lease are torn down immediately after the API call regardless. Quiesce in-guest NIC state (driver unbind, NetworkManager removal, Windows NDIS halt) **before** reducing the count, or the in-guest driver will reference plumbing that no longer exists. -A VM started with zero NICs cannot be resized up (the VM record carries no provider hint). Start with at least one NIC if you plan to resize. +A VM started with zero NICs cannot be resized up — CH was launched in the host netns (no `NetworkConfigs` to derive a per-VM netns from), so later plumbing can't reach it. To recover networking on a 0-NIC snapshot, clone with `cocoon vm clone --nics 1 --network Y` (or `--bridge X`): the clone has NICs from the start, putting CH in the right netns from boot. ## Windows Support