diff --git a/compute/src/main/java/org/zstack/compute/vm/devices/VmInstanceResourceMetadataManagerImpl.java b/compute/src/main/java/org/zstack/compute/vm/devices/VmInstanceResourceMetadataManagerImpl.java
index 3d78c146588..c88290fa26b 100644
--- a/compute/src/main/java/org/zstack/compute/vm/devices/VmInstanceResourceMetadataManagerImpl.java
+++ b/compute/src/main/java/org/zstack/compute/vm/devices/VmInstanceResourceMetadataManagerImpl.java
@@ -22,13 +22,29 @@
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 import static org.zstack.core.Platform.operr;
 
 public class VmInstanceResourceMetadataManagerImpl implements VmInstanceResourceMetadataManager {
     private static final CLogger logger = Utils.getLogger(VmInstanceResourceMetadataManagerImpl.class);
+
+    // Synthetic resourceUuids that must survive pruneStaleDeviceMetadata
+    // regardless of what the libvirt snapshot reports. These rows are owned
+    // by other subsystems (memory balloon, resource config, guest tools).
+    private static final Set<String> ALWAYS_KEEP_UUIDS;
+    static {
+        Set<String> s = new HashSet<>();
+        s.add(MEM_BALLOON_UUID);
+        s.add(RESOURCE_CONFIG_UUID);
+        s.add(GUEST_TOOLS_RESOURCE_CONFIG_UUID);
+        ALWAYS_KEEP_UUIDS = Collections.unmodifiableSet(s);
+    }
+
     @Autowired
     private DatabaseFacade dbf;
 
@@ -343,6 +359,53 @@ public void updateVmResourceMetadataDeviceAddress(String vmInstanceUuid, String
         createOrUpdateVmResourceMetadata(resourceUuid, DeviceAddress.fromString(deviceAddress), vmInstanceUuid, null, null);
     }
 
+    @Override
+    public int pruneStaleDeviceMetadata(String vmInstanceUuid, Set<String> survivingResourceUuids) {
+        if (deviceAddressRecordingDisabled()) {
+            return 0;
+        }
+        if (vmInstanceUuid == null) {
+            return 0;
+        }
+
+        List<VmInstanceResourceMetadataVO> candidates = Q.New(VmInstanceResourceMetadataVO.class)
+                .eq(VmInstanceResourceMetadataVO_.vmInstanceUuid, vmInstanceUuid)
+                .list();
+        if (candidates.isEmpty()) {
+            return 0;
+        }
+
+        // Whitelist: always preserved regardless of surviving set.
+        // - vmInstanceUuid: vmXml archive row
+        // - MEM_BALLOON / RESOURCE_CONFIG / GUEST_TOOLS: synthetic uuids
+        //   owned by other subsystems, never reported by the libvirt
+        //   device snapshot.
+        Set<String> surviving = survivingResourceUuids == null
+                ? Collections.<String>emptySet() : survivingResourceUuids;
+
+        List<String> toDelete = candidates.stream()
+                .map(VmInstanceResourceMetadataVO::getResourceUuid)
+                .filter(ru -> !ALWAYS_KEEP_UUIDS.contains(ru))
+                .filter(ru -> !vmInstanceUuid.equals(ru))
+                .filter(ru -> !surviving.contains(ru))
+                .collect(Collectors.toList());
+
+        if (toDelete.isEmpty()) {
+            return 0;
+        }
+
+        logger.debug(String.format(
+                "prune stale VmInstanceResourceMetadataVO for vm[%s], resourceUuids=%s",
+                vmInstanceUuid, toDelete));
+
+        SQL.New(VmInstanceResourceMetadataVO.class)
+                .eq(VmInstanceResourceMetadataVO_.vmInstanceUuid, vmInstanceUuid)
+                .in(VmInstanceResourceMetadataVO_.resourceUuid, toDelete)
+                .hardDelete();
+
+        return toDelete.size();
+    }
+
     @Override
     public void deleteArchiveVmInstanceResourceMetadataGroup(String archiveForResourceUuid) {
         SQL.New(VmInstanceResourceMetadataGroupVO.class).eq(VmInstanceResourceMetadataGroupVO_.resourceUuid, archiveForResourceUuid).hardDelete();
diff --git a/header/src/main/java/org/zstack/header/vm/devices/VmInstanceResourceMetadataManager.java b/header/src/main/java/org/zstack/header/vm/devices/VmInstanceResourceMetadataManager.java
index 29f2f81070d..16c2cc94be7 100644
--- a/header/src/main/java/org/zstack/header/vm/devices/VmInstanceResourceMetadataManager.java
+++ b/header/src/main/java/org/zstack/header/vm/devices/VmInstanceResourceMetadataManager.java
@@ -4,6 +4,7 @@
 
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 public interface VmInstanceResourceMetadataManager {
     String MEM_BALLOON_UUID = "4780bf6d2fa65700f22e36c27e8ff05c";
@@ -151,4 +152,24 @@ public interface VmInstanceResourceMetadataManager {
     List getArchivedResourceMetadataInfoFromArchiveForResourceUuid(String vmInstanceUuid, String archiveForResourceUuid, String metadataClass);
 
     void updateVmResourceMetadataDeviceAddress(String vmInstanceUuid, String resourceUuid, String deviceAddress);
+
+    /**
+     * Drop rows of this VM whose resourceUuid is NOT in survivingResourceUuids.
+     * Used by start-vm / sync-vm-device-info extension point to reconcile DB
+     * state with the libvirt domain actually running: any extra resourceUuid
+     * that the agent response does not report is considered stale and removed,
+     * regardless of metadataClass.
+     *
+     * Always preserved (whitelist, independent of survivingResourceUuids):
+     *   - row whose resourceUuid == vmInstanceUuid (vmXml archive)
+     *   - MEM_BALLOON_UUID / RESOURCE_CONFIG_UUID / GUEST_TOOLS_RESOURCE_CONFIG_UUID
+     *
+     * Caller is responsible for adding any resourceUuid that must survive
+     * (e.g. every VM nic uuid when rsp.nicInfos is null) to survivingResourceUuids.
+     *
+     * @param vmInstanceUuid VM uuid
+     * @param survivingResourceUuids resourceUuids that still exist in the libvirt domain
+     * @return number of rows deleted
+     */
+    int pruneStaleDeviceMetadata(String vmInstanceUuid, Set<String> survivingResourceUuids);
 }
diff --git a/plugin/kvm/src/main/java/org/zstack/kvm/VirtualPciDeviceKvmExtensionPoint.java b/plugin/kvm/src/main/java/org/zstack/kvm/VirtualPciDeviceKvmExtensionPoint.java
index e7c04f4e989..091423d34b9 100644
--- a/plugin/kvm/src/main/java/org/zstack/kvm/VirtualPciDeviceKvmExtensionPoint.java
+++ b/plugin/kvm/src/main/java/org/zstack/kvm/VirtualPciDeviceKvmExtensionPoint.java
@@ -13,6 +13,9 @@
 import org.zstack.utils.gson.JSONObjectUtil;
 import org.zstack.utils.logging.CLogger;
 
+import java.util.HashSet;
+import java.util.Set;
+
 public class VirtualPciDeviceKvmExtensionPoint implements KVMStartVmExtensionPoint, KVMSyncVmDeviceInfoExtensionPoint {
     private static final CLogger logger = Utils.getLogger(VirtualPciDeviceKvmExtensionPoint.class);
 
@@ -77,38 +80,88 @@ private String getVmXml(String vmUuid) {
 
     public void afterReceiveVmDeviceInfoResponse(VmInstanceInventory vm, KVMAgentCommands.VmDevicesInfoResponse rsp, VmInstanceSpec spec) {
         String vmUuid = spec != null ? spec.getVmInventory().getUuid() : vm.getUuid();
+        Set<String> surviving = new HashSet<>();
+        // Tracks whether the agent reply actually contains valid device info.
+        // Only flipped to true when we observe at least one valid virtual
+        // device / matched nic / balloon address from the response itself.
+        // Together with a non-null virtual device list it gates the prune;
+        // resourceUuids added as defensive fallbacks (e.g. nic uuids copied
+        // from spec when nicInfos is null) must NOT influence this flag.
+        boolean hasAuthoritativeDeviceInfo = false;
+
         vidManager.saveVmXmlMetadata(rsp.getVmXml(), vmUuid);
+        // the vmXml row uses vmUuid as resourceUuid; keep it
+        surviving.add(vmUuid);
 
         // only update pci address, metadata is not mandatory in normal usage
         // check its usage when create snapshot or backup
         if (rsp.getVirtualDeviceInfoList() != null) {
-            rsp.getVirtualDeviceInfoList().forEach(info -> {
+            for (VirtualDeviceInfo info : rsp.getVirtualDeviceInfoList()) {
                 if (info.isValid()) {
                     vidManager.createOrUpdateVmResourceMetadata(info, vmUuid);
+                    surviving.add(info.getResourceUuid());
+                    hasAuthoritativeDeviceInfo = true;
                 }
-            });
+            }
         }
 
-        if (rsp.getNicInfos() == null) {
-            return;
-        }
+        if (rsp.getNicInfos() != null) {
+            for (KVMAgentCommands.VmNicInfo info : rsp.getNicInfos()) {
+                VmNicInventory nic = (spec != null ? spec.getDestNics() : vm.getVmNics())
+                        .stream()
+                        .filter(vmNicInventory -> info.getMacAddress() != null && info.getMacAddress().equals(vmNicInventory.getMac()))
+                        .findFirst()
+                        .orElse(null);
+                if (nic == null) {
+                    continue;
+                }
 
-        rsp.getNicInfos().forEach(info -> {
-            VmNicInventory nic = (spec != null ? spec.getDestNics() : vm.getVmNics())
-                    .stream()
-                    .filter(vmNicInventory -> vmNicInventory.getMac().equals(info.getMacAddress()))
-                    .findFirst()
-                    .orElse(null);
-            if (nic == null) {
-                return;
+                vidManager.createOrUpdateVmResourceMetadata(new VirtualDeviceInfo(nic.getUuid(), info.getDeviceAddress()), vmUuid);
+                surviving.add(nic.getUuid());
+                hasAuthoritativeDeviceInfo = true;
             }
+        } else {
+            // nicInfos is null => partial agent response; we do NOT have an
+            // authoritative nic snapshot. Preserve every known nic uuid of
+            // this VM so a prune step (triggered by other authoritative fields)
+            // cannot drop their address rows as collateral damage.
+            (spec != null ? spec.getDestNics() : vm.getVmNics())
+                    .forEach(n -> surviving.add(n.getUuid()));
+        }
 
-            vidManager.createOrUpdateVmResourceMetadata(new VirtualDeviceInfo(nic.getUuid(), info.getDeviceAddress()), vmUuid);
-        });
-
-        if (!StringUtils.isEmpty(rsp.getMemBalloonInfo().getDeviceAddress().toString())) {
+        if (rsp.getMemBalloonInfo() != null
+                && rsp.getMemBalloonInfo().getDeviceAddress() != null
+                && !StringUtils.isEmpty(rsp.getMemBalloonInfo().getDeviceAddress().toString())) {
             vidManager.createOrUpdateVmResourceMetadata(new VirtualDeviceInfo(vidManager.MEM_BALLOON_UUID,
                     DeviceAddress.fromString(rsp.getMemBalloonInfo().getDeviceAddress().toString())), vmUuid);
+            surviving.add(vidManager.MEM_BALLOON_UUID);
+            hasAuthoritativeDeviceInfo = true;
+        }
+
+        // Differential prune of stale rows whose resourceUuid is no longer
+        // present in the libvirt domain, so DB state stays consistent after
+        // devices (e.g. cdrom) are detached while the VM is stopped. NOTE:
+        // pruneStaleDeviceMetadata matches on resourceUuid only, so stale
+        // rows are removed regardless of their metadataClass.
+        //
+        // Guard: only prune when the response actually delivered authoritative
+        // device info AND a virtual device list. An empty / all-invalid / partial
+        // reply gives no ground truth; absence-of-data is not proof-of-deletion.
+        if (!hasAuthoritativeDeviceInfo || rsp.getVirtualDeviceInfoList() == null) {
+            logger.debug(String.format(
+                    "skip pruneStaleDeviceMetadata for vm[%s]: empty device snapshot",
+                    vmUuid));
+            return;
+        }
+        try {
+            int pruned = vidManager.pruneStaleDeviceMetadata(vmUuid, surviving);
+            if (pruned > 0) {
+                logger.debug(String.format(
+                        "pruned %d stale VmInstanceResourceMetadataVO row(s) for vm[%s]",
+                        pruned, vmUuid));
+            }
+        } catch (Exception e) {
+            logger.warn(String.format("failed to prune stale device metadata for vm[%s]", vmUuid), e);
         }
     }
 