Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,14 @@ public String getHostUuid() {
public void setHostUuid(String hostUuid) {
this.hostUuid = hostUuid;
}

private boolean cleanAllVmMetadata;

public boolean isCleanAllVmMetadata() {
return cleanAllVmMetadata;
}

public void setCleanAllVmMetadata(boolean cleanAllVmMetadata) {
this.cleanAllVmMetadata = cleanAllVmMetadata;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,13 @@
import org.zstack.header.message.MessageReply;

public class CleanupVmInstanceMetadataOnPrimaryStorageReply extends MessageReply {
private int cleanedCount;

public int getCleanedCount() {
return cleanedCount;
}

public void setCleanedCount(int cleanedCount) {
this.cleanedCount = cleanedCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ public void setFailedVmUuids(List<String> failedVmUuids) {
this.failedVmUuids = failedVmUuids;
}

private List<String> failedPrimaryStorageUuids;

public List<String> getFailedPrimaryStorageUuids() {
return failedPrimaryStorageUuids;
}

public void setFailedPrimaryStorageUuids(List<String> failedPrimaryStorageUuids) {
this.failedPrimaryStorageUuids = failedPrimaryStorageUuids;
}

public static APICleanupVmInstanceMetadataEvent __example__() {
APICleanupVmInstanceMetadataEvent evt = new APICleanupVmInstanceMetadataEvent();
evt.totalCleaned = 5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@
isAction = true
)
public class APICleanupVmInstanceMetadataMsg extends APIMessage {
@APIParam(resourceType = VmInstanceVO.class, nonempty = true)
@APIParam(resourceType = VmInstanceVO.class, required = false)
private List<String> vmUuids;

@APIParam(required = false)
private boolean cleanAllVmMetadata;

public List<String> getVmUuids() {
return vmUuids;
}
Expand All @@ -25,9 +28,18 @@ public void setVmUuids(List<String> vmUuids) {
this.vmUuids = vmUuids;
}

public boolean isCleanAllVmMetadata() {
return cleanAllVmMetadata;
}

public void setCleanAllVmMetadata(boolean cleanAllVmMetadata) {
this.cleanAllVmMetadata = cleanAllVmMetadata;
}

public static APICleanupVmInstanceMetadataMsg __example__() {
APICleanupVmInstanceMetadataMsg msg = new APICleanupVmInstanceMetadataMsg();
msg.vmUuids = java.util.Arrays.asList(uuid(), uuid());
msg.cleanAllVmMetadata = false;
return msg;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import java.io.File;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -3560,6 +3561,11 @@ public void done(ErrorCodeList errorCodeList) {

@Override
protected void handle(final CleanupVmInstanceMetadataOnPrimaryStorageMsg msg) {
if (msg.isCleanAllVmMetadata()) {
handleCleanAllVmMetadataOnLocalStorage(msg);
return;
}
Comment on lines +3564 to +3567
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

cleanAll 分支绕过了现有的元数据串行化队列。

这里直接 return 到新的全量清理逻辑,导致它不再经过下面单 VM cleanup 使用的 thdf.chainSubmit(...),也和 Line 3368 的 metadata update 路径失去了同一个串行化边界。这样全量清理可以和更新/单 VM 清理并发修改同一套 metadata,结果会依赖时序。建议把 cleanAllVmMetadata=true 也放进同一个 per-PS sync chain,再在 chain 内部做 host 级 fan-out。

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@plugin/localstorage/src/main/java/org/zstack/storage/primary/local/LocalStorageBase.java`
around lines 3564 - 3567, The cleanAll branch currently returns early and
bypasses the existing per-primary-storage serialization used by thdf.chainSubmit
and the metadata update path (so cleanAll can race with per-VM updates);
instead, wrap the clean-all work in the same per-PS chain: submit a chain task
via thdf.chainSubmit using the same serialization key you use for single-VM
cleanup/metadata updates, and inside that chain task call
handleCleanAllVmMetadataOnLocalStorage (performing any host-level fan-out from
within the chain). This ensures cleanAllVmMetadata follows the same serialized
boundary as the metadata update code and single-VM cleanup.


thdf.chainSubmit(new ChainTask(msg) {
@Override
public String getSyncSignature() {
Expand Down Expand Up @@ -3625,4 +3631,64 @@ public String getName() {
}
});
}

private void handleCleanAllVmMetadataOnLocalStorage(final CleanupVmInstanceMetadataOnPrimaryStorageMsg msg) {
CleanupVmInstanceMetadataOnPrimaryStorageReply reply = new CleanupVmInstanceMetadataOnPrimaryStorageReply();

List<String> connectedHostUuids = SQL.New(
"select h.hostUuid from LocalStorageHostRefVO h, HostVO host" +
" where h.primaryStorageUuid = :psUuid" +
" and h.hostUuid = host.uuid" +
" and host.status = :hstatus", String.class)
.param("psUuid", self.getUuid())
.param("hstatus", HostStatus.Connected)
.list();
if (connectedHostUuids.isEmpty()) {
logger.warn(String.format("[MetadataCleanup] cleanAll: no connected host found for local ps[uuid:%s]", self.getUuid()));
reply.setCleanedCount(0);
bus.reply(msg, reply);
return;
Comment on lines +3638 to +3650
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

cleanAll 现在会把“未清理离线宿主机”误报成成功。

这里仅查询 HostStatus.Connected 的宿主机;只要有一台在线宿主机成功,最终就会返回成功并带 cleanedCount。但挂载在该 local ps 上的离线宿主机上的 metadata 根本没有机会被清理,cleanAllVmMetadata 的语义就不成立了。建议像本类的扫描逻辑一样先枚举全部宿主机,把离线宿主机计入失败/部分失败,或者直接在存在离线宿主机时返回错误。

可参考的修正方向
-        List<String> connectedHostUuids = SQL.New(
+        List<String> allHostUuids = SQL.New(
+                        "select h.hostUuid from LocalStorageHostRefVO h, HostVO host" +
+                                " where h.primaryStorageUuid = :psUuid" +
+                                " and h.hostUuid = host.uuid", String.class)
+                .param("psUuid", self.getUuid())
+                .list();
+
+        List<String> connectedHostUuids = SQL.New(
                         "select h.hostUuid from LocalStorageHostRefVO h, HostVO host" +
                                 " where h.primaryStorageUuid = :psUuid" +
                                 " and h.hostUuid = host.uuid" +
                                 " and host.status = :hstatus", String.class)
                 .param("psUuid", self.getUuid())
                 .param("hstatus", HostStatus.Connected)
                 .list();
-        if (connectedHostUuids.isEmpty()) {
-            logger.warn(String.format("[MetadataCleanup] cleanAll: no connected host found for local ps[uuid:%s]", self.getUuid()));
-            reply.setCleanedCount(0);
+
+        List<String> disconnectedHostUuids = new ArrayList<>(allHostUuids);
+        disconnectedHostUuids.removeAll(connectedHostUuids);
+        if (!disconnectedHostUuids.isEmpty()) {
+            reply.setError(operr("cannot clean all vm metadata on local primary storage[uuid:%s], disconnected hosts=%s",
+                    self.getUuid(), disconnectedHostUuids));
             bus.reply(msg, reply);
             return;
         }

Also applies to: 3684-3690

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@plugin/localstorage/src/main/java/org/zstack/storage/primary/local/LocalStorageBase.java`
around lines 3638 - 3650, The cleanAll logic in LocalStorageBase (method
cleanAll / helper cleanAllVmMetadata) only queries hosts with
HostStatus.Connected, which allows cleanAll to report success while offline
hosts remain uncleaned; change the host enumeration to fetch all host UUIDs
mounted to this primary storage (remove the "and host.status = :hstatus" filter
or query HostVO without status), then detect hosts whose HostStatus != Connected
and mark the operation as partial failure or return an error (e.g., set reply to
failed/partial with cleanedCount for connected hosts and include the offline
host UUIDs/count); update the analogous block around the cleanAllVmMetadata call
(the code at the other occurrence) the same way so offline hosts are counted and
reported rather than ignored.

}

AtomicInteger totalCleaned = new AtomicInteger(0);
new While<>(connectedHostUuids).all((hostUuid, com) -> {
final LocalStorageHypervisorBackend bkd;
try {
LocalStorageHypervisorFactory f = getHypervisorBackendFactoryByHostUuid(hostUuid);
bkd = f.getHypervisorBackend(self);
} catch (Exception e) {
logger.warn(String.format("[MetadataCleanup] cleanAll: failed to prepare backend for host[uuid:%s] on ps[uuid:%s]: %s",
hostUuid, self.getUuid(), e.getMessage()));
com.addError(operr("failed to prepare backend for host[uuid:%s]: %s", hostUuid, e.getMessage()));
com.done();
return;
}
bkd.handle(msg, hostUuid, new ReturnValueCompletion<CleanupVmInstanceMetadataOnPrimaryStorageReply>(com) {
@Override
public void success(CleanupVmInstanceMetadataOnPrimaryStorageReply returnValue) {
totalCleaned.addAndGet(returnValue.getCleanedCount());
com.done();
}

@Override
public void fail(ErrorCode errorCode) {
logger.warn(String.format("[MetadataCleanup] cleanAll: failed on host[uuid:%s] on ps[uuid:%s]: %s",
hostUuid, self.getUuid(), errorCode));
com.addError(errorCode);
com.done();
}
});
}).run(new WhileDoneCompletion(msg) {
@Override
public void done(ErrorCodeList errorCodeList) {
if (!errorCodeList.getCauses().isEmpty() && errorCodeList.getCauses().size() == connectedHostUuids.size()) {
reply.setError(operr("failed to cleanup all vm metadata from all hosts on local primary storage[uuid:%s], causes: %s",
self.getUuid(), errorCodeList));
} else {
reply.setCleanedCount(totalCleaned.get());
}
bus.reply(msg, reply);
}
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -946,9 +946,11 @@ public static class ScanVmMetadataRsp extends AgentResponse {

public static class CleanupVmMetadataCmd extends AgentCommand {
public String metadataPath;
public boolean cleanAllVmMetadata;
}

public static class CleanupVmMetadataRsp extends AgentResponse {
public int cleanedCount;
}

public static class PrefixRebaseBackingFilesCmd extends LocalStorageKvmBackend.AgentCommand {
Expand Down Expand Up @@ -3945,11 +3947,13 @@ public void fail(ErrorCode errorCode) {
void handle(CleanupVmInstanceMetadataOnPrimaryStorageMsg msg, String hostUuid, ReturnValueCompletion<CleanupVmInstanceMetadataOnPrimaryStorageReply> completion) {
CleanupVmMetadataCmd cmd = new CleanupVmMetadataCmd();
cmd.metadataPath = msg.getMetadataPath();
cmd.cleanAllVmMetadata = msg.isCleanAllVmMetadata();

httpCall(CLEANUP_VM_METADATA_PATH, hostUuid, cmd, CleanupVmMetadataRsp.class, new ReturnValueCompletion<CleanupVmMetadataRsp>(completion) {
@Override
public void success(CleanupVmMetadataRsp rsp) {
CleanupVmInstanceMetadataOnPrimaryStorageReply reply = new CleanupVmInstanceMetadataOnPrimaryStorageReply();
reply.setCleanedCount(rsp.cleanedCount);
completion.success(reply);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2170,6 +2170,7 @@ public void handle(CleanupVmInstanceMetadataOnPrimaryStorageMsg msg, String host
CleanupVmMetadataCmd cmd = new CleanupVmMetadataCmd();
cmd.setUuid(msg.getPrimaryStorageUuid());
cmd.metadataPath = msg.getMetadataPath();
cmd.cleanAllVmMetadata = msg.isCleanAllVmMetadata();

KVMHostAsyncHttpCallMsg hmsg = new KVMHostAsyncHttpCallMsg();
hmsg.setCommand(cmd);
Expand All @@ -2191,6 +2192,7 @@ public void run(MessageReply reply) {
}

CleanupVmInstanceMetadataOnPrimaryStorageReply r = new CleanupVmInstanceMetadataOnPrimaryStorageReply();
r.setCleanedCount(rsp.cleanedCount);
completion.success(r);
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -990,9 +990,11 @@ public static class ScanVmMetadataRsp extends NfsPrimaryStorageAgentResponse {

public static class CleanupVmMetadataCmd extends NfsPrimaryStorageAgentCommand {
public String metadataPath;
public boolean cleanAllVmMetadata;
}

public static class CleanupVmMetadataRsp extends NfsPrimaryStorageAgentResponse {
public int cleanedCount;
}

public static class PrefixRebaseBackingFilesCmd extends NfsPrimaryStorageAgentCommand {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ public Result throwExceptionIfError() {
}
}

@Param(required = true, nonempty = true, nullElements = false, emptyString = true, noTrim = false)
@Param(required = false, nonempty = false, nullElements = false, emptyString = true, noTrim = false)
public java.util.List vmUuids;

@Param(required = false, nonempty = false, nullElements = false, emptyString = true, noTrim = false)
public boolean cleanAllVmMetadata;

@Param(required = false)
public java.util.List systemTags;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,12 @@ public java.util.List getFailedVmUuids() {
return this.failedVmUuids;
}

public java.util.List failedPrimaryStorageUuids;
public void setFailedPrimaryStorageUuids(java.util.List failedPrimaryStorageUuids) {
this.failedPrimaryStorageUuids = failedPrimaryStorageUuids;
}
public java.util.List getFailedPrimaryStorageUuids() {
return this.failedPrimaryStorageUuids;
}

}