fix(payment): dedupe merkle candidate PeerIds + clearer sparse-network error #80
Merged: mickvandijke merged 2 commits into WithAutonomi:main from grumbach:fix/merkle-closeness-followups (Apr 27, 2026).
```diff
@@ -771,10 +771,49 @@ impl PaymentVerifier {
     /// Wrapped by [`verify_merkle_candidate_closeness`] with a pass-cache and
     /// single-flight guard so a batch of chunks and a storm of forged PUTs
     /// don't multiply the lookup cost.
+    /// Derive each candidate's `PeerId` from its `pub_key` and reject the
+    /// pool if any `PeerId` appears more than once.
+    ///
+    /// This is a pure-validation pre-check, runnable without a `P2PNode`: it
+    /// catches the case where one real peer's `pub_key` is repeated to
+    /// inflate the closeness match count, without paying for a Kademlia
+    /// lookup. An honest pool has [`evmlib::merkle_payments::CANDIDATES_PER_POOL`]
+    /// distinct candidate `pub_keys` by construction.
+    fn derive_distinct_candidate_peer_ids(
+        pool: &evmlib::merkle_payments::MerklePaymentCandidatePool,
+    ) -> Result<Vec<PeerId>> {
+        let mut candidate_peer_ids = Vec::with_capacity(pool.candidate_nodes.len());
+        let mut seen = std::collections::HashSet::with_capacity(pool.candidate_nodes.len());
+        for candidate in &pool.candidate_nodes {
+            let pid = peer_id_from_public_key_bytes(&candidate.pub_key).map_err(|e| {
+                Error::Payment(format!(
+                    "Invalid ML-DSA public key in merkle candidate: {e}"
+                ))
+            })?;
+            if !seen.insert(pid) {
+                return Err(Error::Payment(
+                    "Merkle candidate pool rejected: duplicate candidate PeerId. An \
+                     honest pool has 16 distinct candidate pub_keys; duplicates would \
+                     let a single real peer satisfy the closeness threshold by being \
+                     counted multiple times."
+                        .into(),
+                ));
+            }
+            candidate_peer_ids.push(pid);
+        }
+        Ok(candidate_peer_ids)
+    }
+
     #[allow(clippy::too_many_lines)]
     async fn verify_merkle_candidate_closeness_inner(
         &self,
         pool: &evmlib::merkle_payments::MerklePaymentCandidatePool,
     ) -> Result<()> {
+        // Pre-check: catch malformed/hostile pools (duplicate candidate
+        // PeerIds) before paying for the Kademlia lookup. Runs in unit
+        // tests without a P2PNode too.
+        let candidate_peer_ids = Self::derive_distinct_candidate_peer_ids(pool)?;
+
         // Release the RwLock guard before any await to avoid holding it
         // across an iterative Kademlia lookup.
         let attached = self.p2p_node.read().as_ref().map(Arc::clone);
```
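The dedupe pre-check above hinges on `HashSet::insert` returning `false` when the value is already present. A minimal standalone sketch of the same fail-closed idiom, with candidates reduced to plain `u64` ids in place of real `pub_key`-derived `PeerId`s (the types and error shape here are illustrative stand-ins, not the crate's):

```rust
use std::collections::HashSet;

// Hypothetical simplification: each candidate is represented by the id its
// pub_key would derive to. Duplicates are rejected, never silently collapsed.
fn derive_distinct_ids(candidates: &[u64]) -> Result<Vec<u64>, String> {
    let mut ids = Vec::with_capacity(candidates.len());
    let mut seen = HashSet::with_capacity(candidates.len());
    for &id in candidates {
        // HashSet::insert returns false if the value was already in the set,
        // which is exactly the duplicate signal the pre-check fails closed on.
        if !seen.insert(id) {
            return Err(format!("duplicate candidate id: {id}"));
        }
        ids.push(id);
    }
    Ok(ids)
}

fn main() {
    assert_eq!(derive_distinct_ids(&[1, 2, 3]), Ok(vec![1, 2, 3]));
    assert!(derive_distinct_ids(&[1, 2, 1]).is_err());
    println!("ok");
}
```

Failing closed (returning an error) rather than deduplicating keeps a forged pool from being quietly "repaired" into a weaker one.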
|
```diff
@@ -811,20 +850,6 @@ impl PaymentVerifier {
         };

         let pool_address = pool.midpoint_proof.address();

-        // Derive each candidate's would-be PeerId from its pub_key. Fail
-        // closed on malformed keys — the candidate signature check ran
-        // upstream so a valid-looking pool ought to parse cleanly here.
-        let mut candidate_peer_ids = Vec::with_capacity(pool.candidate_nodes.len());
-        for candidate in &pool.candidate_nodes {
-            let pid = peer_id_from_public_key_bytes(&candidate.pub_key).map_err(|e| {
-                Error::Payment(format!(
-                    "Invalid ML-DSA public key in merkle candidate: {e}"
-                ))
-            })?;
-            candidate_peer_ids.push(pid);
-        }
-
         let lookup_count = pool.candidate_nodes.len();
         let network_lookup = p2p_node
             .dht_manager()
```
|
```diff
@@ -857,10 +882,32 @@ impl PaymentVerifier {
             }
         };

-        // Set-membership check against the returned closest-peers list. The
-        // lookup may return fewer than `lookup_count` on a sparse network,
-        // which only tightens the bar — any candidate not in the returned
-        // list counts as unmatched.
+        // Sparse-network short-circuit: if the DHT itself returned fewer
+        // peers than the closeness threshold, the proof can never pass —
+        // not because the candidates are forged, but because we don't
+        // have an authoritative view to compare against. Surface this
+        // distinct cause so operators can tell "retry once the network
+        // settles" apart from "this peer sent a forged pool".
+        if network_peers.len() < Self::CANDIDATE_CLOSENESS_REQUIRED {
+            debug!(
+                "Merkle closeness deferred: network lookup returned {} peers \
+                 for pool midpoint {} (need at least {} to verify)",
+                network_peers.len(),
+                hex::encode(pool_address.0),
+                Self::CANDIDATE_CLOSENESS_REQUIRED,
+            );
+            return Err(Error::Payment(format!(
+                "Merkle candidate pool rejected: authoritative DHT lookup returned \
+                 only {} peers, less than the {} required to verify candidate \
+                 closeness. Retry once the routing table populates further.",
+                network_peers.len(),
+                Self::CANDIDATE_CLOSENESS_REQUIRED,
+            )));
+        }
+
+        // Set-membership check against the returned closest-peers list.
+        // Candidate `PeerId`s are deduplicated upstream, so each match
+        // corresponds to a distinct peer.
         let network_set: std::collections::HashSet<PeerId> =
             network_peers.iter().map(|n| n.peer_id).collect();
         let matched = candidate_peer_ids
```
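The two-stage check after the lookup (sparse-network short-circuit first, then set-membership counting) can be sketched in isolation. The 13-of-16 threshold matches the test comment in this PR; the plain `u64` ids and `String` errors are stand-ins for `PeerId` and `Error::Payment`:

```rust
use std::collections::HashSet;

// Assumed threshold (the PR's tests describe a 13/16 closeness bar).
const CANDIDATE_CLOSENESS_REQUIRED: usize = 13;

/// Err(..) = verification deferred (sparse network, retry later).
/// Ok(false) = enough peers returned, but the candidates didn't match.
fn check_closeness(candidate_ids: &[u64], network_peers: &[u64]) -> Result<bool, String> {
    // Sparse-network short-circuit: with fewer authoritative peers than the
    // threshold, the proof can never pass, so surface a distinct cause.
    if network_peers.len() < CANDIDATE_CLOSENESS_REQUIRED {
        return Err(format!(
            "lookup returned only {} peers, need at least {}; retry later",
            network_peers.len(),
            CANDIDATE_CLOSENESS_REQUIRED
        ));
    }
    // Set-membership count: candidate ids are deduplicated upstream, so
    // each match corresponds to a distinct peer.
    let network_set: HashSet<u64> = network_peers.iter().copied().collect();
    let matched = candidate_ids
        .iter()
        .filter(|id| network_set.contains(*id))
        .count();
    Ok(matched >= CANDIDATE_CLOSENESS_REQUIRED)
}

fn main() {
    let candidates: Vec<u64> = (0..16).collect();
    // Full overlap: passes.
    assert_eq!(check_closeness(&candidates, &candidates), Ok(true));
    // Sparse network: a distinct "deferred" error, not a forgery verdict.
    assert!(check_closeness(&candidates, &candidates[..5]).is_err());
    // Enough peers returned but no matches: rejected, not deferred.
    let strangers: Vec<u64> = (100..116).collect();
    assert_eq!(check_closeness(&candidates, &strangers), Ok(false));
}
```

Separating the "deferred" error from the "rejected" verdict is the point of the PR's second commit: operators can distinguish a routing table that has not populated yet from a peer sending forged pools.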
|
```diff
@@ -1998,6 +2045,44 @@ mod tests {
         );
     }

+    #[tokio::test]
+    async fn closeness_rejects_pool_with_duplicate_candidate_pub_keys() {
+        // An attacker who submits 16 copies of the same real peer's pub_key
+        // would otherwise satisfy the 13/16 closeness threshold trivially:
+        // that one peer's membership in the DHT-returned set would count
+        // 16 times. The dedupe check in verify_merkle_candidate_closeness_inner
+        // must reject the pool BEFORE the network lookup runs (so this test
+        // works even with no P2PNode attached).
+        let verifier = create_test_verifier();
+        let pool_hash = [0xDDu8; 32];
+
+        // Build a normal pool, then overwrite every candidate's pub_key
+        // with a single shared key so all 16 derive to the same PeerId.
+        let mut candidates = make_candidate_nodes(1_700_000_000);
+        let shared_pub_key = candidates
+            .first()
+            .expect("make_candidate_nodes returns CANDIDATES_PER_POOL entries")
+            .pub_key
+            .clone();
+        for c in &mut candidates {
+            c.pub_key = shared_pub_key.clone();
+        }
+        let pool = MerklePaymentCandidatePool {
+            midpoint_proof: fake_midpoint_proof(),
+            candidate_nodes: candidates,
+        };
+
+        let result = verifier
+            .verify_merkle_candidate_closeness(&pool, pool_hash)
+            .await;
+        let err = result.expect_err("duplicate candidate PeerIds must be rejected");
+        let msg = err.to_string();
+        assert!(
+            msg.contains("duplicate candidate PeerId"),
+            "rejection must be the duplicate-PeerId branch, got: {msg}"
+        );
+    }
+
     /// Build a deterministic but otherwise-unused `MidpointProof` so unit
     /// tests can construct a `MerklePaymentCandidatePool` without spinning
     /// up a real merkle tree. The closeness path only calls `.address()`
```
Review comment: The duplicate-candidate rejection message hard-codes "16 distinct candidate pub_keys", but elsewhere this function's documentation refers to `evmlib::merkle_payments::CANDIDATES_PER_POOL`. To avoid the error text becoming stale if the constant ever changes, consider formatting the message using that constant (or `pool.candidate_nodes.len()`, if that is the enforced size) instead of a literal 16.
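A minimal sketch of the reviewer's suggestion: build the rejection message from the shared constant with an inline format argument, so the text cannot drift if the pool size ever changes. The local constant here stands in for `evmlib::merkle_payments::CANDIDATES_PER_POOL`:

```rust
// Stand-in for evmlib::merkle_payments::CANDIDATES_PER_POOL.
const CANDIDATES_PER_POOL: usize = 16;

// Format the message from the constant instead of hard-coding "16", per the
// review comment; constants in scope can be captured as inline format args.
fn duplicate_rejection_message() -> String {
    format!(
        "Merkle candidate pool rejected: duplicate candidate PeerId. An honest \
         pool has {CANDIDATES_PER_POOL} distinct candidate pub_keys; duplicates \
         would let a single real peer satisfy the closeness threshold by being \
         counted multiple times."
    )
}

fn main() {
    let msg = duplicate_rejection_message();
    assert!(msg.contains("16 distinct candidate pub_keys"));
    println!("{msg}");
}
```

The test in the PR asserts on the substring "duplicate candidate PeerId", which this formulation preserves, so swapping the literal for the constant would not break it.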