From e6446647efc786c19e86e78e53227b3942ec2890 Mon Sep 17 00:00:00 2001 From: Ovi Trif Date: Tue, 28 Apr 2026 09:43:02 +0200 Subject: [PATCH] feat: add scorer-inspect crate with text/csv/json output scorer-inspect is a small standalone CLI for offline diagnostics of LDK serialized scorer files - both the `latest.bin` style payloads served at URLs like api.blocktank.to/scorer-prod or scores.zeusln.com/latest.bin and the bytes returned by Node::export_pathfinding_scores. Both wire formats are identical in current LDK (ProbabilisticScorer::write just delegates to ChannelLiquidities::write), so the parser is a single ChannelLiquidities::read; the --source flag is metadata only. Output is configurable as plain text (default), CSV, or pretty-printed JSON, optionally written to disk via --save. The summary section reports entry count, history-populated percentage, and offset / bucket weight distributions; the per-channel section can be limited via --top or fully dumped via --all, and sorted by narrowest offset window, recency, or historical bucket weight. Depends on the diagnostics() accessors added to ChannelLiquidities and ProbabilisticScorer in the synonymdev/rust-lightning fork. The ldk-node [patch.crates-io] block is unchanged here; the rust-lightning PR must land first and the rev pin must be bumped before this branch can merge. For local development of this crate, point [patch.crates-io] at a checkout of synonymdev/rust-lightning that has the diagnostics accessors applied. --- Cargo.toml | 2 +- crates/scorer-inspect/Cargo.toml | 16 ++ crates/scorer-inspect/README.md | 69 +++++++ crates/scorer-inspect/src/main.rs | 332 ++++++++++++++++++++++++++++++ 4 files changed, 418 insertions(+), 1 deletion(-) create mode 100644 crates/scorer-inspect/Cargo.toml create mode 100644 crates/scorer-inspect/README.md create mode 100644 crates/scorer-inspect/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index fc45137fe..282f4402b 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = [".", "crates/bdk-wallet-aggregate"] +members = [".", "crates/bdk-wallet-aggregate", "crates/scorer-inspect"] exclude = ["bindings/uniffi-bindgen"] [package] diff --git a/crates/scorer-inspect/Cargo.toml b/crates/scorer-inspect/Cargo.toml new file mode 100644 index 000000000..971e59144 --- /dev/null +++ b/crates/scorer-inspect/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "scorer-inspect" +version = "0.1.0" +edition = "2021" +rust-version = "1.85" +description = "Offline diagnostics CLI for LDK ProbabilisticScorer / ChannelLiquidities files." +license = "MIT OR Apache-2.0" + +[dependencies] +anyhow = "1" +clap = { version = "4", features = ["derive"] } +csv = "1" +humansize = "2" +lightning = { version = "0.2.0", features = ["std"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" diff --git a/crates/scorer-inspect/README.md b/crates/scorer-inspect/README.md new file mode 100644 index 000000000..e8cb21d17 --- /dev/null +++ b/crates/scorer-inspect/README.md @@ -0,0 +1,69 @@ +# scorer-inspect + +Offline diagnostics CLI for LDK serialized scorer files. + +Reads a binary file produced by either: +- LDK's external scorer / `ProbabilisticScorer::write` (e.g. `https://api.blocktank.to/scorer-prod`, `https://scores.zeusln.com/latest.bin`), or +- ldk-node's `Node::export_pathfinding_scores`. + +Both wire formats are identical in current LDK — `ProbabilisticScorer::write` just calls `self.channel_liquidities.write(w)`. 
The tool always reads via `ChannelLiquidities::read`, and the `--source` flag is purely metadata for the report.
+
+## Build
+
+```
+cargo build -p scorer-inspect --release
+```
+
+## Usage
+
+```
+scorer-inspect <FILE>
+  [--source served|exported]
+  [--output text|csv|json]
+  [--save PATH]
+  [--top N]
+  [--all]
+  [--sort narrow|recent|history]
+```
+
+- `--source` — annotates the report; doesn't affect parsing.
+- `--output text` (default) — short summary + top-N channel rows for human review.
+- `--output csv` — summary row, blank line, then one row per channel. Editor-friendly.
+- `--output json` — `{ summary: {...}, channels: [...] }`. Machine-readable.
+- `--save PATH` — write to file instead of stdout.
+- `--top N` (default 20) — limit channel rows.
+- `--all` — dump every entry; overrides `--top`.
+- `--sort` — `narrow` (smallest offset window first; highest information density), `recent` (most recently updated first), `history` (largest historical-bucket weight first; most probe-derived signal).
+
+## Examples
+
+```
+# Quick eyeball of Zeus's served file
+curl -o /tmp/zeus.bin https://scores.zeusln.com/latest.bin
+scorer-inspect /tmp/zeus.bin --source served
+
+# Full per-channel CSV diff between Zeus and Bitkit
+curl -o /tmp/blocktank.bin https://api.blocktank.to/scorer-prod
+scorer-inspect /tmp/zeus.bin --source served --all --output csv --save /tmp/zeus.csv
+scorer-inspect /tmp/blocktank.bin --source served --all --output csv --save /tmp/blocktank.csv
+```
+
+## What the columns mean
+
+- `min_liquidity_offset_msat` / `max_liquidity_offset_msat` — non-directional offsets relative to the channel's node ordering. Resolving them into directional `min_liquidity_sat` / `max_liquidity_sat` requires a `NetworkGraph` (capacity + node-id ordering), which this tool doesn't yet take.
+- `has_history` — whether either historical-bucket array is non-zero. This is the single best signal for distinguishing probe-derived data from synthetic graph seeding.
+- `total_valid_points_tracked` (CSV/JSON), `history_weight` (text) — LDK-internal scalar weight summarizing the historical bucket distribution. Stored as `f64`; not an integer payment count.
+- `last_updated_secs` — seconds since the Unix epoch when either liquidity bound was last modified.
+
+## Distinguishing rich probe data from synthetic seeding
+
+A scorer file can be large for two very different reasons:
+
+- **Probe-rich**: many entries with `has_history=true`, narrow `[min_offset, max_offset]` windows, sizable `total_valid_points_tracked`. This is what real probing produces.
+- **Synthetic-coverage**: many entries with `has_history=false`, `min_offset=0`, `max_offset` close to channel capacity, zero bucket weights. This is what gossip-graph seeding produces — big file, low pathfinding signal.
+
+To classify a file, run the tool against it and check the `history populated` percentage and the offset-window distribution in the summary.
+
+## Limits
+
+- v1 is graph-free: directional output and capacity-resolved sats are not available without a `NetworkGraph`. Adding `--graph PATH` is tracked as future work.
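+
+For reference, the directional resolution a future `--graph` mode would need is
+sketched below. This is illustrative only: `directed_bounds` is a hypothetical
+helper, and the swap convention is assumed to mirror LDK's internal
+`ChannelLiquidity::as_directed` (the offsets swap roles when the source node is
+`node_two`).
+
+```
+/// Resolve the two stored non-directional offsets into per-direction
+/// liquidity bounds, given the channel capacity from a `NetworkGraph`.
+/// Returns ((min, max) for node_one -> node_two, (min, max) for the reverse).
+fn directed_bounds(
+	capacity_msat: u64,
+	min_liquidity_offset_msat: u64,
+	max_liquidity_offset_msat: u64,
+) -> ((u64, u64), (u64, u64)) {
+	let one_to_two = (
+		min_liquidity_offset_msat,
+		capacity_msat.saturating_sub(max_liquidity_offset_msat),
+	);
+	let two_to_one = (
+		max_liquidity_offset_msat,
+		capacity_msat.saturating_sub(min_liquidity_offset_msat),
+	);
+	(one_to_two, two_to_one)
+}
+```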
diff --git a/crates/scorer-inspect/src/main.rs b/crates/scorer-inspect/src/main.rs
new file mode 100644
index 000000000..677c95f2c
--- /dev/null
+++ b/crates/scorer-inspect/src/main.rs
@@ -0,0 +1,332 @@
+//! Offline diagnostics for LDK serialized scorer files.
+//!
+//! Reads a binary file produced by either:
+//! - LDK's external scorer / `ProbabilisticScorer::write`, or
+//! - ldk-node's `Node::export_pathfinding_scores`.
+//!
+//! Both are wire-compatible — they both serialize a `ChannelLiquidities` —
+//! so this tool always reads via `ChannelLiquidities::read`. The `--source`
+//! flag is purely metadata for the report.
+
+use std::fs;
+use std::io::{BufWriter, Write};
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use clap::{Parser, ValueEnum};
+use humansize::{format_size, BINARY};
+use lightning::io::Cursor;
+use lightning::routing::scoring::{ChannelLiquidities, ChannelLiquidityDiagnostic};
+use lightning::util::ser::Readable;
+use serde::Serialize;
+
+#[derive(Copy, Clone, Debug, ValueEnum)]
+enum Source {
+	/// File came from a remote URL such as `https://api.blocktank.to/scorer-prod`
+	/// or `https://scores.zeusln.com/latest.bin`.
+	Served,
+	/// File came from `Node::export_pathfinding_scores`.
+	Exported,
+}
+
+#[derive(Copy, Clone, Debug, ValueEnum)]
+enum OutputFormat {
+	Text,
+	Csv,
+	Json,
+}
+
+#[derive(Copy, Clone, Debug, ValueEnum)]
+enum Sort {
+	/// Smallest `[min_offset, max_offset]` window first (highest-information entries).
+	Narrow,
+	/// Most recently updated first.
+	Recent,
+	/// Highest historical-bucket weight first (most probe-derived signal).
+	History,
+}
+
+#[derive(Parser, Debug)]
+#[command(
+	name = "scorer-inspect",
+	about = "Offline diagnostics for LDK ProbabilisticScorer / ChannelLiquidities files",
+	long_about = None,
+)]
+struct Cli {
+	/// Path to a binary scorer file.
+	file: PathBuf,
+	/// Metadata tag for the report. The wire format is identical for both today.
+	#[arg(long, value_enum, default_value_t = Source::Served)]
+	source: Source,
+	/// Output format.
+	#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
+	output: OutputFormat,
+	/// Write output to a file instead of stdout.
+	#[arg(long)]
+	save: Option<PathBuf>,
+	/// Limit per-channel rows to this number (default 20). Ignored if `--all` is set.
+	#[arg(long)]
+	top: Option<usize>,
+	/// Dump every entry. Overrides `--top`.
+	#[arg(long, default_value_t = false)]
+	all: bool,
+	/// Sort key for per-channel rows.
+	#[arg(long, value_enum, default_value_t = Sort::Narrow)]
+	sort: Sort,
+}
+
+#[derive(Debug, Serialize)]
+struct Summary {
+	file: String,
+	source: String,
+	file_size_bytes: u64,
+	file_size_human: String,
+	entry_count: usize,
+	history_populated_count: usize,
+	history_empty_count: usize,
+	history_populated_pct: f64,
+	min_offset_msat_p50: u64,
+	min_offset_msat_p95: u64,
+	min_offset_msat_max: u64,
+	max_offset_msat_p50: u64,
+	max_offset_msat_p95: u64,
+	max_offset_msat_max: u64,
+	total_valid_points_tracked_p50: f64,
+	total_valid_points_tracked_p95: f64,
+	total_valid_points_tracked_max: f64,
+}
+
+#[derive(Debug, Serialize)]
+struct ChannelRow {
+	scid: u64,
+	min_liquidity_offset_msat: u64,
+	max_liquidity_offset_msat: u64,
+	has_history: bool,
+	total_valid_points_tracked: f64,
+	last_updated_secs: u64,
+	offset_history_last_updated_secs: u64,
+	last_datapoint_time_secs: u64,
+}
+
+#[derive(Debug, Serialize)]
+struct Report {
+	summary: Summary,
+	channels: Vec<ChannelRow>,
+}
+
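+// Nearest-rank percentile on an already-sorted slice: pick the element at
+// round((len - 1) * p) rather than interpolating. For example, for
+// [1, 2, 3, 4, 5] and p = 0.50 the index is round(4 * 0.5) = 2, so p50 = 3;
+// p = 0.95 gives round(3.8) = 4, so p95 = 5. Coarse, but adequate for the
+// distribution summary below.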
+fn percentile_u64(sorted: &[u64], p: f64) -> u64 {
+	if sorted.is_empty() {
+		return 0;
+	}
+	let idx = ((sorted.len() as f64 - 1.0) * p).round() as usize;
+	sorted[idx.min(sorted.len() - 1)]
+}
+
+fn percentile_f64(sorted: &[f64], p: f64) -> f64 {
+	if sorted.is_empty() {
+		return 0.0;
+	}
+	let idx = ((sorted.len() as f64 - 1.0) * p).round() as usize;
+	sorted[idx.min(sorted.len() - 1)]
+}
+
+fn build_summary(
+	file_path: &Path, source: Source, file_size: u64,
+	diags: &[ChannelLiquidityDiagnostic],
+) -> Summary {
+	let entry_count = diags.len();
+	let history_populated_count = diags.iter().filter(|d| d.has_history).count();
+	let history_empty_count = entry_count - history_populated_count;
+	let history_populated_pct = if entry_count == 0 {
+		0.0
+	} else {
+		100.0 * history_populated_count as f64 / entry_count as f64
+	};
+
+	let mut min_offsets: Vec<u64> = diags.iter().map(|d| d.min_liquidity_offset_msat).collect();
+	let mut max_offsets: Vec<u64> = diags.iter().map(|d| d.max_liquidity_offset_msat).collect();
+	let mut weights: Vec<f64> =
+		diags.iter().map(|d| d.total_valid_points_tracked).collect();
+	min_offsets.sort_unstable();
+	max_offsets.sort_unstable();
+	// f64 has no total order; treat incomparable values (NaN) as equal.
+	weights.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+
+	Summary {
+		file: file_path.display().to_string(),
+		source: format!("{:?}", source).to_lowercase(),
+		file_size_bytes: file_size,
+		file_size_human: format_size(file_size, BINARY),
+		entry_count,
+		history_populated_count,
+		history_empty_count,
+		history_populated_pct,
+		min_offset_msat_p50: percentile_u64(&min_offsets, 0.50),
+		min_offset_msat_p95: percentile_u64(&min_offsets, 0.95),
+		min_offset_msat_max: min_offsets.last().copied().unwrap_or(0),
+		max_offset_msat_p50: percentile_u64(&max_offsets, 0.50),
+		max_offset_msat_p95: percentile_u64(&max_offsets, 0.95),
+		max_offset_msat_max: max_offsets.last().copied().unwrap_or(0),
+		total_valid_points_tracked_p50: percentile_f64(&weights, 0.50),
+		total_valid_points_tracked_p95: percentile_f64(&weights, 0.95),
+		total_valid_points_tracked_max: weights.last().copied().unwrap_or(0.0),
+	}
+}
+
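+// Sorting happens on the full diagnostic list before any `--top` truncation
+// in `main`, so the row limit always keeps the best entries under the chosen
+// sort key rather than an arbitrary prefix of the file.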
+fn sort_diagnostics(diags: &mut [ChannelLiquidityDiagnostic], by: Sort) {
+	match by {
+		Sort::Narrow => diags.sort_by_key(|d| {
+			d.max_liquidity_offset_msat.saturating_sub(d.min_liquidity_offset_msat)
+		}),
+		Sort::Recent => diags.sort_by(|a, b| b.last_updated_secs.cmp(&a.last_updated_secs)),
+		Sort::History => diags.sort_by(|a, b| {
+			b.total_valid_points_tracked
+				.partial_cmp(&a.total_valid_points_tracked)
+				.unwrap_or(std::cmp::Ordering::Equal)
+		}),
+	}
+}
+
+fn render_text<W: Write>(writer: &mut W, report: &Report) -> Result<()> {
+	let s = &report.summary;
+	writeln!(writer, "scorer-inspect — offline scorer diagnostics")?;
+	writeln!(writer, " file: {}", s.file)?;
+	writeln!(writer, " source: {}", s.source)?;
+	writeln!(
+		writer,
+		" file size: {} ({} bytes)",
+		s.file_size_human, s.file_size_bytes
+	)?;
+	writeln!(writer, " entries: {}", s.entry_count)?;
+	writeln!(
+		writer,
+		" history populated: {} ({:.1}%)",
+		s.history_populated_count, s.history_populated_pct
+	)?;
+	writeln!(writer, " history empty: {}", s.history_empty_count)?;
+	writeln!(writer)?;
+	writeln!(
+		writer,
+		" min_offset_msat: p50={} p95={} max={}",
+		s.min_offset_msat_p50, s.min_offset_msat_p95, s.min_offset_msat_max
+	)?;
+	writeln!(
+		writer,
+		" max_offset_msat: p50={} p95={} max={}",
+		s.max_offset_msat_p50, s.max_offset_msat_p95, s.max_offset_msat_max
+	)?;
+	writeln!(
+		writer,
+		" history bucket weight: p50={:.0} p95={:.0} max={:.0}",
+		s.total_valid_points_tracked_p50,
+		s.total_valid_points_tracked_p95,
+		s.total_valid_points_tracked_max
+	)?;
+	writeln!(writer)?;
+
+	if !report.channels.is_empty() {
+		writeln!(
+			writer,
+			"{:>20} {:>14} {:>14} {:>4} {:>16} {:>10}",
+			"scid", "min_offset", "max_offset", "hist", "history_weight", "updated"
+		)?;
+		for row in &report.channels {
+			writeln!(
+				writer,
+				"{:>20} {:>14} {:>14} {:>4} {:>16.0} {:>10}",
+				row.scid,
+				row.min_liquidity_offset_msat,
+				row.max_liquidity_offset_msat,
+				if row.has_history { "yes" } else { "no" },
+				row.total_valid_points_tracked,
+				row.last_updated_secs,
+			)?;
+		}
+	}
+	Ok(())
+}
+
+fn render_csv<W: Write>(mut writer: W, report: &Report) -> Result<()> {
+	// Section 1: summary as a single labeled row.
+	{
+		let mut wtr = csv::Writer::from_writer(&mut writer);
+		wtr.serialize(&report.summary).context("write summary row")?;
+		wtr.flush()?;
+	}
+	if !report.channels.is_empty() {
+		// Blank line + a fresh CSV section for the channel rows so a human can scan
+		// the file in Sublime Text without confusing the two sections.
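+		// Downstream tooling should treat the saved file as two independent CSV
+		// documents separated by one blank line, and split on that blank line
+		// before handing either section to a CSV parser.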
+		writer.write_all(b"\n")?;
+		let mut wtr = csv::Writer::from_writer(&mut writer);
+		for row in &report.channels {
+			wtr.serialize(row).context("write channel row")?;
+		}
+		wtr.flush()?;
+	}
+	Ok(())
+}
+
+fn render_json<W: Write>(writer: W, report: &Report) -> Result<()> {
+	serde_json::to_writer_pretty(writer, report).context("write json")?;
+	Ok(())
+}
+
+fn main() -> Result<()> {
+	let cli = Cli::parse();
+
+	let bytes = fs::read(&cli.file)
+		.with_context(|| format!("read scorer file {}", cli.file.display()))?;
+	let file_size = bytes.len() as u64;
+
+	let mut cursor = Cursor::new(&bytes);
+	let liquidities = ChannelLiquidities::read(&mut cursor)
+		.map_err(|e| anyhow::anyhow!("decode ChannelLiquidities: {:?}", e))?;
+	let mut diags = liquidities.diagnostics();
+
+	let summary = build_summary(&cli.file, cli.source, file_size, &diags);
+
+	sort_diagnostics(&mut diags, cli.sort);
+	let limit = if cli.all { diags.len() } else { cli.top.unwrap_or(20) };
+	let channels: Vec<ChannelRow> = diags
+		.into_iter()
+		.take(limit)
+		.map(|d| ChannelRow {
+			scid: d.scid,
+			min_liquidity_offset_msat: d.min_liquidity_offset_msat,
+			max_liquidity_offset_msat: d.max_liquidity_offset_msat,
+			has_history: d.has_history,
+			total_valid_points_tracked: d.total_valid_points_tracked,
+			last_updated_secs: d.last_updated_secs,
+			offset_history_last_updated_secs: d.offset_history_last_updated_secs,
+			last_datapoint_time_secs: d.last_datapoint_time_secs,
+		})
+		.collect();
+
+	let report = Report { summary, channels };
+
+	match cli.save {
+		Some(path) => {
+			let f = fs::File::create(&path)
+				.with_context(|| format!("create output file {}", path.display()))?;
+			let mut buf = BufWriter::new(f);
+			match cli.output {
+				OutputFormat::Text => render_text(&mut buf, &report)?,
+				OutputFormat::Csv => render_csv(&mut buf, &report)?,
+				OutputFormat::Json => render_json(&mut buf, &report)?,
+			}
+			buf.flush()?;
+			// Report the size of the file we just wrote, not of the input scorer file.
+			let out_size = fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
+			eprintln!("wrote {} ({})", path.display(), format_size(out_size, BINARY));
+		}
+		None => {
+			let stdout = std::io::stdout();
+			let mut handle = stdout.lock();
+			match cli.output {
+				OutputFormat::Text => render_text(&mut handle, &report)?,
+				OutputFormat::Csv => render_csv(&mut handle, &report)?,
+				OutputFormat::Json => render_json(&mut handle, &report)?,
+			}
+			handle.flush()?;
+		}
	}
+
+	Ok(())
+}
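+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	// Sanity check for the nearest-rank percentile helper documented above:
+	// p50 of [1..5] picks index round(4 * 0.5) = 2, p95 picks index 4, and an
+	// empty slice falls back to 0.
+	#[test]
+	fn percentile_nearest_rank() {
+		let sorted = [1u64, 2, 3, 4, 5];
+		assert_eq!(percentile_u64(&sorted, 0.50), 3);
+		assert_eq!(percentile_u64(&sorted, 0.95), 5);
+		assert_eq!(percentile_u64(&[], 0.50), 0);
+	}
+}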