From e6446647efc786c19e86e78e53227b3942ec2890 Mon Sep 17 00:00:00 2001 From: Ovi Trif Date: Tue, 28 Apr 2026 09:43:02 +0200 Subject: [PATCH] feat: add scorer-inspect crate with text/csv/json output scorer-inspect is a small standalone CLI for offline diagnostics of LDK serialized scorer files - both the `latest.bin` style payloads served at URLs like api.blocktank.to/scorer-prod or scores.zeusln.com/latest.bin and the bytes returned by Node::export_pathfinding_scores. Both wire formats are identical in current LDK (ProbabilisticScorer::write just delegates to ChannelLiquidities::write), so the parser is a single ChannelLiquidities::read; the --source flag is metadata only. Output is configurable as plain text (default), CSV, or pretty-printed JSON, optionally written to disk via --save. The summary section reports entry count, history-populated percentage, and offset / bucket weight distributions; the per-channel section can be limited via --top or fully dumped via --all, and sorted by narrowest offset window, recency, or historical bucket weight. Depends on the diagnostics() accessors added to ChannelLiquidities and ProbabilisticScorer in the synonymdev/rust-lightning fork. The ldk-node [patch.crates-io] block is unchanged here; the rust-lightning PR must land first and the rev pin must be bumped before this branch can merge. For local development of this crate, point [patch.crates-io] at a checkout of synonymdev/rust-lightning that has the diagnostics accessors applied. --- Cargo.toml | 2 +- crates/scorer-inspect/Cargo.toml | 16 ++ crates/scorer-inspect/README.md | 69 +++++++ crates/scorer-inspect/src/main.rs | 332 ++++++++++++++++++++++++++++++ 4 files changed, 418 insertions(+), 1 deletion(-) create mode 100644 crates/scorer-inspect/Cargo.toml create mode 100644 crates/scorer-inspect/README.md create mode 100644 crates/scorer-inspect/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index fc45137fe..282f4402b 100755 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = [".", "crates/bdk-wallet-aggregate"] +members = [".", "crates/bdk-wallet-aggregate", "crates/scorer-inspect"] exclude = ["bindings/uniffi-bindgen"] [package] diff --git a/crates/scorer-inspect/Cargo.toml b/crates/scorer-inspect/Cargo.toml new file mode 100644 index 000000000..971e59144 --- /dev/null +++ b/crates/scorer-inspect/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "scorer-inspect" +version = "0.1.0" +edition = "2021" +rust-version = "1.85" +description = "Offline diagnostics CLI for LDK ProbabilisticScorer / ChannelLiquidities files." +license = "MIT OR Apache-2.0" + +[dependencies] +anyhow = "1" +clap = { version = "4", features = ["derive"] } +csv = "1" +humansize = "2" +lightning = { version = "0.2.0", features = ["std"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" diff --git a/crates/scorer-inspect/README.md b/crates/scorer-inspect/README.md new file mode 100644 index 000000000..e8cb21d17 --- /dev/null +++ b/crates/scorer-inspect/README.md @@ -0,0 +1,69 @@ +# scorer-inspect + +Offline diagnostics CLI for LDK serialized scorer files. + +Reads a binary file produced by either: +- LDK's external scorer / `ProbabilisticScorer::write` (e.g. `https://api.blocktank.to/scorer-prod`, `https://scores.zeusln.com/latest.bin`), or +- ldk-node's `Node::export_pathfinding_scores`. + +Both wire formats are identical in current LDK — `ProbabilisticScorer::write` just calls `self.channel_liquidities.write(w)`. 
The tool always reads via `ChannelLiquidities::read`, and the `--source` flag is purely metadata for the report.
+
+## Build
+
+```
+cargo build -p scorer-inspect --release
+```
+
+## Usage
+
+```
+scorer-inspect <FILE>
+  [--source served|exported]
+  [--output text|csv|json]
+  [--save PATH]
+  [--top N]
+  [--all]
+  [--sort narrow|recent|history]
+```
+
+- `--source` — annotates the report; doesn't affect parsing.
+- `--output text` (default) — short summary + top-N channel rows for human review.
+- `--output csv` — summary row, blank line, then one row per channel. Editor-friendly.
+- `--output json` — `{ summary: {...}, channels: [...] }`. Machine-readable.
+- `--save PATH` — write to file instead of stdout.
+- `--top N` (default 20) — limit channel rows.
+- `--all` — dump every entry; overrides `--top`.
+- `--sort` — `narrow` (smallest offset window first; highest information density), `recent` (most recently updated first), `history` (largest historical-bucket weight first; most probe-derived signal).
+
+## Examples
+
+```
+# Quick eyeball of Zeus's served file
+curl -o /tmp/zeus.bin https://scores.zeusln.com/latest.bin
+scorer-inspect /tmp/zeus.bin --source served
+
+# Full per-channel CSV diff between Zeus and Bitkit
+curl -o /tmp/blocktank.bin https://api.blocktank.to/scorer-prod
+scorer-inspect /tmp/zeus.bin --source served --all --output csv --save /tmp/zeus.csv
+scorer-inspect /tmp/blocktank.bin --source served --all --output csv --save /tmp/blocktank.csv
+```
+
+## What the columns mean
+
+- `min_liquidity_offset_msat` / `max_liquidity_offset_msat` — non-directional offsets relative to the channel's node ordering. Resolving them into directional `min_liquidity_sat` / `max_liquidity_sat` requires a `NetworkGraph` (capacity + node-id ordering), which this tool doesn't yet take.
+- `has_history` — whether either historical-bucket array is non-zero. This is the single best signal for distinguishing probe-derived data from synthetic graph seeding.
+- `total_valid_points_tracked` (CSV/JSON), `history_weight` (text) — LDK-internal scalar weight summarizing the historical bucket distribution. Stored as `f64`; not an integer payment count.
+- `last_updated_secs` — seconds since the Unix epoch when either liquidity bound was last modified.
+
+## Distinguishing rich probe data from synthetic seeding
+
+A scorer file can be large for two very different reasons:
+
+- **Probe-rich**: many entries with `has_history=true`, narrow `[min_offset, max_offset]` windows, sizable `total_valid_points_tracked`. This is what real probing produces.
+- **Synthetic-coverage**: many entries with `has_history=false`, `min_offset=0`, `max_offset` close to channel capacity, zero bucket weights. This is what gossip-graph seeding produces — big file, low pathfinding signal.
+
+To classify a file, run the tool against it and check the `history populated` percentage and the offset-window distribution in the summary.
+
+## Limits
+
+- v1 is graph-free: directional output and capacity-resolved sats are not available without a `NetworkGraph`. Adding `--graph PATH` is tracked as future work.
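+
+For reference, the directional resolution a future `--graph` mode would need is
+sketched below. This is illustrative only: `directed_bounds` is a hypothetical
+helper, and the swap convention is assumed to mirror LDK's internal
+`ChannelLiquidity::as_directed` (the offsets swap roles when the source node is
+`node_two`).
+
+```
+/// Resolve the two stored non-directional offsets into per-direction
+/// liquidity bounds, given the channel capacity from a `NetworkGraph`.
+/// Returns ((min, max) for node_one -> node_two, (min, max) for the reverse).
+fn directed_bounds(
+	capacity_msat: u64,
+	min_liquidity_offset_msat: u64,
+	max_liquidity_offset_msat: u64,
+) -> ((u64, u64), (u64, u64)) {
+	let one_to_two = (
+		min_liquidity_offset_msat,
+		capacity_msat.saturating_sub(max_liquidity_offset_msat),
+	);
+	let two_to_one = (
+		max_liquidity_offset_msat,
+		capacity_msat.saturating_sub(min_liquidity_offset_msat),
+	);
+	(one_to_two, two_to_one)
+}
+```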
diff --git a/crates/scorer-inspect/src/main.rs b/crates/scorer-inspect/src/main.rs
new file mode 100644
index 000000000..677c95f2c
--- /dev/null
+++ b/crates/scorer-inspect/src/main.rs
@@ -0,0 +1,332 @@
+//! Offline diagnostics for LDK serialized scorer files.
+//!
+//! Reads a binary file produced by either:
+//! - LDK's external scorer / `ProbabilisticScorer::write`, or
+//! - ldk-node's `Node::export_pathfinding_scores`.
+//!
+//! Both are wire-compatible — they both serialize a `ChannelLiquidities` —
+//! so this tool always reads via `ChannelLiquidities::read`. The `--source`
+//! flag is purely metadata for the report.
+
+use std::fs;
+use std::io::{BufWriter, Write};
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use clap::{Parser, ValueEnum};
+use humansize::{format_size, BINARY};
+use lightning::io::Cursor;
+use lightning::routing::scoring::{ChannelLiquidities, ChannelLiquidityDiagnostic};
+use lightning::util::ser::Readable;
+use serde::Serialize;
+
+#[derive(Copy, Clone, Debug, ValueEnum)]
+enum Source {
+	/// File came from a remote URL such as `https://api.blocktank.to/scorer-prod`
+	/// or `https://scores.zeusln.com/latest.bin`.
+	Served,
+	/// File came from `Node::export_pathfinding_scores`.
+	Exported,
+}
+
+#[derive(Copy, Clone, Debug, ValueEnum)]
+enum OutputFormat {
+	Text,
+	Csv,
+	Json,
+}
+
+#[derive(Copy, Clone, Debug, ValueEnum)]
+enum Sort {
+	/// Smallest `[min_offset, max_offset]` window first (highest-information entries).
+	Narrow,
+	/// Most recently updated first.
+	Recent,
+	/// Highest historical-bucket weight first (most probe-derived signal).
+	History,
+}
+
+#[derive(Parser, Debug)]
+#[command(
+	name = "scorer-inspect",
+	about = "Offline diagnostics for LDK ProbabilisticScorer / ChannelLiquidities files",
+	long_about = None,
+)]
+struct Cli {
+	/// Path to a binary scorer file.
+	file: PathBuf,
+	/// Metadata tag for the report. The wire format is identical for both today.
+	#[arg(long, value_enum, default_value_t = Source::Served)]
+	source: Source,
+	/// Output format.
+	#[arg(long, value_enum, default_value_t = OutputFormat::Text)]
+	output: OutputFormat,
+	/// Write output to a file instead of stdout.
+	#[arg(long)]
+	save: Option<PathBuf>,
+	/// Limit per-channel rows to this number (default 20). Ignored if `--all` is set.
+	#[arg(long)]
+	top: Option<usize>,
+	/// Dump every entry. Overrides `--top`.
+	#[arg(long, default_value_t = false)]
+	all: bool,
+	/// Sort key for per-channel rows.
+	#[arg(long, value_enum, default_value_t = Sort::Narrow)]
+	sort: Sort,
+}
+
+#[derive(Debug, Serialize)]
+struct Summary {
+	file: String,
+	source: String,
+	file_size_bytes: u64,
+	file_size_human: String,
+	entry_count: usize,
+	history_populated_count: usize,
+	history_empty_count: usize,
+	history_populated_pct: f64,
+	min_offset_msat_p50: u64,
+	min_offset_msat_p95: u64,
+	min_offset_msat_max: u64,
+	max_offset_msat_p50: u64,
+	max_offset_msat_p95: u64,
+	max_offset_msat_max: u64,
+	total_valid_points_tracked_p50: f64,
+	total_valid_points_tracked_p95: f64,
+	total_valid_points_tracked_max: f64,
+}
+
+#[derive(Debug, Serialize)]
+struct ChannelRow {
+	scid: u64,
+	min_liquidity_offset_msat: u64,
+	max_liquidity_offset_msat: u64,
+	has_history: bool,
+	total_valid_points_tracked: f64,
+	last_updated_secs: u64,
+	offset_history_last_updated_secs: u64,
+	last_datapoint_time_secs: u64,
+}
+
+#[derive(Debug, Serialize)]
+struct Report {
+	summary: Summary,
+	channels: Vec<ChannelRow>,
+}
+
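+// Nearest-rank percentile on an already-sorted slice: pick the element at
+// round((len - 1) * p) rather than interpolating. For example, for
+// [1, 2, 3, 4, 5] and p = 0.50 the index is round(4 * 0.5) = 2, so p50 = 3;
+// p = 0.95 gives round(3.8) = 4, so p95 = 5. Coarse, but adequate for the
+// distribution summary below.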
+fn percentile_u64(sorted: &[u64], p: f64) -> u64 {
+	if sorted.is_empty() {
+		return 0;
+	}
+	let idx = ((sorted.len() as f64 - 1.0) * p).round() as usize;
+	sorted[idx.min(sorted.len() - 1)]
+}
+
+fn percentile_f64(sorted: &[f64], p: f64) -> f64 {
+	if sorted.is_empty() {
+		return 0.0;
+	}
+	let idx = ((sorted.len() as f64 - 1.0) * p).round() as usize;
+	sorted[idx.min(sorted.len() - 1)]
+}
+
+fn build_summary(
+	file_path: &Path, source: Source, file_size: u64,
+	diags: &[ChannelLiquidityDiagnostic],
+) -> Summary {
+	let entry_count = diags.len();
+	let history_populated_count = diags.iter().filter(|d| d.has_history).count();
+	let history_empty_count = entry_count - history_populated_count;
+	let history_populated_pct = if entry_count == 0 {
+		0.0
+	} else {
+		100.0 * history_populated_count as f64 / entry_count as f64
+	};
+
+	let mut min_offsets: Vec<u64> = diags.iter().map(|d| d.min_liquidity_offset_msat).collect();
+	let mut max_offsets: Vec<u64> = diags.iter().map(|d| d.max_liquidity_offset_msat).collect();
+	let mut weights: Vec<f64> =
+		diags.iter().map(|d| d.total_valid_points_tracked).collect();
+	min_offsets.sort_unstable();
+	max_offsets.sort_unstable();
+	// f64 has no total order; treat incomparable values (NaN) as equal.
+	weights.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+
+	Summary {
+		file: file_path.display().to_string(),
+		source: format!("{:?}", source).to_lowercase(),
+		file_size_bytes: file_size,
+		file_size_human: format_size(file_size, BINARY),
+		entry_count,
+		history_populated_count,
+		history_empty_count,
+		history_populated_pct,
+		min_offset_msat_p50: percentile_u64(&min_offsets, 0.50),
+		min_offset_msat_p95: percentile_u64(&min_offsets, 0.95),
+		min_offset_msat_max: min_offsets.last().copied().unwrap_or(0),
+		max_offset_msat_p50: percentile_u64(&max_offsets, 0.50),
+		max_offset_msat_p95: percentile_u64(&max_offsets, 0.95),
+		max_offset_msat_max: max_offsets.last().copied().unwrap_or(0),
+		total_valid_points_tracked_p50: percentile_f64(&weights, 0.50),
+		total_valid_points_tracked_p95: percentile_f64(&weights, 0.95),
+		total_valid_points_tracked_max: weights.last().copied().unwrap_or(0.0),
+	}
+}
+
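+// Sorting happens on the full diagnostic list before any `--top` truncation
+// in `main`, so the row limit always keeps the best entries under the chosen
+// sort key rather than an arbitrary prefix of the file.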
+fn sort_diagnostics(diags: &mut [ChannelLiquidityDiagnostic], by: Sort) {
+	match by {
+		Sort::Narrow => diags.sort_by_key(|d| {
+			d.max_liquidity_offset_msat.saturating_sub(d.min_liquidity_offset_msat)
+		}),
+		Sort::Recent => diags.sort_by(|a, b| b.last_updated_secs.cmp(&a.last_updated_secs)),
+		Sort::History => diags.sort_by(|a, b| {
+			b.total_valid_points_tracked
+				.partial_cmp(&a.total_valid_points_tracked)
+				.unwrap_or(std::cmp::Ordering::Equal)
+		}),
+	}
+}
+
+fn render_text<W: Write>(writer: &mut W, report: &Report) -> Result<()> {
+	let s = &report.summary;
+	writeln!(writer, "scorer-inspect — offline scorer diagnostics")?;
+	writeln!(writer, " file: {}", s.file)?;
+	writeln!(writer, " source: {}", s.source)?;
+	writeln!(
+		writer,
+		" file size: {} ({} bytes)",
+		s.file_size_human, s.file_size_bytes
+	)?;
+	writeln!(writer, " entries: {}", s.entry_count)?;
+	writeln!(
+		writer,
+		" history populated: {} ({:.1}%)",
+		s.history_populated_count, s.history_populated_pct
+	)?;
+	writeln!(writer, " history empty: {}", s.history_empty_count)?;
+	writeln!(writer)?;
+	writeln!(
+		writer,
+		" min_offset_msat: p50={} p95={} max={}",
+		s.min_offset_msat_p50, s.min_offset_msat_p95, s.min_offset_msat_max
+	)?;
+	writeln!(
+		writer,
+		" max_offset_msat: p50={} p95={} max={}",
+		s.max_offset_msat_p50, s.max_offset_msat_p95, s.max_offset_msat_max
+	)?;
+	writeln!(
+		writer,
+		" history bucket weight: p50={:.0} p95={:.0} max={:.0}",
+		s.total_valid_points_tracked_p50,
+		s.total_valid_points_tracked_p95,
+		s.total_valid_points_tracked_max
+	)?;
+	writeln!(writer)?;
+
+	if !report.channels.is_empty() {
+		writeln!(
+			writer,
+			"{:>20} {:>14} {:>14} {:>4} {:>16} {:>10}",
+			"scid", "min_offset", "max_offset", "hist", "history_weight", "updated"
+		)?;
+		for row in &report.channels {
+			writeln!(
+				writer,
+				"{:>20} {:>14} {:>14} {:>4} {:>16.0} {:>10}",
+				row.scid,
+				row.min_liquidity_offset_msat,
+				row.max_liquidity_offset_msat,
+				if row.has_history { "yes" } else { "no" },
+				row.total_valid_points_tracked,
+				row.last_updated_secs,
+			)?;
+		}
+	}
+	Ok(())
+}
+
+fn render_csv<W: Write>(mut writer: W, report: &Report) -> Result<()> {
+	// Section 1: summary as a single labeled row.
+	{
+		let mut wtr = csv::Writer::from_writer(&mut writer);
+		wtr.serialize(&report.summary).context("write summary row")?;
+		wtr.flush()?;
+	}
+	if !report.channels.is_empty() {
+		// Blank line + a fresh CSV section for the channel rows so a human can scan
+		// the file in Sublime Text without confusing the two sections.
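+		// Downstream tooling should treat the saved file as two independent CSV
+		// documents separated by one blank line, and split on that blank line
+		// before handing either section to a CSV parser.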
+		writer.write_all(b"\n")?;
+		let mut wtr = csv::Writer::from_writer(&mut writer);
+		for row in &report.channels {
+			wtr.serialize(row).context("write channel row")?;
+		}
+		wtr.flush()?;
+	}
+	Ok(())
+}
+
+fn render_json<W: Write>(writer: W, report: &Report) -> Result<()> {
+	serde_json::to_writer_pretty(writer, report).context("write json")?;
+	Ok(())
+}
+
+fn main() -> Result<()> {
+	let cli = Cli::parse();
+
+	let bytes = fs::read(&cli.file)
+		.with_context(|| format!("read scorer file {}", cli.file.display()))?;
+	let file_size = bytes.len() as u64;
+
+	let mut cursor = Cursor::new(&bytes);
+	let liquidities = ChannelLiquidities::read(&mut cursor)
+		.map_err(|e| anyhow::anyhow!("decode ChannelLiquidities: {:?}", e))?;
+	let mut diags = liquidities.diagnostics();
+
+	let summary = build_summary(&cli.file, cli.source, file_size, &diags);
+
+	sort_diagnostics(&mut diags, cli.sort);
+	let limit = if cli.all { diags.len() } else { cli.top.unwrap_or(20) };
+	let channels: Vec<ChannelRow> = diags
+		.into_iter()
+		.take(limit)
+		.map(|d| ChannelRow {
+			scid: d.scid,
+			min_liquidity_offset_msat: d.min_liquidity_offset_msat,
+			max_liquidity_offset_msat: d.max_liquidity_offset_msat,
+			has_history: d.has_history,
+			total_valid_points_tracked: d.total_valid_points_tracked,
+			last_updated_secs: d.last_updated_secs,
+			offset_history_last_updated_secs: d.offset_history_last_updated_secs,
+			last_datapoint_time_secs: d.last_datapoint_time_secs,
+		})
+		.collect();
+
+	let report = Report { summary, channels };
+
+	match cli.save {
+		Some(path) => {
+			let f = fs::File::create(&path)
+				.with_context(|| format!("create output file {}", path.display()))?;
+			let mut buf = BufWriter::new(f);
+			match cli.output {
+				OutputFormat::Text => render_text(&mut buf, &report)?,
+				OutputFormat::Csv => render_csv(&mut buf, &report)?,
+				OutputFormat::Json => render_json(&mut buf, &report)?,
+			}
+			buf.flush()?;
+			// Report the size of the file we just wrote, not of the input scorer file.
+			let out_size = fs::metadata(&path).map(|m| m.len()).unwrap_or(0);
+			eprintln!("wrote {} ({})", path.display(), format_size(out_size, BINARY));
+		}
+		None => {
+			let stdout = std::io::stdout();
+			let mut handle = stdout.lock();
+			match cli.output {
+				OutputFormat::Text => render_text(&mut handle, &report)?,
+				OutputFormat::Csv => render_csv(&mut handle, &report)?,
+				OutputFormat::Json => render_json(&mut handle, &report)?,
+			}
+			handle.flush()?;
+		}
	}
+
+	Ok(())
+}
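+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	// Sanity check for the nearest-rank percentile helper documented above:
+	// p50 of [1..5] picks index round(4 * 0.5) = 2, p95 picks index 4, and an
+	// empty slice falls back to 0.
+	#[test]
+	fn percentile_nearest_rank() {
+		let sorted = [1u64, 2, 3, 4, 5];
+		assert_eq!(percentile_u64(&sorted, 0.50), 3);
+		assert_eq!(percentile_u64(&sorted, 0.95), 5);
+		assert_eq!(percentile_u64(&[], 0.50), 0);
+	}
+}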