1 change: 1 addition & 0 deletions native/shared/src/lib.rs
@@ -20,6 +20,7 @@ pub mod postfx;
pub mod custom_shaders;
pub mod staging;
pub mod profiler;
pub mod sdf_cache;
// Jolt C ABI + Rust wrapper live on native only. On wasm32 the web crate
// routes bloom_physics_* calls through wasm_bindgen to JoltPhysics.js;
// no Rust-side Jolt integration is needed.
128 changes: 120 additions & 8 deletions native/shared/src/renderer/mod.rs
@@ -1055,6 +1055,13 @@ pub struct Renderer {
pub sdf_bake_pipeline: wgpu::ComputePipeline,
pub sdf_bake_layout: wgpu::BindGroupLayout,
pub sdf_bake_uniform: wgpu::Buffer,
/// Ticket 022 — staging buffers awaiting readback so freshly-baked
/// per-mesh SDFs can be written to the disk cache. Populated by
/// `bake_pending_sdfs` (one entry per dispatch); drained by
/// `flush_sdf_cache_writes` after the frame's main submit, which
/// maps each buffer, copies bytes to the cache file, and drops it.
/// Empty on cache-hit frames and after cold launch finishes.
sdf_cache_writes: Vec<(crate::sdf_cache::MeshHash, wgpu::Buffer)>,

// --- Ticket 014 V2: scene-wide SDF clipmap ---
pub scene_sdf_clipmap_tex: wgpu::Texture,
@@ -6040,6 +6047,7 @@ impl Renderer {
sdf_bake_pipeline,
sdf_bake_layout,
sdf_bake_uniform,
sdf_cache_writes: Vec::new(),
scene_sdf_clipmap_tex,
scene_sdf_clipmap_view,
scene_sdf_clipmap_built: false,
@@ -7183,6 +7191,13 @@ impl Renderer {
/// per-frame budget; expensive workload (O(voxels × triangles)
/// per mesh), so the rate-limit keeps first-frame stutter
/// bounded. Static scenes amortise and never re-bake.
///
/// Ticket 022 — after each dispatch, encode a copy_texture_to_buffer
/// against a fresh staging buffer and stash (hash, buffer) on
/// `sdf_cache_writes`. The frame's main submit picks up the copies
/// alongside the bake; `flush_sdf_cache_writes` then maps and
/// persists each buffer to the on-disk cache so the next launch
/// hits the load path in scene.rs and skips the bake entirely.
fn bake_pending_sdfs(
&mut self,
scene: &mut crate::scene::SceneGraph,
@@ -7196,18 +7211,21 @@
let pending: Vec<f64> = scene.pending_sdf_bakes.drain(..take).collect();

for handle in pending {
let (sdf_view, vb_ptr, ib_ptr, bmin, bmax, index_count) = {
let (sdf_tex, sdf_view, vb_ptr, ib_ptr, bmin, bmax, index_count, mesh_hash) = {
let Some(node) = scene.nodes.get(handle) else { continue; };
let Some(sdf_tex) = node.mesh_sdf.as_ref() else { continue; };
let Some(sdf_view) = node.mesh_sdf_view.as_ref() else { continue; };
let Some(vb) = node.gpu_vb.as_ref() else { continue; };
let Some(ib) = node.gpu_ib.as_ref() else { continue; };
(
sdf_tex.clone(),
sdf_view.clone(),
vb.clone(),
ib.clone(),
node.bounds_min,
node.bounds_max,
node.gpu_index_count,
node.mesh_hash,
)
};
if index_count == 0 {
@@ -7234,13 +7252,95 @@
wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(&sdf_view) },
],
});
let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: Some("sdf_bake_pass"),
timestamp_writes: None,
});
pass.set_pipeline(&self.sdf_bake_pipeline);
pass.set_bind_group(0, &bg, &[]);
pass.dispatch_workgroups(MESH_SDF_RES / 4, MESH_SDF_RES / 4, MESH_SDF_RES / 4);
{
let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
label: Some("sdf_bake_pass"),
timestamp_writes: None,
});
pass.set_pipeline(&self.sdf_bake_pipeline);
pass.set_bind_group(0, &bg, &[]);
pass.dispatch_workgroups(MESH_SDF_RES / 4, MESH_SDF_RES / 4, MESH_SDF_RES / 4);
}

// Ticket 022 — schedule a readback against the freshly-baked
// texture so the next launch can skip the bake. We only do
// this when scene.rs computed a hash (it always does, but
// skip defensively); staging rows are padded up to wgpu's
// 256-byte COPY_BYTES_PER_ROW_ALIGNMENT, as texture-to-buffer
// copies require.
if let Some(hash) = mesh_hash {
let row_padded = ((MESH_SDF_RES * 4 + 255) & !255) as u64;
let staging = self.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("sdf_cache_readback"),
size: row_padded * (MESH_SDF_RES as u64) * (MESH_SDF_RES as u64),
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
});
encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
texture: &sdf_tex,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyBufferInfo {
buffer: &staging,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(row_padded as u32),
rows_per_image: Some(MESH_SDF_RES),
},
},
wgpu::Extent3d {
width: MESH_SDF_RES,
height: MESH_SDF_RES,
depth_or_array_layers: MESH_SDF_RES,
},
);
self.sdf_cache_writes.push((hash, staging));
}
}
}

/// Ticket 022 — drain pending SDF cache writes after the frame's
/// main submit. Maps each staging buffer in one pass (single
/// `device.poll(Wait)` covers all of them), unpads the row-aligned
/// payload back to the tightly-packed on-disk layout, and writes
/// to the cache. Best-effort throughout: a write failure is
/// silently ignored — the next cold launch just rebakes.
pub fn flush_sdf_cache_writes(&mut self) {
if self.sdf_cache_writes.is_empty() { return; }
let entries = std::mem::take(&mut self.sdf_cache_writes);

// Issue map_async on every buffer up front so a single poll
// resolves all of them — much cheaper than serially polling
// per buffer when 8 cold-launch bakes complete in one frame.
for (_, buf) in &entries {
let slice = buf.slice(..);
slice.map_async(wgpu::MapMode::Read, |_| { /* polled below */ });
}
let _ = self.device.poll(wgpu::PollType::Wait { submission_index: None, timeout: None });

let row_tight = (MESH_SDF_RES * 4) as usize;
let row_padded = ((MESH_SDF_RES * 4 + 255) & !255) as usize;
let res = MESH_SDF_RES as usize;

for (hash, buf) in entries {
let slice = buf.slice(..);
let data = slice.get_mapped_range();
// Strip the wgpu-required row padding back to a tight
// 32³ × 4-byte payload before storing.
let mut tight = vec![0u8; res * res * row_tight];
for z in 0..res {
for y in 0..res {
let src_off = (z * res + y) * row_padded;
let dst_off = (z * res + y) * row_tight;
tight[dst_off..dst_off + row_tight]
.copy_from_slice(&data[src_off..src_off + row_tight]);
}
}
drop(data);
buf.unmap();
let _ = crate::sdf_cache::store(hash, &tight);
}
}

@@ -11363,6 +11463,18 @@ impl Renderer {
profiler.end("queue_submit");
}

// Ticket 022 — drain freshly-baked SDFs to the on-disk cache.
// No-op on cache-hit frames (queue is empty); on cold-launch
// bake frames it blocks briefly on a single device.poll(Wait)
// covering all 8 readbacks. Skipped on wasm32 (no filesystem
// path, sdf_cache::store returns Err immediately).
#[cfg(not(target_arch = "wasm32"))]
{
profiler.begin("sdf_cache_write");
self.flush_sdf_cache_writes();
profiler.end("sdf_cache_write");
}

#[cfg(target_arch = "wasm32")]
{
self.queue.submit(std::iter::once(encoder.finish()));
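Aside: both cache paths share the same 256-byte row-alignment round trip (unpad after readback in flush_sdf_cache_writes above, pad before upload in scene.rs below). A minimal standalone sketch of the pair, assuming a 32³ volume of 4-byte texels as in the diff; pad_rows and unpad_rows are illustrative names, not helpers from this PR:

const RES: usize = 32;                              // assumed MESH_SDF_RES
const ROW_TIGHT: usize = RES * 4;                   // 128 B: one row, on disk
const ROW_PADDED: usize = (ROW_TIGHT + 255) & !255; // 256 B: one row, staging

// On-disk (tight) -> upload/staging (padded). Inverse of unpad_rows.
fn pad_rows(tight: &[u8]) -> Vec<u8> {
    let mut padded = vec![0u8; ROW_PADDED * RES * RES];
    for row in 0..RES * RES {
        padded[row * ROW_PADDED..row * ROW_PADDED + ROW_TIGHT]
            .copy_from_slice(&tight[row * ROW_TIGHT..(row + 1) * ROW_TIGHT]);
    }
    padded
}

// Staging readback (padded) -> on-disk (tight).
fn unpad_rows(padded: &[u8]) -> Vec<u8> {
    let mut tight = vec![0u8; ROW_TIGHT * RES * RES];
    for row in 0..RES * RES {
        tight[row * ROW_TIGHT..(row + 1) * ROW_TIGHT]
            .copy_from_slice(&padded[row * ROW_PADDED..row * ROW_PADDED + ROW_TIGHT]);
    }
    tight
}

A cheap unit-test invariant: unpad_rows(&pad_rows(&v)) == v for any 128 KiB payload v.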
69 changes: 68 additions & 1 deletion native/shared/src/scene.rs
@@ -128,6 +128,12 @@ pub struct SceneNode {
/// `None` on non-RT-capable adapters or until the bake lands.
pub mesh_sdf: Option<wgpu::Texture>,
pub mesh_sdf_view: Option<wgpu::TextureView>,
/// Content hash of (positions, indices) computed at upload time.
/// Set whenever `mesh_sdf` exists; the renderer reads it back when
/// flushing cache writes after a fresh bake. `None` until the
/// first geometry upload — and on non-RT-capable adapters that
/// never allocate a per-mesh SDF.
pub mesh_hash: Option<crate::sdf_cache::MeshHash>,
/// Flat mesh-average world-space normal, cached on BLAS build so
/// the per-instance GI data buffer can be populated without
/// re-reading the vertex array. Rough heuristic — for walls and
@@ -185,6 +191,7 @@ impl SceneNode {
card_dynamic: false,
mesh_sdf: None,
mesh_sdf_view: None,
mesh_hash: None,
flat_normal_ws: [0.0, 1.0, 0.0],
flat_albedo: [1.0, 1.0, 1.0],
uniform_slot: None,
@@ -752,9 +759,69 @@ impl SceneGraph {
device,
"scene_node_sdf",
);

// Ticket 022 — content-hash the geometry and
// try the on-disk SDF cache before scheduling
// a GPU bake. The vertex layout is interleaved,
// so copy the positions into a Vec<[f32; 3]>
// first; the hash then only sees
// geometry-relevant bytes.
let positions: Vec<[f32; 3]> =
node.vertices.iter().map(|v| v.position).collect();
let hash = crate::sdf_cache::compute_mesh_hash(
&positions, &node.indices,
);
node.mesh_hash = Some(hash);

if let Some(bytes) = crate::sdf_cache::load(hash) {
// Cache hit — pad the tightly-packed
// 128 B/row payload to 256 B/row so it
// clears wgpu's 256-byte
// COPY_BYTES_PER_ROW_ALIGNMENT, then
// upload directly and skip the bake.
// The cache stays compact on disk
// (128 KB/mesh); the 256 KB padded
// allocation is freed immediately
// after the write_texture call.
const RES: u32 = crate::sdf_cache::VOXEL_RES;
let row_tight = (RES * 4) as usize;
let row_padded = ((row_tight + 255) & !255) as u32;
let mut padded = vec![
0u8;
(row_padded as usize) * (RES as usize) * (RES as usize)
];
for z in 0..RES as usize {
for y in 0..RES as usize {
let src_off = (z * RES as usize + y) * row_tight;
let dst_off = (z * RES as usize + y) * row_padded as usize;
padded[dst_off..dst_off + row_tight]
.copy_from_slice(&bytes[src_off..src_off + row_tight]);
}
}
queue.write_texture(
wgpu::TexelCopyTextureInfo {
texture: &sdf_tex,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
&padded,
wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(row_padded),
rows_per_image: Some(RES),
},
wgpu::Extent3d {
width: RES,
height: RES,
depth_or_array_layers: RES,
},
);
} else {
pending_sdf.push(handle);
}

node.mesh_sdf = Some(sdf_tex);
node.mesh_sdf_view = Some(sdf_view);
pending_sdf.push(handle);
}
}
}
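Aside: the sdf_cache module itself is added by this PR but its source is not shown in this view. A minimal sketch consistent with the call sites above (MeshHash, VOXEL_RES, compute_mesh_hash, load, store); the FNV-1a hash and the cache location are assumptions, not taken from the PR:

// native/shared/src/sdf_cache.rs (sketch; the real module may differ)
use std::io;
use std::path::PathBuf;

pub type MeshHash = u64;
pub const VOXEL_RES: u32 = 32; // must match MESH_SDF_RES in the renderer

// FNV-1a over raw bytes: stable across runs and toolchains, unlike
// std's DefaultHasher, so hashes stay valid between launches.
fn fnv1a(h: u64, bytes: &[u8]) -> u64 {
    bytes.iter().fold(h, |h, &b| (h ^ b as u64).wrapping_mul(0x100_0000_01b3))
}

pub fn compute_mesh_hash(positions: &[[f32; 3]], indices: &[u32]) -> MeshHash {
    let mut h = 0xcbf2_9ce4_8422_2325u64;
    for p in positions {
        for c in p {
            h = fnv1a(h, &c.to_bits().to_le_bytes());
        }
    }
    for i in indices {
        h = fnv1a(h, &i.to_le_bytes());
    }
    h
}

fn cache_path(hash: MeshHash) -> Option<PathBuf> {
    // Hypothetical location; the PR does not show where the cache lives.
    let mut p = std::env::temp_dir();
    p.push("sdf_cache");
    std::fs::create_dir_all(&p).ok()?;
    p.push(format!("{hash:016x}.bin"));
    Some(p)
}

// Tightly-packed VOXEL_RES³ × 4 B payload, or None on miss or any error.
pub fn load(hash: MeshHash) -> Option<Vec<u8>> {
    let bytes = std::fs::read(cache_path(hash)?).ok()?;
    let expected = (VOXEL_RES as usize).pow(3) * 4;
    (bytes.len() == expected).then_some(bytes)
}

// Best-effort write; callers ignore the result. On wasm32 there is no
// filesystem, so this errors immediately and the renderer just rebakes.
pub fn store(hash: MeshHash, bytes: &[u8]) -> io::Result<()> {
    if cfg!(target_arch = "wasm32") {
        return Err(io::Error::new(io::ErrorKind::Unsupported, "no fs on wasm32"));
    }
    let path = cache_path(hash)
        .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "no cache dir"))?;
    std::fs::write(path, bytes)
}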