From 12e5db6396919fa4f783c751aaf7a4ef96bcea84 Mon Sep 17 00:00:00 2001
From: Claude
Date: Thu, 21 May 2026 17:09:38 +0000
Subject: [PATCH] feat(lance-graph-ontology): seed NamespaceRegistry with bO-* upstream vocabs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Companion to PR #407 (merged). Expands `NamespaceRegistry::seed_defaults`
from 16 to 29 entries, registering the 13 external vocabularies that
PR #407 added hydrators for. This is the O(1) IRI ↔ context_id matching
table backed by `lance_cache.rs`'s Lance dataset; consumers like
smb-office-rs and woa-rs look up by namespace shortname instead of
hand-rolling slot constants.

Why this lives in lance-graph-ontology, not in OGIT:

- Public OWL/RDF source files stay pristine in data/ontologies/
  (DOLCE+DUL, FIBO-FND/BE, OWL-Time, PROV-O, QUDT, schema.org, SKOS,
  ZUGFeRD CII XSDs + Schematron). Modifying them taints downstream use.
- The OGIT repo is authoritative for namespace registrations but adding
  new TTL files there with hand-picked contextIds would be drift.
- The matching table belongs in the CLIENT (lance-graph-ontology), keyed
  by namespace shortname, persisted via the existing lance_cache layer.
- Per user direction 2026-05-21: "expand always but drift is probably
  bad" + "deinterlace them locally and keep that matching table in a
  lance table for O(1) and check what lance-graph-ontology has in
  regards" → expansion lives here, OGIT untouched.

Allocation:

  ID     Namespace                           PR / Hydrator
  ─────────────────────────────────────────────────────────
  0      SMB                                 (pre-existing)
  1      WorkOrder                           (pre-existing)
  2      Healthcare                          (pre-existing)
  3      Network                             (pre-existing)
  4      EmailCorrespondance                 (pre-existing)
  5      SharePoint                          (pre-existing)
  10-19  Medical/                            (pre-existing, dense)
  20     Foundation/DOLCE-DUL                bO-1  hydrate_dolce
  21     Foundation/OWL-Time                 bO-2  hydrate_owltime
  22     Foundation/PROV-O                   bO-3  hydrate_provo
  23     Foundation/QUDT                     bO-4  hydrate_qudt
  24     Foundation/schema-org               bO-8  hydrate_schemaorg
  25     Foundation/SKOS                     bO-5  hydrate_skos
  30     FinancialAccounting/FIBO-FND        bO-6  hydrate_fibo_fnd
  31     FinancialAccounting/FIBO-BE         bO-7  hydrate_fibo_be
  32     FinancialAccounting/ZUGFeRD         bO-16 hydrate_zugferd
  33     FinancialAccounting/ZUGFeRD-Rules   bO-15 hydrate_zugferd_rules
  34     FinancialAccounting/SKR03           bO-13 hydrate_skr03
  35     FinancialAccounting/SKR04           bO-13 hydrate_skr04
  36     FinancialAccounting/SKR03-Bau       bO-13 hydrate_skr03_bau

Allocation policy matches the existing Medical/ pattern: dense within
family-range, gaps between ranges left as expansion room. `allocate()`
continues to fill gaps 6..=9 and 26..=29 first, then 37+.

Notes:

- `next_free_id` doc-comment updated to reflect the new seed layout.
  First dynamic id is 6 (it was already 6 in practice; the prior
  comment said "20", which was off by 14).
- Three regression tests updated:
  * `seed_defaults_has_sixteen_entries` → `_has_twenty_nine_entries`
  * `seed_defaults_assigns_canonical_ids` adds spot-checks at
    20/21/25/30/34/35/36
  * `allocate_skips_to_first_unused_id` comments updated for the new
    occupied ranges (assertions unchanged)
- One integration test (`tests/context_id_test.rs`) updated to match:
  seed len assertion 16 → 29, post-allocation len assertion 18 → 31.

All 116 lance-graph-ontology tests pass; clippy clean (5 pre-existing
oxrdf deprecation warnings, no new); downstream consumers (callcenter,
consumer-conformance, cognitive-shader-driver) build clean.

--- .../src/namespace_registry.rs | 54 ++++++++++++++++--- .../tests/context_id_test.rs | 19 +++++-- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/crates/lance-graph-ontology/src/namespace_registry.rs b/crates/lance-graph-ontology/src/namespace_registry.rs index ca209c4b..d4e7adaa 100644 --- a/crates/lance-graph-ontology/src/namespace_registry.rs +++ b/crates/lance-graph-ontology/src/namespace_registry.rs @@ -87,7 +87,7 @@ impl NamespaceRegistry { /// - `lance-graph-callcenter::hydration::parse_super_domain_name` /// - OQ-4 resolution in PR #366 / EPIPHANIES 2026-05-13 sprint-7 meta entry pub fn seed_defaults() -> Self { - let mut ids = HashMap::with_capacity(16); + let mut ids = HashMap::with_capacity(29); // Live cognitive namespaces. ids.insert("SMB".to_string(), 0); // export-only per v5 ratification ids.insert("WorkOrder".to_string(), 1); @@ -108,6 +108,26 @@ impl NamespaceRegistry { ids.insert("Medical/HPO".to_string(), 17); ids.insert("Medical/DRON".to_string(), 18); ids.insert("Medical/CHEBI".to_string(), 19); + // Foundation/ reserved range 20..=29 (PR-bO-1..bO-5, bO-8). + // L1 upper ontology + L2 utility vocabularies hydrated by + // lance-graph-ontology::hydrators. Public OWL/RDF sources kept + // pristine in data/ontologies/; this registry is the local + // IRI ↔ context_id matching table (O(1) via lance_cache). + ids.insert("Foundation/DOLCE-DUL".to_string(), 20); + ids.insert("Foundation/OWL-Time".to_string(), 21); + ids.insert("Foundation/PROV-O".to_string(), 22); + ids.insert("Foundation/QUDT".to_string(), 23); + ids.insert("Foundation/schema-org".to_string(), 24); + ids.insert("Foundation/SKOS".to_string(), 25); + // FinancialAccounting/ reserved range 30..=39 + // (PR-bO-6, bO-7, bO-13, bO-15, bO-16). 
+ ids.insert("FinancialAccounting/FIBO-FND".to_string(), 30); + ids.insert("FinancialAccounting/FIBO-BE".to_string(), 31); + ids.insert("FinancialAccounting/ZUGFeRD".to_string(), 32); + ids.insert("FinancialAccounting/ZUGFeRD-Rules".to_string(), 33); + ids.insert("FinancialAccounting/SKR03".to_string(), 34); + ids.insert("FinancialAccounting/SKR04".to_string(), 35); + ids.insert("FinancialAccounting/SKR03-Bau".to_string(), 36); Self { ids } } @@ -147,9 +167,12 @@ impl NamespaceRegistry { self.ids.iter().map(|(k, v)| (k.as_str(), *v)) } - /// First context id that is not currently in use. Skips the seed ranges - /// to keep allocations dense within their family (v1 ids 0..=3 + 10..=19 - /// occupied by `seed_defaults`; first dynamic id therefore lands at 20). + /// First context id that is not currently in use. Walks `0u32..` and + /// returns the first value not present in the registry. With the + /// current `seed_defaults` (6 cognitive + 10 Medical/ + 13 Foundation/FinancialAccounting + /// entries), the seed occupies 0..=5 + 10..=19 + 20..=25 + 30..=36; + /// the first dynamic id therefore lands at 6 (next gap), then 7..=9, + /// then 26..=29, then 37+. Allocation stays dense across seed gaps. fn next_free_id(&self) -> u32 { let mut candidate: u32 = 0; let used: std::collections::BTreeSet<u32> = self.ids.values().copied().collect(); @@ -165,9 +188,15 @@ mod tests { use super::*; #[test] - fn seed_defaults_has_sixteen_entries() { + fn seed_defaults_has_twenty_nine_entries() { let r = NamespaceRegistry::seed_defaults(); - assert_eq!(r.len(), 16); + // 6 cognitive (SMB, WorkOrder, Healthcare, Network, Email, SharePoint) + // + 10 Medical/* (ICD10CM..CHEBI) + // + 6 Foundation/* (DOLCE-DUL, OWL-Time, PROV-O, QUDT, schema-org, SKOS) + // + 7 FinancialAccounting/* (FIBO-FND, FIBO-BE, ZUGFeRD, ZUGFeRD-Rules, + // SKR03, SKR04, SKR03-Bau) + // = 29 assert_eq!(r.len(), 29); } #[test] @@ -183,17 +212,26 @@ mod tests { // Medical/ reserved range 10..=19. 
assert_eq!(r.get("Medical/ICD10CM"), Some(10)); assert_eq!(r.get("Medical/CHEBI"), Some(19)); + // Foundation/ reserved range 20..=29. + assert_eq!(r.get("Foundation/DOLCE-DUL"), Some(20)); + assert_eq!(r.get("Foundation/OWL-Time"), Some(21)); + assert_eq!(r.get("Foundation/SKOS"), Some(25)); + // FinancialAccounting/ reserved range 30..=39. + assert_eq!(r.get("FinancialAccounting/FIBO-FND"), Some(30)); + assert_eq!(r.get("FinancialAccounting/SKR03"), Some(34)); + assert_eq!(r.get("FinancialAccounting/SKR04"), Some(35)); + assert_eq!(r.get("FinancialAccounting/SKR03-Bau"), Some(36)); } #[test] fn allocate_skips_to_first_unused_id() { let mut r = NamespaceRegistry::seed_defaults(); - // 0..=5 and 10..=19 are taken; first free id is 6. + // Occupied: 0..=5, 10..=19, 20..=25, 30..=36. First free id is 6. let id = r.allocate("CallCenter"); assert_eq!(id, 6); // Idempotent: re-allocate returns the same id. assert_eq!(r.allocate("CallCenter"), 6); - // Next allocation skips again. + // Next allocation skips again (still in 6..=9 gap). assert_eq!(r.allocate("Splat"), 7); } diff --git a/crates/lance-graph-ontology/tests/context_id_test.rs b/crates/lance-graph-ontology/tests/context_id_test.rs index f29ca2ce..86f0b96f 100644 --- a/crates/lance-graph-ontology/tests/context_id_test.rs +++ b/crates/lance-graph-ontology/tests/context_id_test.rs @@ -64,8 +64,20 @@ fn namespace_registry_seed_defaults_assigns_canonical_v1_ids() { assert_eq!(r.get("Medical/DRON"), Some(18)); assert_eq!(r.get("Medical/CHEBI"), Some(19)); - // 16 seed mappings total (6 cognitive + 10 medical). - assert_eq!(r.len(), 16); + // Foundation reserved range 20..=29 (PR-bO-1..bO-5, bO-8). + assert_eq!(r.get("Foundation/DOLCE-DUL"), Some(20)); + assert_eq!(r.get("Foundation/SKOS"), Some(25)); + + // FinancialAccounting reserved range 30..=39 (PR-bO-6, bO-7, bO-13, bO-15, bO-16). 
+ assert_eq!(r.get("FinancialAccounting/FIBO-FND"), Some(30)); + assert_eq!(r.get("FinancialAccounting/SKR03-Bau"), Some(36)); + + // 29 seed mappings total: + // 6 cognitive (SMB / WorkOrder / Healthcare / Network / Email / SharePoint) + // + 10 medical (ICD10CM..CHEBI) + // + 6 foundation (DOLCE-DUL, OWL-Time, PROV-O, QUDT, schema-org, SKOS) + // + 7 financial (FIBO-FND, FIBO-BE, ZUGFeRD, ZUGFeRD-Rules, SKR03, SKR04, SKR03-Bau) + assert_eq!(r.len(), 29); } #[test] @@ -88,7 +100,8 @@ fn namespace_registry_allocate_is_idempotent_and_dense() { let id2 = r.allocate("Splat"); assert_eq!(id2, 7); assert_ne!(id1, id2); - assert_eq!(r.len(), 18); + // Seed (29) + 2 new allocations = 31. + assert_eq!(r.len(), 31); } #[test]