From ca350cd8693e5bd15ad360a9601ee0c51593901d Mon Sep 17 00:00:00 2001
From: Ian Nickles
Date: Thu, 30 Apr 2026 12:46:51 -0700
Subject: [PATCH] Support merging types with multiple (allOf) regex validations.

---
 typify-impl/src/convert.rs       |  36 +++++++++-
 typify-impl/src/merge.rs         | 108 +++++++++++++++++++++++++++++-
 typify-impl/tests/all_of.json    |  16 +++++
 typify-impl/tests/all_of.out     | 111 +++++++++++++++++++++++++++++++
 typify-impl/tests/test_all_of.rs |  28 ++++++++
 typify-test/Cargo.toml           |   1 +
 typify-test/build.rs             |  12 ++++
 typify-test/src/main.rs          |  28 ++++++++
 8 files changed, 338 insertions(+), 2 deletions(-)
 create mode 100644 typify-impl/tests/all_of.json
 create mode 100644 typify-impl/tests/all_of.out
 create mode 100644 typify-impl/tests/test_all_of.rs

diff --git a/typify-impl/src/convert.rs b/typify-impl/src/convert.rs
index f97b31b9..82d4873a 100644
--- a/typify-impl/src/convert.rs
+++ b/typify-impl/src/convert.rs
@@ -892,7 +892,41 @@ impl TypeSpace {
 
                 Some(unhandled) => {
                     info!("treating a string format '{}' as a String", unhandled);
-                    Ok((TypeEntryDetails::String.into(), metadata))
+                    // Apply any pattern/length constraints even when the format
+                    // is unrecognized.
+                    match validation {
+                        None
+                        | Some(schemars::schema::StringValidation {
+                            max_length: None,
+                            min_length: None,
+                            pattern: None,
+                        }) => Ok((TypeEntryDetails::String.into(), metadata)),
+
+                        Some(validation) => {
+                            if let Some(pattern) = &validation.pattern {
+                                let _ =
+                                    regress::Regex::new(pattern).map_err(|e| Error::InvalidSchema {
+                                        type_name: type_name.clone().into_option(),
+                                        reason: format!("invalid pattern '{}' {}", pattern, e),
+                                    })?;
+                                self.uses_regress = true;
+                            }
+
+                            let string = TypeEntryDetails::String.into();
+                            let type_id = self.assign_type(string);
+                            Ok((
+                                TypeEntryNewtype::from_metadata_with_string_validation(
+                                    self,
+                                    type_name,
+                                    metadata,
+                                    type_id,
+                                    validation,
+                                    original_schema.clone(),
+                                ),
+                                metadata,
+                            ))
+                        }
+                    }
                 }
             }
         }
diff --git a/typify-impl/src/merge.rs b/typify-impl/src/merge.rs
index 5f1319e9..ed46b7e2 100644
--- a/typify-impl/src/merge.rs
+++ b/typify-impl/src/merge.rs
@@ -783,7 +783,56 @@ fn merge_so_string(
             let pattern = match (&a.pattern, &b.pattern) {
                 (None, v) | (v, None) => v.clone(),
                 (Some(x), Some(y)) if x == y => Some(x.clone()),
-                _ => unimplemented!("merging distinct patterns is impractical"),
+                // Combine distinct patterns using lookaheads so the merged
+                // string must satisfy all constraints. If x is entirely a
+                // sequence of lookaheads (e.g. produced by a prior merge),
+                // append to it rather than nesting lookaheads. A pattern
+                // that merely begins with one, e.g. `(?=.*[0-9])[a-z]+`,
+                // is wrapped instead: appending would test the new
+                // lookahead after the consuming part, not alongside it.
+                //
+                // NOTE: every lookahead is tested at the same position,
+                // whereas JSON Schema patterns are unanchored searches.
+                // The result is exact for start-anchored patterns, but
+                // patterns that only match at different offsets (e.g. `a`
+                // and `b` against "ab") are rejected by the merged one.
+                (Some(x), Some(y)) if is_lookahead_sequence(x) => Some(format!("{x}(?={y})")),
+                (Some(x), Some(y)) => Some(format!("(?={x})(?={y})")),
             };
+
+            /// Returns true when `pattern` is made up solely of one or more
+            /// top-level `(?=...)` groups, so another lookahead can be appended
+            /// without changing where it is evaluated. Escapes and character
+            /// classes are skipped so `\)` and `[)]` do not close a group.
+            fn is_lookahead_sequence(pattern: &str) -> bool {
+                let bytes = pattern.as_bytes();
+                let mut depth = 0usize;
+                let mut in_class = false;
+                let mut i = 0;
+                while i < bytes.len() {
+                    if depth == 0 {
+                        // Between groups, only another lookahead may begin.
+                        if !bytes[i..].starts_with(b"(?=") {
+                            return false;
+                        }
+                        depth = 1;
+                        i += 3;
+                        continue;
+                    }
+                    match bytes[i] {
+                        // Skip whatever character the backslash escapes.
+                        b'\\' => i += 1,
+                        b']' if in_class => in_class = false,
+                        // Parentheses are literals inside a character class.
+                        _ if in_class => {}
+                        b'[' => in_class = true,
+                        b'(' => depth += 1,
+                        b')' => depth -= 1,
+                        _ => {}
+                    }
+                    i += 1;
+                }
+                depth == 0 && !bytes.is_empty()
+            }
 
             if let (Some(min), Some(max)) = (min_length, max_length) {
@@ -1885,4 +1934,61 @@ mod tests {
             serde_json::to_string_pretty(&merged).unwrap(),
         )
     }
+
+    #[test]
+    fn test_merge_pattern_with_leading_lookahead() {
+        // A pattern that merely begins with a lookahead is not a pure
+        // lookahead sequence, so it must be wrapped as a whole rather than
+        // having the second pattern's lookahead appended to it.
+        let schemas: [schemars::schema::Schema; 2] = [
+            json!({"type": "string", "pattern": "(?=.*[0-9])[a-z0-9]+"}),
+            json!({"type": "string", "pattern": "^a"}),
+        ]
+        .map(|v| serde_json::from_value(v).unwrap());
+
+        let merged = super::merge_all(&schemas, &BTreeMap::default());
+
+        let expected: schemars::schema::Schema = serde_json::from_value(json!({
+            "type": "string",
+            "pattern": "(?=(?=.*[0-9])[a-z0-9]+)(?=^a)"
+        }))
+        .unwrap();
+
+        assert_eq!(
+            merged,
+            expected,
+            "{}",
+            serde_json::to_string_pretty(&merged).unwrap(),
+        );
+    }
+
+    #[test]
+    fn test_merge_multiple_patterns() {
+        // Multiple schemas with distinct string patterns that must all be
+        // satisfied. The merged pattern should be a flat sequence of
+        // lookaheads: (?=p1)(?=p2)(?=p3).
+        let schemas: Vec<schemars::schema::Schema> = [
+            json!({"type": "string", "pattern": "^[a-z]+$"}),
+            json!({"type": "string", "pattern": "^.+[0-9].+$"}),
+            json!({"type": "string", "pattern": ".+[A-Z]$"}),
+        ]
+        .into_iter()
+        .map(|v| serde_json::from_value(v).unwrap())
+        .collect();
+
+        let merged = super::merge_all(&schemas, &BTreeMap::default());
+
+        let expected: schemars::schema::Schema = serde_json::from_value(json!({
+            "type": "string",
+            "pattern": "(?=^[a-z]+$)(?=^.+[0-9].+$)(?=.+[A-Z]$)"
+        }))
+        .unwrap();
+
+        assert_eq!(
+            merged,
+            expected,
+            "{}",
+            serde_json::to_string_pretty(&merged).unwrap(),
+        );
+    }
 }
diff --git a/typify-impl/tests/all_of.json b/typify-impl/tests/all_of.json
new file mode 100644
index 00000000..82868876
--- /dev/null
+++ b/typify-impl/tests/all_of.json
@@ -0,0 +1,16 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "definitions": {
+    "TriplePattern": {
+      "allOf": [
+        {
+          "type": "string",
+          "pattern": "^[a-z].+$",
+          "format": "custom-id"
+        },
+        { "type": "string", "pattern": "^.{4,8}$" },
+        { "type": "string", "pattern": ".+[a-z]$" }
+      ]
+    }
+  }
+}
diff --git a/typify-impl/tests/all_of.out b/typify-impl/tests/all_of.out
new file mode 100644
index 00000000..1237a3fa
--- /dev/null
+++ b/typify-impl/tests/all_of.out
@@ -0,0 +1,111 @@
+#[doc = r" Error types."]
+pub mod error {
+    #[doc = r" Error from a `TryFrom` or `FromStr` implementation."]
+    pub struct ConversionError(::std::borrow::Cow<'static, str>);
+    impl ::std::error::Error for ConversionError {}
+    impl ::std::fmt::Display for ConversionError {
+        fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> Result<(), ::std::fmt::Error> {
+            ::std::fmt::Display::fmt(&self.0, f)
+        }
+    }
+    impl ::std::fmt::Debug for ConversionError {
+        fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> Result<(), ::std::fmt::Error> {
+            ::std::fmt::Debug::fmt(&self.0, f)
+        }
+    }
+    impl From<&'static str> for ConversionError {
+        fn from(value: &'static str) -> Self {
+            Self(value.into())
+        }
+    }
+    impl From<String> for ConversionError {
+        fn from(value: String) -> Self {
+            Self(value.into())
+        }
+    }
+}
+#[doc = "`TriplePattern`"]
+#[doc = r""]
+#[doc = r" <details><summary>JSON schema</summary>"]
+#[doc = r""]
+#[doc = r" ```json"]
+#[doc = "{"]
+#[doc = "  \"allOf\": ["]
+#[doc = "    {"]
+#[doc = "      \"type\": \"string\","]
+#[doc = "      \"format\": \"custom-id\","]
+#[doc = "      \"pattern\": \"^[a-z].+$\""]
+#[doc = "    },"]
+#[doc = "    {"]
+#[doc = "      \"type\": \"string\","]
+#[doc = "      \"pattern\": \"^.{4,8}$\""]
+#[doc = "    },"]
+#[doc = "    {"]
+#[doc = "      \"type\": \"string\","]
+#[doc = "      \"pattern\": \".+[a-z]$\""]
+#[doc = "    }"]
+#[doc = "  ]"]
+#[doc = "}"]
+#[doc = r" ```"]
+#[doc = r" </details>"]
+#[derive(:: serde :: Serialize, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[serde(transparent)]
+pub struct TriplePattern(::std::string::String);
+impl ::std::ops::Deref for TriplePattern {
+    type Target = ::std::string::String;
+    fn deref(&self) -> &::std::string::String {
+        &self.0
+    }
+}
+impl ::std::convert::From<TriplePattern> for ::std::string::String {
+    fn from(value: TriplePattern) -> Self {
+        value.0
+    }
+}
+impl ::std::str::FromStr for TriplePattern {
+    type Err = self::error::ConversionError;
+    fn from_str(value: &str) -> ::std::result::Result<Self, self::error::ConversionError> {
+        static PATTERN: ::std::sync::LazyLock<::regress::Regex> =
+            ::std::sync::LazyLock::new(|| {
+                ::regress::Regex::new("(?=^[a-z].+$)(?=^.{4,8}$)(?=.+[a-z]$)").unwrap()
+            });
+        if PATTERN.find(value).is_none() {
+            return Err("doesn't match pattern \"(?=^[a-z].+$)(?=^.{4,8}$)(?=.+[a-z]$)\"".into());
+        }
+        Ok(Self(value.to_string()))
+    }
+}
+impl ::std::convert::TryFrom<&str> for TriplePattern {
+    type Error = self::error::ConversionError;
+    fn try_from(value: &str) -> ::std::result::Result<Self, self::error::ConversionError> {
+        value.parse()
+    }
+}
+impl ::std::convert::TryFrom<&::std::string::String> for TriplePattern {
+    type Error = self::error::ConversionError;
+    fn try_from(
+        value: &::std::string::String,
+    ) -> ::std::result::Result<Self, self::error::ConversionError> {
+        value.parse()
+    }
+}
+impl ::std::convert::TryFrom<::std::string::String> for TriplePattern {
+    type Error = self::error::ConversionError;
+    fn try_from(
+        value: ::std::string::String,
+    ) -> ::std::result::Result<Self, self::error::ConversionError> {
+        value.parse()
+    }
+}
+impl<'de> ::serde::Deserialize<'de> for TriplePattern {
+    fn deserialize<D>(deserializer: D) -> ::std::result::Result<Self, D::Error>
+    where
+        D: ::serde::Deserializer<'de>,
+    {
+        ::std::string::String::deserialize(deserializer)?
+            .parse()
+            .map_err(|e: self::error::ConversionError| {
+                <D::Error as ::serde::de::Error>::custom(e.to_string())
+            })
+    }
+}
diff --git a/typify-impl/tests/test_all_of.rs b/typify-impl/tests/test_all_of.rs
new file mode 100644
index 00000000..ae1874bf
--- /dev/null
+++ b/typify-impl/tests/test_all_of.rs
@@ -0,0 +1,28 @@
+// Copyright 2026 Oxide Computer Company
+
+use std::{fs::File, io::BufReader, path::Path};
+
+use schemars::schema::RootSchema;
+use typify_impl::TypeSpace;
+
+/// Verify that `allOf` schemas with three distinct string patterns are merged
+/// with a sequence of lookaheads and that the generated `FromStr` impl checks
+/// all three constraints.
+#[test]
+fn test_allof_three_patterns() {
+    let mut type_space = TypeSpace::default();
+
+    let path = Path::new("tests/all_of.json");
+    let file = File::open(path).unwrap();
+    let reader = BufReader::new(file);
+
+    let schema: RootSchema = serde_json::from_reader(reader).unwrap();
+
+    type_space.add_root_schema(schema).unwrap();
+
+    let file = type_space.to_stream();
+
+    let fmt = rustfmt_wrapper::rustfmt(file.to_string()).unwrap();
+
+    expectorate::assert_contents("tests/all_of.out", fmt.as_str());
+}
diff --git a/typify-test/Cargo.toml b/typify-test/Cargo.toml
index 8e134288..e9e60c6c 100644
--- a/typify-test/Cargo.toml
+++ b/typify-test/Cargo.toml
@@ -15,4 +15,5 @@ ipnetwork = { workspace = true }
 prettyplease = { workspace = true }
 schemars = { workspace = true }
 serde = { workspace = true }
+serde_json = { workspace = true }
 syn = { workspace = true }
diff --git a/typify-test/build.rs b/typify-test/build.rs
index f5c77373..69c5aff7 100644
--- a/typify-test/build.rs
+++ b/typify-test/build.rs
@@ -166,6 +166,18 @@ fn main() {
     let mut out_file = Path::new(&env::var("OUT_DIR").unwrap()).to_path_buf();
     out_file.push("codegen_custommap.rs");
     fs::write(out_file, contents).unwrap();
+
+    // Generate types from the allOf test schema.
+    println!("cargo:rerun-if-changed=../typify-impl/tests/all_of.json");
+    let content = fs::read_to_string("../typify-impl/tests/all_of.json").unwrap();
+    let schema = serde_json::from_str::<schemars::schema::RootSchema>(&content).unwrap();
+    let mut type_space = TypeSpace::default();
+    type_space.add_root_schema(schema).unwrap();
+    let contents =
+        prettyplease::unparse(&syn::parse2::<syn::File>(type_space.to_stream()).unwrap());
+    let mut out_file = Path::new(&env::var("OUT_DIR").unwrap()).to_path_buf();
+    out_file.push("codegen_all_of.rs");
+    fs::write(out_file, contents).unwrap();
 }
 
 trait AddType {
diff --git a/typify-test/src/main.rs b/typify-test/src/main.rs
index 3ae1bc88..d140452e 100644
--- a/typify-test/src/main.rs
+++ b/typify-test/src/main.rs
@@ -70,6 +70,34 @@ mod hashmap {
     }
 }
 
+mod all_of {
+    #![allow(dead_code)]
+
+    include!(concat!(env!("OUT_DIR"), "/codegen_all_of.rs"));
+
+    #[test]
+    fn test_triple_pattern() {
+        // Must satisfy all three patterns simultaneously:
+        //   1. ^[a-z].+$  — starts with lowercase
+        //   2. ^.{4,8}$   — 4–8 characters long
+        //   3. .+[a-z]$   — ends with lowercase
+
+        // Valid: 4 lowercase letters
+        assert!(TriplePattern::try_from("abcd").is_ok());
+        // Valid: 6 lowercase letters
+        assert!(TriplePattern::try_from("abcdef").is_ok());
+
+        // Fails: starts with uppercase
+        assert!(TriplePattern::try_from("Abcd").is_err());
+        // Fails: ends with uppercase
+        assert!(TriplePattern::try_from("abcD").is_err());
+        // Fails: too short
+        assert!(TriplePattern::try_from("abc").is_err());
+        // Fails: too long
+        assert!(TriplePattern::try_from("abcdefghijkl").is_err());
+    }
+}
+
 mod custom_map {
     #![allow(dead_code)]
 