A better Rust ATProto crate
1

Configure Feed

Select the types of activity you want to include in your feed.

at main 42 kB View raw
1//! Runtime validation of Data values against lexicon schemas 2//! 3//! This module provides infrastructure for validating untyped `Data` values against 4//! lexicon schemas, enabling partial deserialization, debugging, and schema migration. 5 6use crate::lexicon::{LexArrayItem, LexObjectProperty}; 7use crate::ref_utils::RefPath; 8use crate::schema::SchemaRegistry; 9use cid::Cid as IpldCid; 10use dashmap::DashMap; 11use jacquard_common::{deps::smol_str, types::value::Data}; 12use sha2::{Digest, Sha256}; 13use smol_str::SmolStr; 14use std::{ 15 fmt, 16 sync::{Arc, LazyLock}, 17}; 18 19/// Path to a value within a data structure 20/// 21/// Tracks the location of values during validation for precise error reporting. 22#[derive(Debug, Clone, PartialEq, Eq)] 23pub struct ValidationPath { 24 segments: Vec<PathSegment>, 25} 26 27/// A segment in a validation path 28#[derive(Debug, Clone, PartialEq, Eq)] 29pub enum PathSegment { 30 /// Object field access 31 Field(SmolStr), 32 /// Array index access 33 Index(usize), 34 /// Union variant discriminator 35 UnionVariant(SmolStr), 36} 37 38impl ValidationPath { 39 /// Create a new empty path 40 pub fn new() -> Self { 41 Self { 42 segments: Vec::new(), 43 } 44 } 45 46 /// Create a path with a single field segment 47 pub fn from_field(name: &str) -> Self { 48 let mut path = Self::new(); 49 path.push_field(name); 50 path 51 } 52 53 /// Add a field segment to the path 54 pub fn push_field(&mut self, name: &str) { 55 self.segments.push(PathSegment::Field(name.into())); 56 } 57 58 /// Add an index segment to the path 59 pub fn push_index(&mut self, idx: usize) { 60 self.segments.push(PathSegment::Index(idx)); 61 } 62 63 /// Add a union variant segment to the path 64 pub fn push_variant(&mut self, type_str: &str) { 65 self.segments 66 .push(PathSegment::UnionVariant(type_str.into())); 67 } 68 69 /// Remove the last segment from the path 70 pub fn pop(&mut self) { 71 self.segments.pop(); 72 } 73 74 /// Get the depth of the path 75 pub fn depth(&self) -> usize { 76 self.segments.len() 77 } 78 79 /// Check if the path is empty 80 pub fn is_empty(&self) -> bool { 81 self.segments.is_empty() 82 } 83 84 pub fn segments(&self) -> &[PathSegment] { 85 &self.segments 86 } 87} 88 89impl Default for ValidationPath { 90 fn default() -> Self { 91 Self::new() 92 } 93} 94 95impl fmt::Display for ValidationPath { 96 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 97 if self.segments.is_empty() { 98 return write!(f, "(root)"); 99 } 100 101 for seg in &self.segments { 102 match seg { 103 PathSegment::Field(name) => write!(f, ".{}", name)?, 104 PathSegment::Index(idx) => write!(f, "[{}]", idx)?, 105 PathSegment::UnionVariant(t) => write!(f, "($type={})", t)?, 106 } 107 } 108 Ok(()) 109 } 110} 111 112/// Structural validation errors 113/// 114/// These errors indicate that the data structure doesn't match the schema's type expectations. 115#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 116#[non_exhaustive] 117pub enum StructuralError { 118 #[error("Type mismatch at {path}: expected {expected}, got {actual}")] 119 TypeMismatch { 120 path: ValidationPath, 121 expected: jacquard_common::types::DataModelType, 122 actual: jacquard_common::types::DataModelType, 123 }, 124 125 #[error("Missing required field at {path}: '{field}'")] 126 MissingRequiredField { 127 path: ValidationPath, 128 field: SmolStr, 129 }, 130 131 #[error("Missing union discriminator ($type) at {path}")] 132 MissingUnionDiscriminator { path: ValidationPath }, 133 134 #[error("Union type mismatch at {path}: $type='{actual_type}' not in [{expected_refs}]")] 135 UnionNoMatch { 136 path: ValidationPath, 137 actual_type: SmolStr, 138 expected_refs: SmolStr, 139 }, 140 141 #[error("Unresolved ref at {path}: '{ref_nsid}'")] 142 UnresolvedRef { 143 path: ValidationPath, 144 ref_nsid: SmolStr, 145 }, 146 147 #[error("Reference cycle detected at {path}: '{ref_nsid}' (stack: {stack})")] 148 RefCycle { 149 path: ValidationPath, 150 ref_nsid: SmolStr, 151 stack: SmolStr, 152 }, 153 154 #[error("Max validation depth exceeded at {path}: {max}")] 155 MaxDepthExceeded { path: ValidationPath, max: usize }, 156} 157 158/// Constraint validation errors 159/// 160/// These errors indicate that the data violates lexicon constraints like max_length, 161/// max_graphemes, ranges, etc. The structure is correct but values are out of bounds. 162#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)] 163#[non_exhaustive] 164pub enum ConstraintError { 165 #[error("{path} exceeds max length: {actual} > {max}")] 166 MaxLength { 167 path: ValidationPath, 168 max: usize, 169 actual: usize, 170 }, 171 172 #[error("{path} exceeds max graphemes: {actual} > {max}")] 173 MaxGraphemes { 174 path: ValidationPath, 175 max: usize, 176 actual: usize, 177 }, 178 179 #[error("{path} below min length: {actual} < {min}")] 180 MinLength { 181 path: ValidationPath, 182 min: usize, 183 actual: usize, 184 }, 185 186 #[error("{path} below min graphemes: {actual} < {min}")] 187 MinGraphemes { 188 path: ValidationPath, 189 min: usize, 190 actual: usize, 191 }, 192 193 #[error("{path} value {actual} exceeds maximum: {max}")] 194 Maximum { 195 path: ValidationPath, 196 max: i64, 197 actual: i64, 198 }, 199 200 #[error("{path} value {actual} below minimum: {min}")] 201 Minimum { 202 path: ValidationPath, 203 min: i64, 204 actual: i64, 205 }, 206 207 #[error("{path} blob size {actual} exceeds maximum: {max}")] 208 BlobTooLarge { 209 path: ValidationPath, 210 max: usize, 211 actual: usize, 212 }, 213 214 #[error("{path} blob MIME type '{actual}' not in accepted types: {accepted:?}")] 215 BlobMimeTypeNotAccepted { 216 path: ValidationPath, 217 accepted: Vec<String>, 218 actual: String, 219 }, 220} 221 222/// Unified validation error type 223#[derive(Debug, Clone, thiserror::Error)] 224#[non_exhaustive] 225pub enum ValidationError { 226 #[error(transparent)] 227 Structural(#[from] StructuralError), 228 229 #[error(transparent)] 230 Constraint(#[from] ConstraintError), 231} 232 233/// Cache key for validation results 234/// 235/// Content-addressed by CID to enable efficient caching across identical data. 236#[derive(Debug, Clone, Hash, Eq, PartialEq)] 237struct ValidationCacheKey { 238 nsid: SmolStr, 239 def_name: SmolStr, 240 cid: IpldCid, 241} 242 243impl ValidationCacheKey { 244 /// Create cache key from schema info and data 245 fn from_data<T: crate::schema::LexiconSchema>( 246 data: &Data, 247 ) -> Result<Self, CidComputationError> { 248 let cid = compute_data_cid(data)?; 249 Ok(Self { 250 nsid: SmolStr::new_static(T::nsid()), 251 def_name: SmolStr::new_static(T::def_name()), 252 cid, 253 }) 254 } 255} 256 257/// Errors that can occur when computing CIDs 258#[derive(Debug, thiserror::Error)] 259#[non_exhaustive] 260pub enum CidComputationError { 261 #[error("Failed to serialize data to DAG-CBOR: {0}")] 262 DagCborEncode(#[from] serde_ipld_dagcbor::EncodeError<std::collections::TryReserveError>), 263 264 #[error("Failed to create multihash: {0}")] 265 Multihash(#[from] multihash::Error), 266} 267 268/// Compute CID for Data value 269/// 270/// Uses SHA-256 hash and DAG-CBOR codec for content addressing. 271fn compute_data_cid(data: &Data) -> Result<IpldCid, CidComputationError> { 272 // Serialize to DAG-CBOR 273 let dag_cbor = data.to_dag_cbor()?; 274 275 // Compute SHA-256 hash 276 let hash = Sha256::digest(&dag_cbor); 277 278 // Create multihash (code 0x12 = sha2-256) 279 let multihash = multihash::Multihash::wrap(0x12, &hash)?; 280 281 // Create CIDv1 with dag-cbor codec (0x71) 282 Ok(IpldCid::new_v1(0x71, multihash)) 283} 284 285/// Trait for converting lexicon types to object properties 286/// 287/// This enables type-safe conversion between array items and object properties 288/// for unified validation logic. 289trait IntoObjectProperty<'a> { 290 /// Convert this type to an equivalent object property 291 fn into_object_property(self) -> LexObjectProperty<'a>; 292} 293 294impl<'a> IntoObjectProperty<'a> for LexArrayItem<'a> { 295 fn into_object_property(self) -> LexObjectProperty<'a> { 296 match self { 297 LexArrayItem::String(s) => LexObjectProperty::String(s), 298 LexArrayItem::Integer(i) => LexObjectProperty::Integer(i), 299 LexArrayItem::Boolean(b) => LexObjectProperty::Boolean(b), 300 LexArrayItem::Object(o) => LexObjectProperty::Object(o), 301 LexArrayItem::Unknown(u) => LexObjectProperty::Unknown(u), 302 LexArrayItem::Bytes(b) => LexObjectProperty::Bytes(b), 303 LexArrayItem::CidLink(c) => LexObjectProperty::CidLink(c), 304 LexArrayItem::Blob(b) => LexObjectProperty::Blob(b), 305 LexArrayItem::Ref(r) => LexObjectProperty::Ref(r), 306 LexArrayItem::Union(u) => LexObjectProperty::Union(u), 307 } 308 } 309} 310 311/// Result of validating Data against a schema 312/// 313/// Distinguishes between structural errors (type mismatches, missing fields) and 314/// constraint violations (max_length, ranges, etc.). 315#[derive(Debug, Clone)] 316pub enum ValidationResult { 317 /// Only structural validation was performed (or data was structurally invalid) 318 StructuralOnly { structural: Vec<StructuralError> }, 319 /// Both structural and constraint validation were performed 320 Complete { 321 structural: Vec<StructuralError>, 322 constraints: Vec<ConstraintError>, 323 }, 324} 325 326impl ValidationResult { 327 /// Check if validation passed (no structural or constraint errors) 328 pub fn is_valid(&self) -> bool { 329 match self { 330 ValidationResult::StructuralOnly { structural } => structural.is_empty(), 331 ValidationResult::Complete { 332 structural, 333 constraints, 334 } => structural.is_empty() && constraints.is_empty(), 335 } 336 } 337 338 /// Check if structurally valid (ignoring constraint checks) 339 pub fn is_structurally_valid(&self) -> bool { 340 match self { 341 ValidationResult::StructuralOnly { structural } => structural.is_empty(), 342 ValidationResult::Complete { structural, .. } => structural.is_empty(), 343 } 344 } 345 346 /// Get structural errors 347 pub fn structural_errors(&self) -> &[StructuralError] { 348 match self { 349 ValidationResult::StructuralOnly { structural } => structural, 350 ValidationResult::Complete { structural, .. } => structural, 351 } 352 } 353 354 /// Get constraint errors 355 pub fn constraint_errors(&self) -> &[ConstraintError] { 356 match self { 357 ValidationResult::StructuralOnly { .. } => &[], 358 ValidationResult::Complete { constraints, .. } => constraints, 359 } 360 } 361 362 /// Check if there are any constraint violations 363 pub fn has_constraint_violations(&self) -> bool { 364 !self.constraint_errors().is_empty() 365 } 366 367 /// Get all errors (structural and constraint) 368 pub fn all_errors(&self) -> impl Iterator<Item = ValidationError> + '_ { 369 self.structural_errors() 370 .iter() 371 .cloned() 372 .map(ValidationError::Structural) 373 .chain( 374 self.constraint_errors() 375 .iter() 376 .cloned() 377 .map(ValidationError::Constraint), 378 ) 379 } 380} 381 382/// Schema validator with caching 383/// 384/// Validates Data values against lexicon schemas, caching results by content hash. 385pub struct SchemaValidator { 386 registry: SchemaRegistry, 387 cache: DashMap<ValidationCacheKey, Arc<ValidationResult>>, 388} 389 390static VALIDATOR: LazyLock<SchemaValidator> = LazyLock::new(|| SchemaValidator { 391 registry: SchemaRegistry::from_inventory(), 392 cache: DashMap::new(), 393}); 394 395impl SchemaValidator { 396 /// Get the global validator instance 397 pub fn global() -> &'static Self { 398 &VALIDATOR 399 } 400 401 /// Create a new validator with empty registry 402 pub fn new() -> Self { 403 Self { 404 registry: SchemaRegistry::new(), 405 cache: DashMap::new(), 406 } 407 } 408 409 pub fn from_registry(registry: SchemaRegistry) -> Self { 410 Self { 411 registry, 412 cache: DashMap::new(), 413 } 414 } 415 416 /// Validate data against a schema (structural and constraints) 417 /// 418 /// Performs both structural validation (types, required fields) and constraint 419 /// validation (max_length, ranges, etc.). Results are cached by content hash. 420 pub fn validate<T: crate::schema::LexiconSchema>( 421 &self, 422 data: &Data, 423 ) -> Result<ValidationResult, CidComputationError> { 424 // Compute cache key 425 let key = ValidationCacheKey::from_data::<T>(data)?; 426 427 // Check cache (clone Arc immediately to avoid holding ref) 428 if let Some(cached) = self.cache.get(&key).map(|r| Arc::clone(&r)) { 429 return Ok((*cached).clone()); 430 } 431 432 // Perform validation 433 let result = self.validate_uncached::<T>(data); 434 435 // Cache result 436 self.cache.insert(key, Arc::new(result.clone())); 437 438 Ok(result) 439 } 440 441 /// Validate only the structural aspects of data against a schema 442 /// 443 /// Only checks types, required fields, and schema structure. Does not check 444 /// constraints like max_length, ranges, etc. This is faster when you only 445 /// care about type correctness. 446 pub fn validate_structural<T: crate::schema::LexiconSchema>( 447 &self, 448 data: &Data, 449 ) -> ValidationResult { 450 self.validate_structural_uncached::<T>(data) 451 } 452 453 /// Validate without caching (internal) 454 fn validate_uncached<T: crate::schema::LexiconSchema>(&self, data: &Data) -> ValidationResult { 455 let def = match self.registry.get_def(T::nsid(), T::def_name()) { 456 Some(d) => d, 457 None => { 458 // Schema not found - this is a structural error 459 return ValidationResult::StructuralOnly { 460 structural: vec![StructuralError::UnresolvedRef { 461 path: ValidationPath::new(), 462 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(), 463 }], 464 }; 465 } 466 }; 467 468 let mut path = ValidationPath::new(); 469 let mut ctx = ValidationContext::new(T::nsid(), T::def_name()); 470 471 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 472 473 // If structurally invalid, return structural errors only 474 if !structural_errors.is_empty() { 475 return ValidationResult::StructuralOnly { 476 structural: structural_errors, 477 }; 478 } 479 480 // Structurally valid - compute constraints eagerly 481 let mut path = ValidationPath::new(); 482 let constraint_errors = validate_constraints( 483 &mut path, 484 data, 485 T::nsid(), 486 T::def_name(), 487 Some(&Arc::new(self.registry.clone())), 488 ); 489 490 ValidationResult::Complete { 491 structural: structural_errors, 492 constraints: constraint_errors, 493 } 494 } 495 496 /// Validate structural aspects only without caching (internal) 497 fn validate_structural_uncached<T: crate::schema::LexiconSchema>( 498 &self, 499 data: &Data, 500 ) -> ValidationResult { 501 let def = match self.registry.get_def(T::nsid(), T::def_name()) { 502 Some(d) => d, 503 None => { 504 // Schema not found - this is a structural error 505 return ValidationResult::StructuralOnly { 506 structural: vec![StructuralError::UnresolvedRef { 507 path: ValidationPath::new(), 508 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(), 509 }], 510 }; 511 } 512 }; 513 514 let mut path = ValidationPath::new(); 515 let mut ctx = ValidationContext::new(T::nsid(), T::def_name()); 516 517 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 518 519 ValidationResult::StructuralOnly { 520 structural: structural_errors, 521 } 522 } 523 524 pub fn validate_by_nsid_structural(&self, nsid: &str, data: &Data) -> ValidationResult { 525 let mut split = nsid.split('#'); 526 let nsid = split.next().unwrap(); 527 let def_name = split.next().unwrap_or("main"); 528 let def = match self.registry.get_def(nsid, def_name) { 529 Some(d) => d, 530 None => { 531 // Schema not found - this is a structural error 532 return ValidationResult::StructuralOnly { 533 structural: vec![StructuralError::UnresolvedRef { 534 path: ValidationPath::new(), 535 ref_nsid: format!("{}#{}", nsid, def_name).into(), 536 }], 537 }; 538 } 539 }; 540 541 let mut path = ValidationPath::new(); 542 let mut ctx = ValidationContext::new(nsid, def_name); 543 544 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 545 546 ValidationResult::StructuralOnly { 547 structural: structural_errors, 548 } 549 } 550 551 pub fn validate_by_nsid(&self, nsid: &str, data: &Data) -> ValidationResult { 552 let mut split = nsid.split('#'); 553 let nsid = split.next().unwrap(); 554 let def_name = split.next().unwrap_or("main"); 555 let def = match self.registry.get_def(nsid, def_name) { 556 Some(d) => d, 557 None => { 558 // Schema not found - this is a structural error 559 return ValidationResult::StructuralOnly { 560 structural: vec![StructuralError::UnresolvedRef { 561 path: ValidationPath::new(), 562 ref_nsid: format!("{}#{}", nsid, def_name).into(), 563 }], 564 }; 565 } 566 }; 567 568 let mut path = ValidationPath::new(); 569 let mut ctx = ValidationContext::new(nsid, def_name); 570 571 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx); 572 573 // If structurally invalid, return structural errors only 574 if !structural_errors.is_empty() { 575 return ValidationResult::StructuralOnly { 576 structural: structural_errors, 577 }; 578 } 579 580 // Structurally valid - compute constraints eagerly 581 let mut path = ValidationPath::new(); 582 let constraint_errors = validate_constraints( 583 &mut path, 584 data, 585 nsid, 586 def_name, 587 Some(&Arc::new(self.registry.clone())), 588 ); 589 590 ValidationResult::Complete { 591 structural: structural_errors, 592 constraints: constraint_errors, 593 } 594 } 595 596 /// Get the schema registry 597 pub fn registry(&self) -> &SchemaRegistry { 598 &self.registry 599 } 600} 601 602impl Default for SchemaValidator { 603 fn default() -> Self { 604 Self::new() 605 } 606} 607 608/// Validation context for tracking refs and preventing cycles 609struct ValidationContext { 610 current_nsid: String, 611 current_def: String, 612 ref_stack: Vec<String>, 613 max_depth: usize, 614} 615 616impl ValidationContext { 617 fn new(nsid: &str, def_name: &str) -> Self { 618 Self { 619 current_nsid: nsid.to_string(), 620 current_def: def_name.to_string(), 621 ref_stack: Vec::new(), 622 max_depth: 32, 623 } 624 } 625} 626 627/// Validate data against a lexicon def 628fn validate_def( 629 path: &mut ValidationPath, 630 data: &Data, 631 def: &crate::lexicon::LexUserType, 632 registry: &SchemaRegistry, 633 ctx: &mut ValidationContext, 634) -> Vec<StructuralError> { 635 use crate::lexicon::LexUserType; 636 use jacquard_common::types::DataModelType; 637 638 match def { 639 LexUserType::Object(obj) => { 640 // Must be an object 641 let Data::Object(obj_data) = data else { 642 return vec![StructuralError::TypeMismatch { 643 path: path.clone(), 644 expected: DataModelType::Object, 645 actual: data.data_type(), 646 }]; 647 }; 648 649 let mut errors = Vec::new(); 650 651 // Check required fields 652 if let Some(required) = &obj.required { 653 for field in required { 654 if !obj_data.get(field.as_ref()).is_some() { 655 errors.push(StructuralError::MissingRequiredField { 656 path: path.clone(), 657 field: field.clone(), 658 }); 659 } 660 } 661 } 662 663 // Validate each property that's present 664 for (name, prop) in &obj.properties { 665 if let Some(field_data) = obj_data.get(name.as_ref()) { 666 path.push_field(name.as_ref()); 667 errors.extend(validate_property(path, field_data, prop, registry, ctx)); 668 path.pop(); 669 } 670 } 671 672 errors 673 } 674 LexUserType::Record(rec) => { 675 // Records are objects with record-specific metadata 676 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record; 677 678 let Data::Object(obj_data) = data else { 679 return vec![StructuralError::TypeMismatch { 680 path: path.clone(), 681 expected: data.data_type(), 682 actual: DataModelType::Object, 683 }]; 684 }; 685 686 let mut errors = Vec::new(); 687 688 // Check required fields 689 if let Some(required) = &obj.required { 690 for field in required { 691 if !obj_data.get(field.as_ref()).is_some() { 692 errors.push(StructuralError::MissingRequiredField { 693 path: path.clone(), 694 field: field.clone(), 695 }); 696 } 697 } 698 } 699 700 // Validate each property that's present 701 for (name, prop) in &obj.properties { 702 if let Some(field_data) = obj_data.get(name.as_ref()) { 703 path.push_field(name.as_ref()); 704 errors.extend(validate_property(path, field_data, prop, registry, ctx)); 705 path.pop(); 706 } 707 } 708 709 errors 710 } 711 // Token types are unit types, no validation needed beyond type checking 712 LexUserType::Token(_) => Vec::new(), 713 // XRPC types are endpoint definitions, not data types 714 LexUserType::XrpcQuery(_) 715 | LexUserType::XrpcProcedure(_) 716 | LexUserType::XrpcSubscription(_) => Vec::new(), 717 // Other types 718 _ => Vec::new(), 719 } 720} 721 722/// Validate data against a property schema 723fn validate_property( 724 path: &mut ValidationPath, 725 data: &Data, 726 prop: &crate::lexicon::LexObjectProperty, 727 registry: &SchemaRegistry, 728 ctx: &mut ValidationContext, 729) -> Vec<StructuralError> { 730 use crate::lexicon::LexObjectProperty; 731 use jacquard_common::types::DataModelType; 732 733 match prop { 734 LexObjectProperty::String(_) => { 735 // Accept any string type 736 if !matches!(data.data_type(), DataModelType::String(_)) { 737 vec![StructuralError::TypeMismatch { 738 path: path.clone(), 739 expected: DataModelType::String( 740 jacquard_common::types::LexiconStringType::String, 741 ), 742 actual: data.data_type(), 743 }] 744 } else { 745 Vec::new() 746 } 747 } 748 749 LexObjectProperty::Integer(_) => { 750 if !matches!(data.data_type(), DataModelType::Integer) { 751 vec![StructuralError::TypeMismatch { 752 path: path.clone(), 753 expected: DataModelType::Integer, 754 actual: data.data_type(), 755 }] 756 } else { 757 Vec::new() 758 } 759 } 760 761 LexObjectProperty::Boolean(_) => { 762 if !matches!(data.data_type(), DataModelType::Boolean) { 763 vec![StructuralError::TypeMismatch { 764 path: path.clone(), 765 expected: DataModelType::Boolean, 766 actual: data.data_type(), 767 }] 768 } else { 769 Vec::new() 770 } 771 } 772 773 LexObjectProperty::Object(obj) => { 774 let Data::Object(obj_data) = data else { 775 return vec![StructuralError::TypeMismatch { 776 path: path.clone(), 777 expected: DataModelType::Object, 778 actual: data.data_type(), 779 }]; 780 }; 781 782 let mut errors = Vec::new(); 783 784 // Check required fields 785 if let Some(required) = &obj.required { 786 for field in required { 787 if !obj_data.get(field.as_ref()).is_some() { 788 errors.push(StructuralError::MissingRequiredField { 789 path: path.clone(), 790 field: field.clone(), 791 }); 792 } 793 } 794 } 795 796 // Recursively validate each property 797 for (name, schema_prop) in &obj.properties { 798 if let Some(field_data) = obj_data.get(name.as_ref()) { 799 path.push_field(name.as_ref()); 800 errors.extend(validate_property( 801 path, 802 field_data, 803 schema_prop, 804 registry, 805 ctx, 806 )); 807 path.pop(); 808 } 809 } 810 811 errors 812 } 813 814 LexObjectProperty::Array(arr) => { 815 let Data::Array(array) = data else { 816 return vec![StructuralError::TypeMismatch { 817 path: path.clone(), 818 expected: DataModelType::Array, 819 actual: data.data_type(), 820 }]; 821 }; 822 823 let mut errors = Vec::new(); 824 for (idx, item) in array.iter().enumerate() { 825 path.push_index(idx); 826 errors.extend(validate_array_item(path, item, &arr.items, registry, ctx)); 827 path.pop(); 828 } 829 errors 830 } 831 832 LexObjectProperty::Union(u) => { 833 let Data::Object(obj) = data else { 834 return vec![StructuralError::TypeMismatch { 835 path: path.clone(), 836 expected: DataModelType::Object, 837 actual: data.data_type(), 838 }]; 839 }; 840 841 // Get $type discriminator 842 let Some(type_str) = obj.type_discriminator() else { 843 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }]; 844 }; 845 846 // Reject empty $type 847 if type_str.is_empty() { 848 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }]; 849 } 850 851 // Try to match against refs 852 for variant_ref in &u.refs { 853 let ref_path = RefPath::parse(variant_ref.as_ref(), Some(&ctx.current_nsid)); 854 let variant_nsid = ref_path.nsid().to_string(); 855 let variant_def = ref_path.def().to_string(); 856 let full_variant = ref_path.full_ref(); 857 858 // Match by full ref or just nsid 859 if type_str == full_variant || type_str == variant_nsid { 860 // Found match - validate against this variant 861 let Some(variant_def_type) = registry.get_def(&variant_nsid, &variant_def) 862 else { 863 return vec![StructuralError::UnresolvedRef { 864 path: path.clone(), 865 ref_nsid: full_variant.into(), 866 }]; 867 }; 868 869 path.push_variant(type_str); 870 let old_nsid = std::mem::replace(&mut ctx.current_nsid, variant_nsid); 871 let old_def = std::mem::replace(&mut ctx.current_def, variant_def); 872 873 let errors = validate_def(path, data, &variant_def_type, registry, ctx); 874 875 ctx.current_nsid = old_nsid; 876 ctx.current_def = old_def; 877 path.pop(); 878 879 return errors; 880 } 881 } 882 883 // No match found 884 if u.closed.unwrap_or(false) { 885 // Closed union - this is an error 886 let expected_refs = u 887 .refs 888 .iter() 889 .map(|r| r.as_ref()) 890 .collect::<Vec<_>>() 891 .join(", "); 892 vec![StructuralError::UnionNoMatch { 893 path: path.clone(), 894 actual_type: type_str.into(), 895 expected_refs: expected_refs.into(), 896 }] 897 } else { 898 // Open union - allow unknown variants 899 Vec::new() 900 } 901 } 902 903 LexObjectProperty::Ref(r) => { 904 // Depth check 905 if path.depth() >= ctx.max_depth { 906 return vec![StructuralError::MaxDepthExceeded { 907 path: path.clone(), 908 max: ctx.max_depth, 909 }]; 910 } 911 912 // Normalize ref 913 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(&ctx.current_nsid)); 914 let ref_nsid = ref_path.nsid().to_string(); 915 let ref_def = ref_path.def().to_string(); 916 let full_ref = ref_path.full_ref(); 917 918 // Cycle detection 919 if ctx.ref_stack.contains(&full_ref) { 920 let stack = ctx.ref_stack.join(" -> "); 921 return vec![StructuralError::RefCycle { 922 path: path.clone(), 923 ref_nsid: full_ref.into(), 924 stack: stack.into(), 925 }]; 926 } 927 928 // Look up ref 929 let Some(ref_def_type) = registry.get_def(&ref_nsid, &ref_def) else { 930 return vec![StructuralError::UnresolvedRef { 931 path: path.clone(), 932 ref_nsid: full_ref.into(), 933 }]; 934 }; 935 936 // Push, validate, pop 937 ctx.ref_stack.push(full_ref); 938 let old_nsid = std::mem::replace(&mut ctx.current_nsid, ref_nsid); 939 let old_def = std::mem::replace(&mut ctx.current_def, ref_def); 940 941 let errors = validate_def(path, data, &ref_def_type, registry, ctx); 942 943 ctx.current_nsid = old_nsid; 944 ctx.current_def = old_def; 945 ctx.ref_stack.pop(); 946 947 errors 948 } 949 950 LexObjectProperty::Bytes(_) => { 951 if !matches!(data.data_type(), DataModelType::Bytes) { 952 vec![StructuralError::TypeMismatch { 953 path: path.clone(), 954 expected: DataModelType::Bytes, 955 actual: data.data_type(), 956 }] 957 } else { 958 Vec::new() 959 } 960 } 961 962 LexObjectProperty::CidLink(_) => { 963 if !matches!(data.data_type(), DataModelType::CidLink) { 964 vec![StructuralError::TypeMismatch { 965 path: path.clone(), 966 expected: DataModelType::CidLink, 967 actual: data.data_type(), 968 }] 969 } else { 970 Vec::new() 971 } 972 } 973 974 LexObjectProperty::Blob(_) => { 975 if !matches!(data.data_type(), DataModelType::Blob) { 976 vec![StructuralError::TypeMismatch { 977 path: path.clone(), 978 expected: DataModelType::Blob, 979 actual: data.data_type(), 980 }] 981 } else { 982 Vec::new() 983 } 984 } 985 986 LexObjectProperty::Unknown(_) => { 987 // Any type allowed 988 Vec::new() 989 } 990 } 991} 992 993/// Validate array item against array item schema 994fn validate_array_item( 995 path: &mut ValidationPath, 996 data: &Data, 997 item_schema: &LexArrayItem, 998 registry: &SchemaRegistry, 999 ctx: &mut ValidationContext, 1000) -> Vec<StructuralError> { 1001 validate_property( 1002 path, 1003 data, 1004 &item_schema.clone().into_object_property(), 1005 registry, 1006 ctx, 1007 ) 1008} 1009 1010// ============================================================================ 1011// CONSTRAINT VALIDATION 1012// ============================================================================ 1013 1014/// Validate constraints on data against schema (entry point with optional registry) 1015fn validate_constraints( 1016 path: &mut ValidationPath, 1017 data: &Data, 1018 nsid: &str, 1019 def_name: &str, 1020 registry: Option<&Arc<SchemaRegistry>>, 1021) -> Vec<ConstraintError> { 1022 // Use provided registry or fall back to global inventory 1023 let fallback_registry; 1024 let registry_ref = match registry { 1025 Some(r) => r.as_ref(), 1026 None => { 1027 fallback_registry = SchemaRegistry::from_inventory(); 1028 &fallback_registry 1029 } 1030 }; 1031 1032 validate_constraints_impl(path, data, nsid, def_name, registry_ref) 1033} 1034 1035/// Internal implementation that takes materialized registry 1036fn validate_constraints_impl( 1037 path: &mut ValidationPath, 1038 data: &Data, 1039 nsid: &str, 1040 def_name: &str, 1041 registry: &SchemaRegistry, 1042) -> Vec<ConstraintError> { 1043 use crate::lexicon::LexUserType; 1044 1045 // Get schema def 1046 let Some(def) = registry.get_def(nsid, def_name) else { 1047 return Vec::new(); 1048 }; 1049 1050 match def { 1051 LexUserType::Object(obj) => { 1052 let Data::Object(obj_data) = data else { 1053 return Vec::new(); 1054 }; 1055 1056 let mut errors = Vec::new(); 1057 1058 // Check constraints on each property 1059 for (name, prop) in &obj.properties { 1060 if let Some(field_data) = obj_data.get(name.as_ref()) { 1061 path.push_field(name.as_ref()); 1062 errors.extend(check_property_constraints( 1063 path, field_data, prop, nsid, registry, 1064 )); 1065 path.pop(); 1066 } 1067 } 1068 1069 errors 1070 } 1071 LexUserType::Record(rec) => { 1072 // Records are objects with record-specific metadata 1073 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record; 1074 1075 let Data::Object(obj_data) = data else { 1076 return Vec::new(); 1077 }; 1078 1079 let mut errors = Vec::new(); 1080 1081 // Check constraints on each property 1082 for (name, prop) in &obj.properties { 1083 if let Some(field_data) = obj_data.get(name.as_ref()) { 1084 path.push_field(name.as_ref()); 1085 errors.extend(check_property_constraints( 1086 path, field_data, prop, nsid, registry, 1087 )); 1088 path.pop(); 1089 } 1090 } 1091 1092 errors 1093 } 1094 // Token types, XRPC types, and other types don't have constraints 1095 _ => Vec::new(), 1096 } 1097} 1098 1099/// Check constraints on a property 1100fn check_property_constraints( 1101 path: &mut ValidationPath, 1102 data: &Data, 1103 prop: &crate::lexicon::LexObjectProperty, 1104 current_nsid: &str, 1105 registry: &SchemaRegistry, 1106) -> Vec<ConstraintError> { 1107 use crate::lexicon::LexObjectProperty; 1108 1109 match prop { 1110 LexObjectProperty::String(s) => { 1111 if let Data::String(str_val) = data { 1112 check_string_constraints(path, str_val.as_str(), s) 1113 } else { 1114 Vec::new() 1115 } 1116 } 1117 1118 LexObjectProperty::Integer(i) => { 1119 if let Data::Integer(int_val) = data { 1120 check_integer_constraints(path, *int_val, i) 1121 } else { 1122 Vec::new() 1123 } 1124 } 1125 1126 LexObjectProperty::Array(arr) => { 1127 if let Data::Array(array) = data { 1128 let mut errors = check_array_constraints(path, array, arr); 1129 1130 // Also check constraints on array items 1131 for (idx, item) in array.iter().enumerate() { 1132 path.push_index(idx); 1133 errors.extend(check_array_item_constraints( 1134 path, 1135 item, 1136 &arr.items, 1137 current_nsid, 1138 registry, 1139 )); 1140 path.pop(); 1141 } 1142 1143 errors 1144 } else { 1145 Vec::new() 1146 } 1147 } 1148 1149 LexObjectProperty::Object(obj) => { 1150 if let Data::Object(obj_data) = data { 1151 let mut errors = Vec::new(); 1152 1153 // Recursively check nested object properties 1154 for (name, schema_prop) in &obj.properties { 1155 if let Some(field_data) = obj_data.get(name.as_ref()) { 1156 path.push_field(name.as_ref()); 1157 errors.extend(check_property_constraints( 1158 path, 1159 field_data, 1160 schema_prop, 1161 current_nsid, 1162 registry, 1163 )); 1164 path.pop(); 1165 } 1166 } 1167 1168 errors 1169 } else { 1170 Vec::new() 1171 } 1172 } 1173 1174 LexObjectProperty::Ref(r) => { 1175 // Follow ref and check constraints 1176 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(current_nsid)); 1177 let ref_nsid = ref_path.nsid(); 1178 let ref_def = ref_path.def(); 1179 1180 if registry.get_def(ref_nsid, ref_def).is_some() { 1181 validate_constraints_impl(path, data, ref_nsid, ref_def, registry) 1182 } else { 1183 Vec::new() 1184 } 1185 } 1186 1187 // Other property types don't have constraints 1188 _ => Vec::new(), 1189 } 1190} 1191 1192/// Check string constraints 1193fn check_string_constraints( 1194 path: &ValidationPath, 1195 value: &str, 1196 schema: &crate::lexicon::LexString, 1197) -> Vec<ConstraintError> { 1198 let mut errors = Vec::new(); 1199 1200 // Check byte length constraints 1201 let byte_len = value.len(); 1202 1203 if let Some(min) = schema.min_length { 1204 if byte_len < min as usize { 1205 errors.push(ConstraintError::MinLength { 1206 path: path.clone(), 1207 min: min as usize, 1208 actual: byte_len, 1209 }); 1210 } 1211 } 1212 1213 if let Some(max) = schema.max_length { 1214 if byte_len > max as usize { 1215 errors.push(ConstraintError::MaxLength { 1216 path: path.clone(), 1217 max: max as usize, 1218 actual: byte_len, 1219 }); 1220 } 1221 } 1222 1223 // Check grapheme count constraints 1224 if schema.min_graphemes.is_some() || schema.max_graphemes.is_some() { 1225 use unicode_segmentation::UnicodeSegmentation; 1226 let grapheme_count = value.graphemes(true).count(); 1227 1228 if let Some(min) = schema.min_graphemes { 1229 if grapheme_count < min as usize { 1230 errors.push(ConstraintError::MinGraphemes { 1231 path: path.clone(), 1232 min: min as usize, 1233 actual: grapheme_count, 1234 }); 1235 } 1236 } 1237 1238 if let Some(max) = schema.max_graphemes { 1239 if grapheme_count > max as usize { 1240 errors.push(ConstraintError::MaxGraphemes { 1241 path: path.clone(), 1242 max: max as usize, 1243 actual: grapheme_count, 1244 }); 1245 } 1246 } 1247 } 1248 1249 errors 1250} 1251 1252/// Check integer constraints 1253fn check_integer_constraints( 1254 path: &ValidationPath, 1255 value: i64, 1256 schema: &crate::lexicon::LexInteger, 1257) -> Vec<ConstraintError> { 1258 let mut errors = Vec::new(); 1259 1260 if let Some(min) = schema.minimum { 1261 if value < min { 1262 errors.push(ConstraintError::Minimum { 1263 path: path.clone(), 1264 min, 1265 actual: value, 1266 }); 1267 } 1268 } 1269 1270 if let Some(max) = schema.maximum { 1271 if value > max { 1272 errors.push(ConstraintError::Maximum { 1273 path: path.clone(), 1274 max, 1275 actual: value, 1276 }); 1277 } 1278 } 1279 1280 errors 1281} 1282 1283/// Check array length constraints 1284fn check_array_constraints( 1285 path: &ValidationPath, 1286 array: &jacquard_common::types::value::Array, 1287 schema: &crate::lexicon::LexArray, 1288) -> Vec<ConstraintError> { 1289 let mut errors = Vec::new(); 1290 let len = array.len(); 1291 1292 if let Some(min) = schema.min_length { 1293 if len < min as usize { 1294 errors.push(ConstraintError::MinLength { 1295 path: path.clone(), 1296 min: min as usize, 1297 actual: len, 1298 }); 1299 } 1300 } 1301 1302 if let Some(max) = schema.max_length { 1303 if len > max as usize { 1304 errors.push(ConstraintError::MaxLength { 1305 path: path.clone(), 1306 max: max as usize, 1307 actual: len, 1308 }); 1309 } 1310 } 1311 1312 errors 1313} 1314 1315/// Check constraints on array items 1316fn check_array_item_constraints( 1317 path: &mut ValidationPath, 1318 data: &Data, 1319 item_schema: &LexArrayItem, 1320 current_nsid: &str, 1321 registry: &SchemaRegistry, 1322) -> Vec<ConstraintError> { 1323 check_property_constraints( 1324 path, 1325 data, 1326 &item_schema.clone().into_object_property(), 1327 current_nsid, 1328 registry, 1329 ) 1330} 1331 1332#[cfg(test)] 1333mod tests;