A better Rust ATProto crate
1//! Runtime validation of Data values against lexicon schemas
2//!
3//! This module provides infrastructure for validating untyped `Data` values against
4//! lexicon schemas, enabling partial deserialization, debugging, and schema migration.
5
6use crate::lexicon::{LexArrayItem, LexObjectProperty};
7use crate::ref_utils::RefPath;
8use crate::schema::SchemaRegistry;
9use cid::Cid as IpldCid;
10use dashmap::DashMap;
11use jacquard_common::{deps::smol_str, types::value::Data};
12use sha2::{Digest, Sha256};
13use smol_str::SmolStr;
14use std::{
15 fmt,
16 sync::{Arc, LazyLock},
17};
18
19/// Path to a value within a data structure
20///
21/// Tracks the location of values during validation for precise error reporting.
22#[derive(Debug, Clone, PartialEq, Eq)]
23pub struct ValidationPath {
24 segments: Vec<PathSegment>,
25}
26
27/// A segment in a validation path
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub enum PathSegment {
30 /// Object field access
31 Field(SmolStr),
32 /// Array index access
33 Index(usize),
34 /// Union variant discriminator
35 UnionVariant(SmolStr),
36}
37
38impl ValidationPath {
39 /// Create a new empty path
40 pub fn new() -> Self {
41 Self {
42 segments: Vec::new(),
43 }
44 }
45
46 /// Create a path with a single field segment
47 pub fn from_field(name: &str) -> Self {
48 let mut path = Self::new();
49 path.push_field(name);
50 path
51 }
52
53 /// Add a field segment to the path
54 pub fn push_field(&mut self, name: &str) {
55 self.segments.push(PathSegment::Field(name.into()));
56 }
57
58 /// Add an index segment to the path
59 pub fn push_index(&mut self, idx: usize) {
60 self.segments.push(PathSegment::Index(idx));
61 }
62
63 /// Add a union variant segment to the path
64 pub fn push_variant(&mut self, type_str: &str) {
65 self.segments
66 .push(PathSegment::UnionVariant(type_str.into()));
67 }
68
69 /// Remove the last segment from the path
70 pub fn pop(&mut self) {
71 self.segments.pop();
72 }
73
74 /// Get the depth of the path
75 pub fn depth(&self) -> usize {
76 self.segments.len()
77 }
78
79 /// Check if the path is empty
80 pub fn is_empty(&self) -> bool {
81 self.segments.is_empty()
82 }
83
84 pub fn segments(&self) -> &[PathSegment] {
85 &self.segments
86 }
87}
88
89impl Default for ValidationPath {
90 fn default() -> Self {
91 Self::new()
92 }
93}
94
95impl fmt::Display for ValidationPath {
96 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97 if self.segments.is_empty() {
98 return write!(f, "(root)");
99 }
100
101 for seg in &self.segments {
102 match seg {
103 PathSegment::Field(name) => write!(f, ".{}", name)?,
104 PathSegment::Index(idx) => write!(f, "[{}]", idx)?,
105 PathSegment::UnionVariant(t) => write!(f, "($type={})", t)?,
106 }
107 }
108 Ok(())
109 }
110}
111
112/// Structural validation errors
113///
114/// These errors indicate that the data structure doesn't match the schema's type expectations.
115#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
116#[non_exhaustive]
117pub enum StructuralError {
118 #[error("Type mismatch at {path}: expected {expected}, got {actual}")]
119 TypeMismatch {
120 path: ValidationPath,
121 expected: jacquard_common::types::DataModelType,
122 actual: jacquard_common::types::DataModelType,
123 },
124
125 #[error("Missing required field at {path}: '{field}'")]
126 MissingRequiredField {
127 path: ValidationPath,
128 field: SmolStr,
129 },
130
131 #[error("Missing union discriminator ($type) at {path}")]
132 MissingUnionDiscriminator { path: ValidationPath },
133
134 #[error("Union type mismatch at {path}: $type='{actual_type}' not in [{expected_refs}]")]
135 UnionNoMatch {
136 path: ValidationPath,
137 actual_type: SmolStr,
138 expected_refs: SmolStr,
139 },
140
141 #[error("Unresolved ref at {path}: '{ref_nsid}'")]
142 UnresolvedRef {
143 path: ValidationPath,
144 ref_nsid: SmolStr,
145 },
146
147 #[error("Reference cycle detected at {path}: '{ref_nsid}' (stack: {stack})")]
148 RefCycle {
149 path: ValidationPath,
150 ref_nsid: SmolStr,
151 stack: SmolStr,
152 },
153
154 #[error("Max validation depth exceeded at {path}: {max}")]
155 MaxDepthExceeded { path: ValidationPath, max: usize },
156}
157
158/// Constraint validation errors
159///
160/// These errors indicate that the data violates lexicon constraints like max_length,
161/// max_graphemes, ranges, etc. The structure is correct but values are out of bounds.
162#[derive(Debug, Clone, thiserror::Error, miette::Diagnostic)]
163#[non_exhaustive]
164pub enum ConstraintError {
165 #[error("{path} exceeds max length: {actual} > {max}")]
166 MaxLength {
167 path: ValidationPath,
168 max: usize,
169 actual: usize,
170 },
171
172 #[error("{path} exceeds max graphemes: {actual} > {max}")]
173 MaxGraphemes {
174 path: ValidationPath,
175 max: usize,
176 actual: usize,
177 },
178
179 #[error("{path} below min length: {actual} < {min}")]
180 MinLength {
181 path: ValidationPath,
182 min: usize,
183 actual: usize,
184 },
185
186 #[error("{path} below min graphemes: {actual} < {min}")]
187 MinGraphemes {
188 path: ValidationPath,
189 min: usize,
190 actual: usize,
191 },
192
193 #[error("{path} value {actual} exceeds maximum: {max}")]
194 Maximum {
195 path: ValidationPath,
196 max: i64,
197 actual: i64,
198 },
199
200 #[error("{path} value {actual} below minimum: {min}")]
201 Minimum {
202 path: ValidationPath,
203 min: i64,
204 actual: i64,
205 },
206
207 #[error("{path} blob size {actual} exceeds maximum: {max}")]
208 BlobTooLarge {
209 path: ValidationPath,
210 max: usize,
211 actual: usize,
212 },
213
214 #[error("{path} blob MIME type '{actual}' not in accepted types: {accepted:?}")]
215 BlobMimeTypeNotAccepted {
216 path: ValidationPath,
217 accepted: Vec<String>,
218 actual: String,
219 },
220}
221
222/// Unified validation error type
223#[derive(Debug, Clone, thiserror::Error)]
224#[non_exhaustive]
225pub enum ValidationError {
226 #[error(transparent)]
227 Structural(#[from] StructuralError),
228
229 #[error(transparent)]
230 Constraint(#[from] ConstraintError),
231}
232
233/// Cache key for validation results
234///
235/// Content-addressed by CID to enable efficient caching across identical data.
236#[derive(Debug, Clone, Hash, Eq, PartialEq)]
237struct ValidationCacheKey {
238 nsid: SmolStr,
239 def_name: SmolStr,
240 cid: IpldCid,
241}
242
243impl ValidationCacheKey {
244 /// Create cache key from schema info and data
245 fn from_data<T: crate::schema::LexiconSchema>(
246 data: &Data,
247 ) -> Result<Self, CidComputationError> {
248 let cid = compute_data_cid(data)?;
249 Ok(Self {
250 nsid: SmolStr::new_static(T::nsid()),
251 def_name: SmolStr::new_static(T::def_name()),
252 cid,
253 })
254 }
255}
256
257/// Errors that can occur when computing CIDs
258#[derive(Debug, thiserror::Error)]
259#[non_exhaustive]
260pub enum CidComputationError {
261 #[error("Failed to serialize data to DAG-CBOR: {0}")]
262 DagCborEncode(#[from] serde_ipld_dagcbor::EncodeError<std::collections::TryReserveError>),
263
264 #[error("Failed to create multihash: {0}")]
265 Multihash(#[from] multihash::Error),
266}
267
268/// Compute CID for Data value
269///
270/// Uses SHA-256 hash and DAG-CBOR codec for content addressing.
271fn compute_data_cid(data: &Data) -> Result<IpldCid, CidComputationError> {
272 // Serialize to DAG-CBOR
273 let dag_cbor = data.to_dag_cbor()?;
274
275 // Compute SHA-256 hash
276 let hash = Sha256::digest(&dag_cbor);
277
278 // Create multihash (code 0x12 = sha2-256)
279 let multihash = multihash::Multihash::wrap(0x12, &hash)?;
280
281 // Create CIDv1 with dag-cbor codec (0x71)
282 Ok(IpldCid::new_v1(0x71, multihash))
283}
284
285/// Trait for converting lexicon types to object properties
286///
287/// This enables type-safe conversion between array items and object properties
288/// for unified validation logic.
289trait IntoObjectProperty<'a> {
290 /// Convert this type to an equivalent object property
291 fn into_object_property(self) -> LexObjectProperty<'a>;
292}
293
294impl<'a> IntoObjectProperty<'a> for LexArrayItem<'a> {
295 fn into_object_property(self) -> LexObjectProperty<'a> {
296 match self {
297 LexArrayItem::String(s) => LexObjectProperty::String(s),
298 LexArrayItem::Integer(i) => LexObjectProperty::Integer(i),
299 LexArrayItem::Boolean(b) => LexObjectProperty::Boolean(b),
300 LexArrayItem::Object(o) => LexObjectProperty::Object(o),
301 LexArrayItem::Unknown(u) => LexObjectProperty::Unknown(u),
302 LexArrayItem::Bytes(b) => LexObjectProperty::Bytes(b),
303 LexArrayItem::CidLink(c) => LexObjectProperty::CidLink(c),
304 LexArrayItem::Blob(b) => LexObjectProperty::Blob(b),
305 LexArrayItem::Ref(r) => LexObjectProperty::Ref(r),
306 LexArrayItem::Union(u) => LexObjectProperty::Union(u),
307 }
308 }
309}
310
311/// Result of validating Data against a schema
312///
313/// Distinguishes between structural errors (type mismatches, missing fields) and
314/// constraint violations (max_length, ranges, etc.).
315#[derive(Debug, Clone)]
316pub enum ValidationResult {
317 /// Only structural validation was performed (or data was structurally invalid)
318 StructuralOnly { structural: Vec<StructuralError> },
319 /// Both structural and constraint validation were performed
320 Complete {
321 structural: Vec<StructuralError>,
322 constraints: Vec<ConstraintError>,
323 },
324}
325
326impl ValidationResult {
327 /// Check if validation passed (no structural or constraint errors)
328 pub fn is_valid(&self) -> bool {
329 match self {
330 ValidationResult::StructuralOnly { structural } => structural.is_empty(),
331 ValidationResult::Complete {
332 structural,
333 constraints,
334 } => structural.is_empty() && constraints.is_empty(),
335 }
336 }
337
338 /// Check if structurally valid (ignoring constraint checks)
339 pub fn is_structurally_valid(&self) -> bool {
340 match self {
341 ValidationResult::StructuralOnly { structural } => structural.is_empty(),
342 ValidationResult::Complete { structural, .. } => structural.is_empty(),
343 }
344 }
345
346 /// Get structural errors
347 pub fn structural_errors(&self) -> &[StructuralError] {
348 match self {
349 ValidationResult::StructuralOnly { structural } => structural,
350 ValidationResult::Complete { structural, .. } => structural,
351 }
352 }
353
354 /// Get constraint errors
355 pub fn constraint_errors(&self) -> &[ConstraintError] {
356 match self {
357 ValidationResult::StructuralOnly { .. } => &[],
358 ValidationResult::Complete { constraints, .. } => constraints,
359 }
360 }
361
362 /// Check if there are any constraint violations
363 pub fn has_constraint_violations(&self) -> bool {
364 !self.constraint_errors().is_empty()
365 }
366
367 /// Get all errors (structural and constraint)
368 pub fn all_errors(&self) -> impl Iterator<Item = ValidationError> + '_ {
369 self.structural_errors()
370 .iter()
371 .cloned()
372 .map(ValidationError::Structural)
373 .chain(
374 self.constraint_errors()
375 .iter()
376 .cloned()
377 .map(ValidationError::Constraint),
378 )
379 }
380}
381
382/// Schema validator with caching
383///
384/// Validates Data values against lexicon schemas, caching results by content hash.
385pub struct SchemaValidator {
386 registry: SchemaRegistry,
387 cache: DashMap<ValidationCacheKey, Arc<ValidationResult>>,
388}
389
390static VALIDATOR: LazyLock<SchemaValidator> = LazyLock::new(|| SchemaValidator {
391 registry: SchemaRegistry::from_inventory(),
392 cache: DashMap::new(),
393});
394
395impl SchemaValidator {
396 /// Get the global validator instance
397 pub fn global() -> &'static Self {
398 &VALIDATOR
399 }
400
401 /// Create a new validator with empty registry
402 pub fn new() -> Self {
403 Self {
404 registry: SchemaRegistry::new(),
405 cache: DashMap::new(),
406 }
407 }
408
409 pub fn from_registry(registry: SchemaRegistry) -> Self {
410 Self {
411 registry,
412 cache: DashMap::new(),
413 }
414 }
415
416 /// Validate data against a schema (structural and constraints)
417 ///
418 /// Performs both structural validation (types, required fields) and constraint
419 /// validation (max_length, ranges, etc.). Results are cached by content hash.
420 pub fn validate<T: crate::schema::LexiconSchema>(
421 &self,
422 data: &Data,
423 ) -> Result<ValidationResult, CidComputationError> {
424 // Compute cache key
425 let key = ValidationCacheKey::from_data::<T>(data)?;
426
427 // Check cache (clone Arc immediately to avoid holding ref)
428 if let Some(cached) = self.cache.get(&key).map(|r| Arc::clone(&r)) {
429 return Ok((*cached).clone());
430 }
431
432 // Perform validation
433 let result = self.validate_uncached::<T>(data);
434
435 // Cache result
436 self.cache.insert(key, Arc::new(result.clone()));
437
438 Ok(result)
439 }
440
441 /// Validate only the structural aspects of data against a schema
442 ///
443 /// Only checks types, required fields, and schema structure. Does not check
444 /// constraints like max_length, ranges, etc. This is faster when you only
445 /// care about type correctness.
446 pub fn validate_structural<T: crate::schema::LexiconSchema>(
447 &self,
448 data: &Data,
449 ) -> ValidationResult {
450 self.validate_structural_uncached::<T>(data)
451 }
452
453 /// Validate without caching (internal)
454 fn validate_uncached<T: crate::schema::LexiconSchema>(&self, data: &Data) -> ValidationResult {
455 let def = match self.registry.get_def(T::nsid(), T::def_name()) {
456 Some(d) => d,
457 None => {
458 // Schema not found - this is a structural error
459 return ValidationResult::StructuralOnly {
460 structural: vec![StructuralError::UnresolvedRef {
461 path: ValidationPath::new(),
462 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(),
463 }],
464 };
465 }
466 };
467
468 let mut path = ValidationPath::new();
469 let mut ctx = ValidationContext::new(T::nsid(), T::def_name());
470
471 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
472
473 // If structurally invalid, return structural errors only
474 if !structural_errors.is_empty() {
475 return ValidationResult::StructuralOnly {
476 structural: structural_errors,
477 };
478 }
479
480 // Structurally valid - compute constraints eagerly
481 let mut path = ValidationPath::new();
482 let constraint_errors = validate_constraints(
483 &mut path,
484 data,
485 T::nsid(),
486 T::def_name(),
487 Some(&Arc::new(self.registry.clone())),
488 );
489
490 ValidationResult::Complete {
491 structural: structural_errors,
492 constraints: constraint_errors,
493 }
494 }
495
496 /// Validate structural aspects only without caching (internal)
497 fn validate_structural_uncached<T: crate::schema::LexiconSchema>(
498 &self,
499 data: &Data,
500 ) -> ValidationResult {
501 let def = match self.registry.get_def(T::nsid(), T::def_name()) {
502 Some(d) => d,
503 None => {
504 // Schema not found - this is a structural error
505 return ValidationResult::StructuralOnly {
506 structural: vec![StructuralError::UnresolvedRef {
507 path: ValidationPath::new(),
508 ref_nsid: format!("{}#{}", T::nsid(), T::def_name()).into(),
509 }],
510 };
511 }
512 };
513
514 let mut path = ValidationPath::new();
515 let mut ctx = ValidationContext::new(T::nsid(), T::def_name());
516
517 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
518
519 ValidationResult::StructuralOnly {
520 structural: structural_errors,
521 }
522 }
523
524 pub fn validate_by_nsid_structural(&self, nsid: &str, data: &Data) -> ValidationResult {
525 let mut split = nsid.split('#');
526 let nsid = split.next().unwrap();
527 let def_name = split.next().unwrap_or("main");
528 let def = match self.registry.get_def(nsid, def_name) {
529 Some(d) => d,
530 None => {
531 // Schema not found - this is a structural error
532 return ValidationResult::StructuralOnly {
533 structural: vec![StructuralError::UnresolvedRef {
534 path: ValidationPath::new(),
535 ref_nsid: format!("{}#{}", nsid, def_name).into(),
536 }],
537 };
538 }
539 };
540
541 let mut path = ValidationPath::new();
542 let mut ctx = ValidationContext::new(nsid, def_name);
543
544 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
545
546 ValidationResult::StructuralOnly {
547 structural: structural_errors,
548 }
549 }
550
551 pub fn validate_by_nsid(&self, nsid: &str, data: &Data) -> ValidationResult {
552 let mut split = nsid.split('#');
553 let nsid = split.next().unwrap();
554 let def_name = split.next().unwrap_or("main");
555 let def = match self.registry.get_def(nsid, def_name) {
556 Some(d) => d,
557 None => {
558 // Schema not found - this is a structural error
559 return ValidationResult::StructuralOnly {
560 structural: vec![StructuralError::UnresolvedRef {
561 path: ValidationPath::new(),
562 ref_nsid: format!("{}#{}", nsid, def_name).into(),
563 }],
564 };
565 }
566 };
567
568 let mut path = ValidationPath::new();
569 let mut ctx = ValidationContext::new(nsid, def_name);
570
571 let structural_errors = validate_def(&mut path, data, &def, &self.registry, &mut ctx);
572
573 // If structurally invalid, return structural errors only
574 if !structural_errors.is_empty() {
575 return ValidationResult::StructuralOnly {
576 structural: structural_errors,
577 };
578 }
579
580 // Structurally valid - compute constraints eagerly
581 let mut path = ValidationPath::new();
582 let constraint_errors = validate_constraints(
583 &mut path,
584 data,
585 nsid,
586 def_name,
587 Some(&Arc::new(self.registry.clone())),
588 );
589
590 ValidationResult::Complete {
591 structural: structural_errors,
592 constraints: constraint_errors,
593 }
594 }
595
596 /// Get the schema registry
597 pub fn registry(&self) -> &SchemaRegistry {
598 &self.registry
599 }
600}
601
602impl Default for SchemaValidator {
603 fn default() -> Self {
604 Self::new()
605 }
606}
607
/// State carried through structural validation to resolve refs and detect cycles
struct ValidationContext {
    /// NSID of the schema currently being validated; base for resolving refs
    current_nsid: String,
    /// Definition name currently being validated
    current_def: String,
    /// Refs that are being followed right now, outermost first
    ref_stack: Vec<String>,
    /// Path depth at which following a ref is refused
    max_depth: usize,
}
615
616impl ValidationContext {
617 fn new(nsid: &str, def_name: &str) -> Self {
618 Self {
619 current_nsid: nsid.to_string(),
620 current_def: def_name.to_string(),
621 ref_stack: Vec::new(),
622 max_depth: 32,
623 }
624 }
625}
626
627/// Validate data against a lexicon def
628fn validate_def(
629 path: &mut ValidationPath,
630 data: &Data,
631 def: &crate::lexicon::LexUserType,
632 registry: &SchemaRegistry,
633 ctx: &mut ValidationContext,
634) -> Vec<StructuralError> {
635 use crate::lexicon::LexUserType;
636 use jacquard_common::types::DataModelType;
637
638 match def {
639 LexUserType::Object(obj) => {
640 // Must be an object
641 let Data::Object(obj_data) = data else {
642 return vec![StructuralError::TypeMismatch {
643 path: path.clone(),
644 expected: DataModelType::Object,
645 actual: data.data_type(),
646 }];
647 };
648
649 let mut errors = Vec::new();
650
651 // Check required fields
652 if let Some(required) = &obj.required {
653 for field in required {
654 if !obj_data.get(field.as_ref()).is_some() {
655 errors.push(StructuralError::MissingRequiredField {
656 path: path.clone(),
657 field: field.clone(),
658 });
659 }
660 }
661 }
662
663 // Validate each property that's present
664 for (name, prop) in &obj.properties {
665 if let Some(field_data) = obj_data.get(name.as_ref()) {
666 path.push_field(name.as_ref());
667 errors.extend(validate_property(path, field_data, prop, registry, ctx));
668 path.pop();
669 }
670 }
671
672 errors
673 }
674 LexUserType::Record(rec) => {
675 // Records are objects with record-specific metadata
676 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record;
677
678 let Data::Object(obj_data) = data else {
679 return vec![StructuralError::TypeMismatch {
680 path: path.clone(),
681 expected: data.data_type(),
682 actual: DataModelType::Object,
683 }];
684 };
685
686 let mut errors = Vec::new();
687
688 // Check required fields
689 if let Some(required) = &obj.required {
690 for field in required {
691 if !obj_data.get(field.as_ref()).is_some() {
692 errors.push(StructuralError::MissingRequiredField {
693 path: path.clone(),
694 field: field.clone(),
695 });
696 }
697 }
698 }
699
700 // Validate each property that's present
701 for (name, prop) in &obj.properties {
702 if let Some(field_data) = obj_data.get(name.as_ref()) {
703 path.push_field(name.as_ref());
704 errors.extend(validate_property(path, field_data, prop, registry, ctx));
705 path.pop();
706 }
707 }
708
709 errors
710 }
711 // Token types are unit types, no validation needed beyond type checking
712 LexUserType::Token(_) => Vec::new(),
713 // XRPC types are endpoint definitions, not data types
714 LexUserType::XrpcQuery(_)
715 | LexUserType::XrpcProcedure(_)
716 | LexUserType::XrpcSubscription(_) => Vec::new(),
717 // Other types
718 _ => Vec::new(),
719 }
720}
721
722/// Validate data against a property schema
723fn validate_property(
724 path: &mut ValidationPath,
725 data: &Data,
726 prop: &crate::lexicon::LexObjectProperty,
727 registry: &SchemaRegistry,
728 ctx: &mut ValidationContext,
729) -> Vec<StructuralError> {
730 use crate::lexicon::LexObjectProperty;
731 use jacquard_common::types::DataModelType;
732
733 match prop {
734 LexObjectProperty::String(_) => {
735 // Accept any string type
736 if !matches!(data.data_type(), DataModelType::String(_)) {
737 vec![StructuralError::TypeMismatch {
738 path: path.clone(),
739 expected: DataModelType::String(
740 jacquard_common::types::LexiconStringType::String,
741 ),
742 actual: data.data_type(),
743 }]
744 } else {
745 Vec::new()
746 }
747 }
748
749 LexObjectProperty::Integer(_) => {
750 if !matches!(data.data_type(), DataModelType::Integer) {
751 vec![StructuralError::TypeMismatch {
752 path: path.clone(),
753 expected: DataModelType::Integer,
754 actual: data.data_type(),
755 }]
756 } else {
757 Vec::new()
758 }
759 }
760
761 LexObjectProperty::Boolean(_) => {
762 if !matches!(data.data_type(), DataModelType::Boolean) {
763 vec![StructuralError::TypeMismatch {
764 path: path.clone(),
765 expected: DataModelType::Boolean,
766 actual: data.data_type(),
767 }]
768 } else {
769 Vec::new()
770 }
771 }
772
773 LexObjectProperty::Object(obj) => {
774 let Data::Object(obj_data) = data else {
775 return vec![StructuralError::TypeMismatch {
776 path: path.clone(),
777 expected: DataModelType::Object,
778 actual: data.data_type(),
779 }];
780 };
781
782 let mut errors = Vec::new();
783
784 // Check required fields
785 if let Some(required) = &obj.required {
786 for field in required {
787 if !obj_data.get(field.as_ref()).is_some() {
788 errors.push(StructuralError::MissingRequiredField {
789 path: path.clone(),
790 field: field.clone(),
791 });
792 }
793 }
794 }
795
796 // Recursively validate each property
797 for (name, schema_prop) in &obj.properties {
798 if let Some(field_data) = obj_data.get(name.as_ref()) {
799 path.push_field(name.as_ref());
800 errors.extend(validate_property(
801 path,
802 field_data,
803 schema_prop,
804 registry,
805 ctx,
806 ));
807 path.pop();
808 }
809 }
810
811 errors
812 }
813
814 LexObjectProperty::Array(arr) => {
815 let Data::Array(array) = data else {
816 return vec![StructuralError::TypeMismatch {
817 path: path.clone(),
818 expected: DataModelType::Array,
819 actual: data.data_type(),
820 }];
821 };
822
823 let mut errors = Vec::new();
824 for (idx, item) in array.iter().enumerate() {
825 path.push_index(idx);
826 errors.extend(validate_array_item(path, item, &arr.items, registry, ctx));
827 path.pop();
828 }
829 errors
830 }
831
832 LexObjectProperty::Union(u) => {
833 let Data::Object(obj) = data else {
834 return vec![StructuralError::TypeMismatch {
835 path: path.clone(),
836 expected: DataModelType::Object,
837 actual: data.data_type(),
838 }];
839 };
840
841 // Get $type discriminator
842 let Some(type_str) = obj.type_discriminator() else {
843 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }];
844 };
845
846 // Reject empty $type
847 if type_str.is_empty() {
848 return vec![StructuralError::MissingUnionDiscriminator { path: path.clone() }];
849 }
850
851 // Try to match against refs
852 for variant_ref in &u.refs {
853 let ref_path = RefPath::parse(variant_ref.as_ref(), Some(&ctx.current_nsid));
854 let variant_nsid = ref_path.nsid().to_string();
855 let variant_def = ref_path.def().to_string();
856 let full_variant = ref_path.full_ref();
857
858 // Match by full ref or just nsid
859 if type_str == full_variant || type_str == variant_nsid {
860 // Found match - validate against this variant
861 let Some(variant_def_type) = registry.get_def(&variant_nsid, &variant_def)
862 else {
863 return vec![StructuralError::UnresolvedRef {
864 path: path.clone(),
865 ref_nsid: full_variant.into(),
866 }];
867 };
868
869 path.push_variant(type_str);
870 let old_nsid = std::mem::replace(&mut ctx.current_nsid, variant_nsid);
871 let old_def = std::mem::replace(&mut ctx.current_def, variant_def);
872
873 let errors = validate_def(path, data, &variant_def_type, registry, ctx);
874
875 ctx.current_nsid = old_nsid;
876 ctx.current_def = old_def;
877 path.pop();
878
879 return errors;
880 }
881 }
882
883 // No match found
884 if u.closed.unwrap_or(false) {
885 // Closed union - this is an error
886 let expected_refs = u
887 .refs
888 .iter()
889 .map(|r| r.as_ref())
890 .collect::<Vec<_>>()
891 .join(", ");
892 vec![StructuralError::UnionNoMatch {
893 path: path.clone(),
894 actual_type: type_str.into(),
895 expected_refs: expected_refs.into(),
896 }]
897 } else {
898 // Open union - allow unknown variants
899 Vec::new()
900 }
901 }
902
903 LexObjectProperty::Ref(r) => {
904 // Depth check
905 if path.depth() >= ctx.max_depth {
906 return vec![StructuralError::MaxDepthExceeded {
907 path: path.clone(),
908 max: ctx.max_depth,
909 }];
910 }
911
912 // Normalize ref
913 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(&ctx.current_nsid));
914 let ref_nsid = ref_path.nsid().to_string();
915 let ref_def = ref_path.def().to_string();
916 let full_ref = ref_path.full_ref();
917
918 // Cycle detection
919 if ctx.ref_stack.contains(&full_ref) {
920 let stack = ctx.ref_stack.join(" -> ");
921 return vec![StructuralError::RefCycle {
922 path: path.clone(),
923 ref_nsid: full_ref.into(),
924 stack: stack.into(),
925 }];
926 }
927
928 // Look up ref
929 let Some(ref_def_type) = registry.get_def(&ref_nsid, &ref_def) else {
930 return vec![StructuralError::UnresolvedRef {
931 path: path.clone(),
932 ref_nsid: full_ref.into(),
933 }];
934 };
935
936 // Push, validate, pop
937 ctx.ref_stack.push(full_ref);
938 let old_nsid = std::mem::replace(&mut ctx.current_nsid, ref_nsid);
939 let old_def = std::mem::replace(&mut ctx.current_def, ref_def);
940
941 let errors = validate_def(path, data, &ref_def_type, registry, ctx);
942
943 ctx.current_nsid = old_nsid;
944 ctx.current_def = old_def;
945 ctx.ref_stack.pop();
946
947 errors
948 }
949
950 LexObjectProperty::Bytes(_) => {
951 if !matches!(data.data_type(), DataModelType::Bytes) {
952 vec![StructuralError::TypeMismatch {
953 path: path.clone(),
954 expected: DataModelType::Bytes,
955 actual: data.data_type(),
956 }]
957 } else {
958 Vec::new()
959 }
960 }
961
962 LexObjectProperty::CidLink(_) => {
963 if !matches!(data.data_type(), DataModelType::CidLink) {
964 vec![StructuralError::TypeMismatch {
965 path: path.clone(),
966 expected: DataModelType::CidLink,
967 actual: data.data_type(),
968 }]
969 } else {
970 Vec::new()
971 }
972 }
973
974 LexObjectProperty::Blob(_) => {
975 if !matches!(data.data_type(), DataModelType::Blob) {
976 vec![StructuralError::TypeMismatch {
977 path: path.clone(),
978 expected: DataModelType::Blob,
979 actual: data.data_type(),
980 }]
981 } else {
982 Vec::new()
983 }
984 }
985
986 LexObjectProperty::Unknown(_) => {
987 // Any type allowed
988 Vec::new()
989 }
990 }
991}
992
993/// Validate array item against array item schema
994fn validate_array_item(
995 path: &mut ValidationPath,
996 data: &Data,
997 item_schema: &LexArrayItem,
998 registry: &SchemaRegistry,
999 ctx: &mut ValidationContext,
1000) -> Vec<StructuralError> {
1001 validate_property(
1002 path,
1003 data,
1004 &item_schema.clone().into_object_property(),
1005 registry,
1006 ctx,
1007 )
1008}
1009
1010// ============================================================================
1011// CONSTRAINT VALIDATION
1012// ============================================================================
1013
1014/// Validate constraints on data against schema (entry point with optional registry)
1015fn validate_constraints(
1016 path: &mut ValidationPath,
1017 data: &Data,
1018 nsid: &str,
1019 def_name: &str,
1020 registry: Option<&Arc<SchemaRegistry>>,
1021) -> Vec<ConstraintError> {
1022 // Use provided registry or fall back to global inventory
1023 let fallback_registry;
1024 let registry_ref = match registry {
1025 Some(r) => r.as_ref(),
1026 None => {
1027 fallback_registry = SchemaRegistry::from_inventory();
1028 &fallback_registry
1029 }
1030 };
1031
1032 validate_constraints_impl(path, data, nsid, def_name, registry_ref)
1033}
1034
1035/// Internal implementation that takes materialized registry
1036fn validate_constraints_impl(
1037 path: &mut ValidationPath,
1038 data: &Data,
1039 nsid: &str,
1040 def_name: &str,
1041 registry: &SchemaRegistry,
1042) -> Vec<ConstraintError> {
1043 use crate::lexicon::LexUserType;
1044
1045 // Get schema def
1046 let Some(def) = registry.get_def(nsid, def_name) else {
1047 return Vec::new();
1048 };
1049
1050 match def {
1051 LexUserType::Object(obj) => {
1052 let Data::Object(obj_data) = data else {
1053 return Vec::new();
1054 };
1055
1056 let mut errors = Vec::new();
1057
1058 // Check constraints on each property
1059 for (name, prop) in &obj.properties {
1060 if let Some(field_data) = obj_data.get(name.as_ref()) {
1061 path.push_field(name.as_ref());
1062 errors.extend(check_property_constraints(
1063 path, field_data, prop, nsid, registry,
1064 ));
1065 path.pop();
1066 }
1067 }
1068
1069 errors
1070 }
1071 LexUserType::Record(rec) => {
1072 // Records are objects with record-specific metadata
1073 let crate::lexicon::LexRecordRecord::Object(obj) = &rec.record;
1074
1075 let Data::Object(obj_data) = data else {
1076 return Vec::new();
1077 };
1078
1079 let mut errors = Vec::new();
1080
1081 // Check constraints on each property
1082 for (name, prop) in &obj.properties {
1083 if let Some(field_data) = obj_data.get(name.as_ref()) {
1084 path.push_field(name.as_ref());
1085 errors.extend(check_property_constraints(
1086 path, field_data, prop, nsid, registry,
1087 ));
1088 path.pop();
1089 }
1090 }
1091
1092 errors
1093 }
1094 // Token types, XRPC types, and other types don't have constraints
1095 _ => Vec::new(),
1096 }
1097}
1098
1099/// Check constraints on a property
1100fn check_property_constraints(
1101 path: &mut ValidationPath,
1102 data: &Data,
1103 prop: &crate::lexicon::LexObjectProperty,
1104 current_nsid: &str,
1105 registry: &SchemaRegistry,
1106) -> Vec<ConstraintError> {
1107 use crate::lexicon::LexObjectProperty;
1108
1109 match prop {
1110 LexObjectProperty::String(s) => {
1111 if let Data::String(str_val) = data {
1112 check_string_constraints(path, str_val.as_str(), s)
1113 } else {
1114 Vec::new()
1115 }
1116 }
1117
1118 LexObjectProperty::Integer(i) => {
1119 if let Data::Integer(int_val) = data {
1120 check_integer_constraints(path, *int_val, i)
1121 } else {
1122 Vec::new()
1123 }
1124 }
1125
1126 LexObjectProperty::Array(arr) => {
1127 if let Data::Array(array) = data {
1128 let mut errors = check_array_constraints(path, array, arr);
1129
1130 // Also check constraints on array items
1131 for (idx, item) in array.iter().enumerate() {
1132 path.push_index(idx);
1133 errors.extend(check_array_item_constraints(
1134 path,
1135 item,
1136 &arr.items,
1137 current_nsid,
1138 registry,
1139 ));
1140 path.pop();
1141 }
1142
1143 errors
1144 } else {
1145 Vec::new()
1146 }
1147 }
1148
1149 LexObjectProperty::Object(obj) => {
1150 if let Data::Object(obj_data) = data {
1151 let mut errors = Vec::new();
1152
1153 // Recursively check nested object properties
1154 for (name, schema_prop) in &obj.properties {
1155 if let Some(field_data) = obj_data.get(name.as_ref()) {
1156 path.push_field(name.as_ref());
1157 errors.extend(check_property_constraints(
1158 path,
1159 field_data,
1160 schema_prop,
1161 current_nsid,
1162 registry,
1163 ));
1164 path.pop();
1165 }
1166 }
1167
1168 errors
1169 } else {
1170 Vec::new()
1171 }
1172 }
1173
1174 LexObjectProperty::Ref(r) => {
1175 // Follow ref and check constraints
1176 let ref_path = RefPath::parse(r.r#ref.as_ref(), Some(current_nsid));
1177 let ref_nsid = ref_path.nsid();
1178 let ref_def = ref_path.def();
1179
1180 if registry.get_def(ref_nsid, ref_def).is_some() {
1181 validate_constraints_impl(path, data, ref_nsid, ref_def, registry)
1182 } else {
1183 Vec::new()
1184 }
1185 }
1186
1187 // Other property types don't have constraints
1188 _ => Vec::new(),
1189 }
1190}
1191
1192/// Check string constraints
1193fn check_string_constraints(
1194 path: &ValidationPath,
1195 value: &str,
1196 schema: &crate::lexicon::LexString,
1197) -> Vec<ConstraintError> {
1198 let mut errors = Vec::new();
1199
1200 // Check byte length constraints
1201 let byte_len = value.len();
1202
1203 if let Some(min) = schema.min_length {
1204 if byte_len < min as usize {
1205 errors.push(ConstraintError::MinLength {
1206 path: path.clone(),
1207 min: min as usize,
1208 actual: byte_len,
1209 });
1210 }
1211 }
1212
1213 if let Some(max) = schema.max_length {
1214 if byte_len > max as usize {
1215 errors.push(ConstraintError::MaxLength {
1216 path: path.clone(),
1217 max: max as usize,
1218 actual: byte_len,
1219 });
1220 }
1221 }
1222
1223 // Check grapheme count constraints
1224 if schema.min_graphemes.is_some() || schema.max_graphemes.is_some() {
1225 use unicode_segmentation::UnicodeSegmentation;
1226 let grapheme_count = value.graphemes(true).count();
1227
1228 if let Some(min) = schema.min_graphemes {
1229 if grapheme_count < min as usize {
1230 errors.push(ConstraintError::MinGraphemes {
1231 path: path.clone(),
1232 min: min as usize,
1233 actual: grapheme_count,
1234 });
1235 }
1236 }
1237
1238 if let Some(max) = schema.max_graphemes {
1239 if grapheme_count > max as usize {
1240 errors.push(ConstraintError::MaxGraphemes {
1241 path: path.clone(),
1242 max: max as usize,
1243 actual: grapheme_count,
1244 });
1245 }
1246 }
1247 }
1248
1249 errors
1250}
1251
1252/// Check integer constraints
1253fn check_integer_constraints(
1254 path: &ValidationPath,
1255 value: i64,
1256 schema: &crate::lexicon::LexInteger,
1257) -> Vec<ConstraintError> {
1258 let mut errors = Vec::new();
1259
1260 if let Some(min) = schema.minimum {
1261 if value < min {
1262 errors.push(ConstraintError::Minimum {
1263 path: path.clone(),
1264 min,
1265 actual: value,
1266 });
1267 }
1268 }
1269
1270 if let Some(max) = schema.maximum {
1271 if value > max {
1272 errors.push(ConstraintError::Maximum {
1273 path: path.clone(),
1274 max,
1275 actual: value,
1276 });
1277 }
1278 }
1279
1280 errors
1281}
1282
1283/// Check array length constraints
1284fn check_array_constraints(
1285 path: &ValidationPath,
1286 array: &jacquard_common::types::value::Array,
1287 schema: &crate::lexicon::LexArray,
1288) -> Vec<ConstraintError> {
1289 let mut errors = Vec::new();
1290 let len = array.len();
1291
1292 if let Some(min) = schema.min_length {
1293 if len < min as usize {
1294 errors.push(ConstraintError::MinLength {
1295 path: path.clone(),
1296 min: min as usize,
1297 actual: len,
1298 });
1299 }
1300 }
1301
1302 if let Some(max) = schema.max_length {
1303 if len > max as usize {
1304 errors.push(ConstraintError::MaxLength {
1305 path: path.clone(),
1306 max: max as usize,
1307 actual: len,
1308 });
1309 }
1310 }
1311
1312 errors
1313}
1314
1315/// Check constraints on array items
1316fn check_array_item_constraints(
1317 path: &mut ValidationPath,
1318 data: &Data,
1319 item_schema: &LexArrayItem,
1320 current_nsid: &str,
1321 registry: &SchemaRegistry,
1322) -> Vec<ConstraintError> {
1323 check_property_constraints(
1324 path,
1325 data,
1326 &item_schema.clone().into_object_property(),
1327 current_nsid,
1328 registry,
1329 )
1330}
1331
1332#[cfg(test)]
1333mod tests;