A better Rust ATProto crate
1

Configure Feed

Select the types of activity you want to include in your feed.

at main 17 kB View raw
1use crate::error::{CodegenError, Result}; 2use crate::lexicon::{LexUserType, LexiconDoc}; 3use crate::ref_utils::RefPath; 4use jacquard_common::{deps::smol_str::SmolStr, into_static::IntoStatic}; 5use std::collections::BTreeMap; 6use std::fs; 7use std::path::Path; 8 9/// Check if content looks like a lexicon file. 10/// 11/// A file is considered a lexicon if it contains a `"lexicon"` key at the top level 12/// or one level down (for some wrapper formats). This allows us to distinguish 13/// "not a lexicon at all" (skip silently) from "broken lexicon" (report error). 14fn is_lexicon_content(content: &str) -> bool { 15 // Quick string scan first (fast path for non-JSON or unrelated JSON) 16 if !content.contains("\"lexicon\"") { 17 return false; 18 } 19 20 // Parse to Value and check structure 21 if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) { 22 // Top-level lexicon field 23 if value.get("lexicon").is_some() { 24 return true; 25 } 26 // One level down (some wrapper formats) 27 if let Some(obj) = value.as_object() { 28 for v in obj.values() { 29 if v.get("lexicon").is_some() { 30 return true; 31 } 32 } 33 } 34 } 35 false 36} 37 38/// Raw lexicon doc for two-phase parsing - defs are kept as raw JSON Values 39/// so we can deserialize each separately with better error tracking. 40#[derive(Debug, serde::Deserialize)] 41struct RawLexiconDoc<'s> { 42 pub lexicon: crate::lexicon::Lexicon, 43 #[serde(borrow)] 44 pub id: jacquard_common::CowStr<'s>, 45 pub revision: Option<u32>, 46 #[serde(borrow)] 47 pub description: Option<jacquard_common::CowStr<'s>>, 48 pub defs: BTreeMap<SmolStr, serde_json::Value>, 49} 50 51/// Helper to create a parse error with path context. 52fn make_parse_error( 53 file_path: &Path, 54 json_path: &str, 55 message: String, 56 content: &str, 57) -> CodegenError { 58 CodegenError::ParseError { 59 path: file_path.to_path_buf(), 60 json_path: Some(json_path.to_string()), 61 message, 62 src: Some(content.to_string()), 63 span: None, 64 } 65} 66 67/// Recursively parse properties with path tracking. 68/// Returns parsed properties or an error with the full path. 69fn parse_properties_deep( 70 props_value: &serde_json::Value, 71 base_path: &str, 72 file_path: &Path, 73 content: &str, 74) -> std::result::Result<BTreeMap<SmolStr, crate::lexicon::LexObjectProperty<'static>>, CodegenError> 75{ 76 let props_obj = props_value.as_object().ok_or_else(|| { 77 make_parse_error( 78 file_path, 79 base_path, 80 "expected object for properties".to_string(), 81 content, 82 ) 83 })?; 84 85 let mut parsed_props = BTreeMap::new(); 86 for (prop_name, prop_value) in props_obj { 87 let prop_path = format!("{}.{}", base_path, prop_name); 88 89 // Try to parse this property 90 let parsed: crate::lexicon::LexObjectProperty = 91 serde_path_to_error::deserialize(prop_value).map_err(|e| { 92 let inner_path = e.path().to_string(); 93 let full_path = if inner_path.is_empty() { 94 prop_path.clone() 95 } else { 96 format!("{}.{}", prop_path, inner_path) 97 }; 98 make_parse_error(file_path, &full_path, e.inner().to_string(), content) 99 })?; 100 101 parsed_props.insert(SmolStr::new(prop_name), parsed.into_static()); 102 } 103 104 Ok(parsed_props) 105} 106 107/// Parse an object-like def with deep property tracking. 108fn parse_object_deep( 109 value: &serde_json::Value, 110 base_path: &str, 111 file_path: &Path, 112 content: &str, 113) -> std::result::Result<crate::lexicon::LexObject<'static>, CodegenError> { 114 use crate::lexicon::LexObject; 115 116 let obj = value.as_object().ok_or_else(|| { 117 make_parse_error(file_path, base_path, "expected object".to_string(), content) 118 })?; 119 120 // Parse properties deeply if present 121 let properties = if let Some(props) = obj.get("properties") { 122 let props_path = format!("{}.properties", base_path); 123 parse_properties_deep(props, &props_path, file_path, content)? 124 } else { 125 BTreeMap::new() 126 }; 127 128 // Parse the rest of the object normally 129 let description = obj 130 .get("description") 131 .and_then(|v| v.as_str()) 132 .map(|s| jacquard_common::CowStr::copy_from_str(s)); 133 let required: Option<Vec<SmolStr>> = obj 134 .get("required") 135 .map(|v| serde_json::from_value(v.clone())) 136 .transpose() 137 .map_err(|e| { 138 make_parse_error( 139 file_path, 140 &format!("{}.required", base_path), 141 e.to_string(), 142 content, 143 ) 144 })?; 145 let nullable: Option<Vec<SmolStr>> = obj 146 .get("nullable") 147 .map(|v| serde_json::from_value(v.clone())) 148 .transpose() 149 .map_err(|e| { 150 make_parse_error( 151 file_path, 152 &format!("{}.nullable", base_path), 153 e.to_string(), 154 content, 155 ) 156 })?; 157 158 Ok(LexObject { 159 description, 160 required, 161 nullable, 162 properties, 163 }) 164} 165 166/// Parse a def with deep path tracking for nested structures. 167fn parse_def_deep( 168 def_name: &str, 169 value: &serde_json::Value, 170 file_path: &Path, 171 content: &str, 172) -> std::result::Result<LexUserType<'static>, CodegenError> { 173 let base_path = format!("defs.{}", def_name); 174 175 // Check the type field to determine how to parse 176 let type_str = value 177 .get("type") 178 .and_then(|v| v.as_str()) 179 .unwrap_or("object"); 180 181 match type_str { 182 "object" => { 183 let obj = parse_object_deep(value, &base_path, file_path, content)?; 184 Ok(LexUserType::Object(obj)) 185 } 186 "record" => { 187 // Records have a nested record.properties structure 188 if let Some(record_value) = value.get("record") { 189 let record_path = format!("{}.record", base_path); 190 let inner_obj = parse_object_deep(record_value, &record_path, file_path, content)?; 191 192 // Parse the rest of the record 193 let obj = value.as_object().ok_or_else(|| { 194 make_parse_error( 195 file_path, 196 &base_path, 197 "expected object".to_string(), 198 content, 199 ) 200 })?; 201 202 let description = obj 203 .get("description") 204 .and_then(|v| v.as_str()) 205 .map(|s| jacquard_common::CowStr::copy_from_str(s)); 206 let key: Option<jacquard_common::CowStr<'static>> = obj 207 .get("key") 208 .and_then(|v| v.as_str()) 209 .map(|s| jacquard_common::CowStr::copy_from_str(s)); 210 211 Ok(LexUserType::Record(crate::lexicon::LexRecord { 212 description, 213 key, 214 record: crate::lexicon::LexRecordRecord::Object(inner_obj), 215 })) 216 } else { 217 // Fallback to normal parsing if no record field 218 serde_path_to_error::deserialize(value) 219 .map(|v: LexUserType| v.into_static()) 220 .map_err(|e| { 221 make_parse_error(file_path, &base_path, e.inner().to_string(), content) 222 }) 223 } 224 } 225 // For other types (query, procedure, etc.), use the simpler approach for now 226 // Could be extended later 227 _ => serde_path_to_error::deserialize(value) 228 .map(|v: LexUserType| v.into_static()) 229 .map_err(|e| { 230 let inner_path = e.path().to_string(); 231 let full_path = if inner_path.is_empty() { 232 base_path 233 } else { 234 format!("{}.{}", base_path, inner_path) 235 }; 236 make_parse_error(file_path, &full_path, e.inner().to_string(), content) 237 }), 238 } 239} 240 241/// Parse a lexicon with rich error context using deep recursive parsing. 242/// 243/// This parses the document structure recursively, tracking paths through: 244/// - defs → def_name → properties → prop_name → nested fields 245/// 246/// This gives us detailed error paths like "defs.main.properties.count.default" 247fn parse_lexicon_with_context( 248 content: &str, 249 path: &Path, 250) -> std::result::Result<LexiconDoc<'static>, CodegenError> { 251 // Phase 1: Parse the top-level structure with defs as raw Values 252 let raw_doc: RawLexiconDoc = 253 serde_json::from_str(content).map_err(|e| CodegenError::ParseError { 254 path: path.to_path_buf(), 255 json_path: None, 256 message: e.to_string(), 257 src: Some(content.to_string()), 258 span: None, 259 })?; 260 261 // Phase 2: Parse each def with deep path tracking 262 let mut parsed_defs = BTreeMap::new(); 263 for (def_name, def_value) in raw_doc.defs { 264 let parsed_def = parse_def_deep(&def_name, &def_value, path, content)?; 265 parsed_defs.insert(def_name, parsed_def); 266 } 267 268 // Reconstruct the full LexiconDoc 269 Ok(LexiconDoc { 270 lexicon: raw_doc.lexicon, 271 id: raw_doc.id.into_static(), 272 revision: raw_doc.revision, 273 description: raw_doc.description.map(|d| d.into_static()), 274 defs: parsed_defs, 275 }) 276} 277 278/// Registry of all loaded lexicons for reference resolution 279#[derive(Debug, Clone)] 280pub struct LexiconCorpus { 281 /// Map from NSID to lexicon document 282 docs: BTreeMap<SmolStr, LexiconDoc<'static>>, 283 /// Map from NSID to original source text (for error reporting) 284 sources: BTreeMap<SmolStr, String>, 285} 286 287impl LexiconCorpus { 288 /// Create an empty corpus 289 pub fn new() -> Self { 290 Self { 291 docs: BTreeMap::new(), 292 sources: BTreeMap::new(), 293 } 294 } 295 296 /// Load all lexicons from a directory 297 pub fn load_from_dir(path: impl AsRef<Path>) -> Result<Self> { 298 let mut corpus = Self::new(); 299 300 let schemas = crate::fs::find_schemas(path.as_ref())?; 301 for schema_path in schemas { 302 let content = fs::read_to_string(schema_path.as_ref())?; 303 304 // Check if this file is trying to be a lexicon 305 if !is_lexicon_content(&content) { 306 // Not a lexicon, skip silently 307 continue; 308 } 309 310 // This IS a lexicon - parse with good error reporting 311 let doc = parse_lexicon_with_context(&content, schema_path.as_ref())?; 312 313 let nsid = SmolStr::from(doc.id.to_string()); 314 corpus.docs.insert(nsid.clone(), doc); 315 corpus.sources.insert(nsid, content); 316 } 317 318 Ok(corpus) 319 } 320 321 /// Get a lexicon document by NSID 322 pub fn get(&self, nsid: &str) -> Option<&LexiconDoc<'static>> { 323 self.docs.get(nsid) 324 } 325 326 /// Get the source text for a lexicon by NSID 327 pub fn get_source(&self, nsid: &str) -> Option<&str> { 328 self.sources.get(nsid).map(|s| s.as_str()) 329 } 330 331 /// Resolve a reference, handling fragments 332 /// 333 /// Examples: 334 /// - `app.bsky.feed.post` → main def from that lexicon 335 /// - `app.bsky.feed.post#replyRef` → replyRef def from that lexicon 336 pub fn resolve_ref( 337 &self, 338 ref_str: &str, 339 ) -> Option<(&LexiconDoc<'static>, &LexUserType<'static>)> { 340 let ref_path = RefPath::parse(ref_str, None); 341 let doc = self.get(ref_path.nsid())?; 342 let def = doc.defs.get(ref_path.def())?; 343 Some((doc, def)) 344 } 345 346 /// Check if a reference exists 347 pub fn ref_exists(&self, ref_str: &str) -> bool { 348 self.resolve_ref(ref_str).is_some() 349 } 350 351 /// Iterate over all documents 352 pub fn iter(&self) -> impl Iterator<Item = (&SmolStr, &LexiconDoc<'static>)> { 353 self.docs.iter() 354 } 355 356 /// Number of loaded lexicons 357 pub fn len(&self) -> usize { 358 self.docs.len() 359 } 360 361 /// Check if corpus is empty 362 pub fn is_empty(&self) -> bool { 363 self.docs.is_empty() 364 } 365} 366 367impl Default for LexiconCorpus { 368 fn default() -> Self { 369 Self::new() 370 } 371} 372 373#[cfg(test)] 374mod tests { 375 use super::*; 376 use crate::lexicon::LexUserType; 377 378 #[test] 379 fn test_empty_corpus() { 380 let corpus = LexiconCorpus::new(); 381 assert!(corpus.is_empty()); 382 assert_eq!(corpus.len(), 0); 383 } 384 385 #[test] 386 fn test_load_lexicons() { 387 let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons") 388 .expect("failed to load lexicons"); 389 390 assert!(!corpus.is_empty()); 391 assert_eq!(corpus.len(), 17); // 10 original + 7 new edge case fixtures 392 393 // Check that we loaded the expected lexicons 394 assert!(corpus.get("app.bsky.feed.post").is_some()); 395 assert!(corpus.get("app.bsky.feed.getAuthorFeed").is_some()); 396 assert!(corpus.get("app.bsky.richtext.facet").is_some()); 397 assert!(corpus.get("app.bsky.embed.images").is_some()); 398 assert!(corpus.get("com.atproto.repo.strongRef").is_some()); 399 assert!(corpus.get("com.atproto.label.defs").is_some()); 400 } 401 402 #[test] 403 fn test_resolve_ref_without_fragment() { 404 let corpus = LexiconCorpus::load_from_dir("../jacquard-api/lexicons") 405 .expect("failed to load lexicons"); 406 407 // Without fragment should resolve to main def 408 let (doc, def) = corpus 409 .resolve_ref("app.bsky.feed.post") 410 .expect("should resolve"); 411 assert_eq!(doc.id.as_ref(), "app.bsky.feed.post"); 412 assert!(matches!(def, LexUserType::Record(_))); 413 } 414 415 #[test] 416 fn test_resolve_ref_with_fragment() { 417 let corpus = LexiconCorpus::load_from_dir("../jacquard-api/lexicons") 418 .expect("failed to load lexicons"); 419 420 // With fragment should resolve to specific def 421 let (doc, def) = corpus 422 .resolve_ref("app.bsky.richtext.facet#mention") 423 .expect("should resolve"); 424 assert_eq!(doc.id.as_ref(), "app.bsky.richtext.facet"); 425 assert!(matches!(def, LexUserType::Object(_))); 426 } 427 428 #[test] 429 fn test_ref_exists() { 430 let corpus = LexiconCorpus::load_from_dir("../jacquard-api/lexicons") 431 .expect("failed to load lexicons"); 432 433 // Existing refs 434 assert!(corpus.ref_exists("app.bsky.feed.post")); 435 assert!(corpus.ref_exists("app.bsky.feed.post#main")); 436 assert!(corpus.ref_exists("app.bsky.richtext.facet#mention")); 437 438 // Non-existing refs 439 assert!(!corpus.ref_exists("com.example.fake")); 440 assert!(!corpus.ref_exists("app.bsky.feed.post#nonexistent")); 441 } 442 443 #[test] 444 fn test_non_lexicon_json_skipped_silently() { 445 // The test_lexicons directory contains not_a_lexicon.json which should be skipped 446 let corpus = LexiconCorpus::load_from_dir("tests/fixtures/test_lexicons") 447 .expect("should succeed even with non-lexicon JSON files"); 448 449 // The non-lexicon file should not be in the corpus 450 assert!(corpus.get("some random config").is_none()); 451 452 // But valid lexicons should still load 453 assert!(corpus.get("app.bsky.feed.post").is_some()); 454 } 455 456 #[test] 457 fn test_is_lexicon_content_detection() { 458 // Not a lexicon - no "lexicon" key 459 assert!(!is_lexicon_content(r#"{"name": "test", "version": "1.0"}"#)); 460 461 // Not a lexicon - invalid JSON 462 assert!(!is_lexicon_content("not json at all")); 463 464 // Is a lexicon - has "lexicon" at top level 465 assert!(is_lexicon_content(r#"{"lexicon": 1, "id": "test.foo"}"#)); 466 467 // Is a lexicon - has "lexicon" one level down 468 assert!(is_lexicon_content( 469 r#"{"wrapper": {"lexicon": 1, "id": "test.foo"}}"# 470 )); 471 } 472 473 #[test] 474 fn test_broken_lexicon_returns_error_with_path() { 475 let result = LexiconCorpus::load_from_dir("tests/fixtures/error_cases"); 476 477 // Should fail because broken_lexicon.json is a lexicon (has "lexicon" key) 478 // but has invalid structure 479 let err = result.expect_err("should fail on broken lexicon"); 480 let err_str = err.to_string(); 481 482 // Error should include the full path to the broken property 483 assert!( 484 err_str.contains("defs.main.properties.count"), 485 "error should contain path to the broken property, got: {}", 486 err_str 487 ); 488 489 // Error should also include the actual error message 490 assert!( 491 err_str.contains("expected i64"), 492 "error should describe the type mismatch, got: {}", 493 err_str 494 ); 495 496 // Error should mention the file 497 assert!( 498 err_str.contains("broken_lexicon.json"), 499 "error should mention the file, got: {}", 500 err_str 501 ); 502 } 503}