A better Rust ATProto crate
1use crate::error::{CodegenError, Result};
2use crate::lexicon::{LexUserType, LexiconDoc};
3use crate::ref_utils::RefPath;
4use jacquard_common::{deps::smol_str::SmolStr, into_static::IntoStatic};
5use std::collections::BTreeMap;
6use std::fs;
7use std::path::Path;
8
9/// Check if content looks like a lexicon file.
10///
11/// A file is considered a lexicon if it contains a `"lexicon"` key at the top level
12/// or one level down (for some wrapper formats). This allows us to distinguish
13/// "not a lexicon at all" (skip silently) from "broken lexicon" (report error).
14fn is_lexicon_content(content: &str) -> bool {
15 // Quick string scan first (fast path for non-JSON or unrelated JSON)
16 if !content.contains("\"lexicon\"") {
17 return false;
18 }
19
20 // Parse to Value and check structure
21 if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) {
22 // Top-level lexicon field
23 if value.get("lexicon").is_some() {
24 return true;
25 }
26 // One level down (some wrapper formats)
27 if let Some(obj) = value.as_object() {
28 for v in obj.values() {
29 if v.get("lexicon").is_some() {
30 return true;
31 }
32 }
33 }
34 }
35 false
36}
37
38/// Raw lexicon doc for two-phase parsing - defs are kept as raw JSON Values
39/// so we can deserialize each separately with better error tracking.
40#[derive(Debug, serde::Deserialize)]
41struct RawLexiconDoc<'s> {
42 pub lexicon: crate::lexicon::Lexicon,
43 #[serde(borrow)]
44 pub id: jacquard_common::CowStr<'s>,
45 pub revision: Option<u32>,
46 #[serde(borrow)]
47 pub description: Option<jacquard_common::CowStr<'s>>,
48 pub defs: BTreeMap<SmolStr, serde_json::Value>,
49}
50
51/// Helper to create a parse error with path context.
52fn make_parse_error(
53 file_path: &Path,
54 json_path: &str,
55 message: String,
56 content: &str,
57) -> CodegenError {
58 CodegenError::ParseError {
59 path: file_path.to_path_buf(),
60 json_path: Some(json_path.to_string()),
61 message,
62 src: Some(content.to_string()),
63 span: None,
64 }
65}
66
67/// Recursively parse properties with path tracking.
68/// Returns parsed properties or an error with the full path.
69fn parse_properties_deep(
70 props_value: &serde_json::Value,
71 base_path: &str,
72 file_path: &Path,
73 content: &str,
74) -> std::result::Result<BTreeMap<SmolStr, crate::lexicon::LexObjectProperty<'static>>, CodegenError>
75{
76 let props_obj = props_value.as_object().ok_or_else(|| {
77 make_parse_error(
78 file_path,
79 base_path,
80 "expected object for properties".to_string(),
81 content,
82 )
83 })?;
84
85 let mut parsed_props = BTreeMap::new();
86 for (prop_name, prop_value) in props_obj {
87 let prop_path = format!("{}.{}", base_path, prop_name);
88
89 // Try to parse this property
90 let parsed: crate::lexicon::LexObjectProperty =
91 serde_path_to_error::deserialize(prop_value).map_err(|e| {
92 let inner_path = e.path().to_string();
93 let full_path = if inner_path.is_empty() {
94 prop_path.clone()
95 } else {
96 format!("{}.{}", prop_path, inner_path)
97 };
98 make_parse_error(file_path, &full_path, e.inner().to_string(), content)
99 })?;
100
101 parsed_props.insert(SmolStr::new(prop_name), parsed.into_static());
102 }
103
104 Ok(parsed_props)
105}
106
107/// Parse an object-like def with deep property tracking.
108fn parse_object_deep(
109 value: &serde_json::Value,
110 base_path: &str,
111 file_path: &Path,
112 content: &str,
113) -> std::result::Result<crate::lexicon::LexObject<'static>, CodegenError> {
114 use crate::lexicon::LexObject;
115
116 let obj = value.as_object().ok_or_else(|| {
117 make_parse_error(file_path, base_path, "expected object".to_string(), content)
118 })?;
119
120 // Parse properties deeply if present
121 let properties = if let Some(props) = obj.get("properties") {
122 let props_path = format!("{}.properties", base_path);
123 parse_properties_deep(props, &props_path, file_path, content)?
124 } else {
125 BTreeMap::new()
126 };
127
128 // Parse the rest of the object normally
129 let description = obj
130 .get("description")
131 .and_then(|v| v.as_str())
132 .map(|s| jacquard_common::CowStr::copy_from_str(s));
133 let required: Option<Vec<SmolStr>> = obj
134 .get("required")
135 .map(|v| serde_json::from_value(v.clone()))
136 .transpose()
137 .map_err(|e| {
138 make_parse_error(
139 file_path,
140 &format!("{}.required", base_path),
141 e.to_string(),
142 content,
143 )
144 })?;
145 let nullable: Option<Vec<SmolStr>> = obj
146 .get("nullable")
147 .map(|v| serde_json::from_value(v.clone()))
148 .transpose()
149 .map_err(|e| {
150 make_parse_error(
151 file_path,
152 &format!("{}.nullable", base_path),
153 e.to_string(),
154 content,
155 )
156 })?;
157
158 Ok(LexObject {
159 description,
160 required,
161 nullable,
162 properties,
163 })
164}
165
166/// Parse a def with deep path tracking for nested structures.
167fn parse_def_deep(
168 def_name: &str,
169 value: &serde_json::Value,
170 file_path: &Path,
171 content: &str,
172) -> std::result::Result<LexUserType<'static>, CodegenError> {
173 let base_path = format!("defs.{}", def_name);
174
175 // Check the type field to determine how to parse
176 let type_str = value
177 .get("type")
178 .and_then(|v| v.as_str())
179 .unwrap_or("object");
180
181 match type_str {
182 "object" => {
183 let obj = parse_object_deep(value, &base_path, file_path, content)?;
184 Ok(LexUserType::Object(obj))
185 }
186 "record" => {
187 // Records have a nested record.properties structure
188 if let Some(record_value) = value.get("record") {
189 let record_path = format!("{}.record", base_path);
190 let inner_obj = parse_object_deep(record_value, &record_path, file_path, content)?;
191
192 // Parse the rest of the record
193 let obj = value.as_object().ok_or_else(|| {
194 make_parse_error(
195 file_path,
196 &base_path,
197 "expected object".to_string(),
198 content,
199 )
200 })?;
201
202 let description = obj
203 .get("description")
204 .and_then(|v| v.as_str())
205 .map(|s| jacquard_common::CowStr::copy_from_str(s));
206 let key: Option<jacquard_common::CowStr<'static>> = obj
207 .get("key")
208 .and_then(|v| v.as_str())
209 .map(|s| jacquard_common::CowStr::copy_from_str(s));
210
211 Ok(LexUserType::Record(crate::lexicon::LexRecord {
212 description,
213 key,
214 record: crate::lexicon::LexRecordRecord::Object(inner_obj),
215 }))
216 } else {
217 // Fallback to normal parsing if no record field
218 serde_path_to_error::deserialize(value)
219 .map(|v: LexUserType| v.into_static())
220 .map_err(|e| {
221 make_parse_error(file_path, &base_path, e.inner().to_string(), content)
222 })
223 }
224 }
225 // For other types (query, procedure, etc.), use the simpler approach for now
226 // Could be extended later
227 _ => serde_path_to_error::deserialize(value)
228 .map(|v: LexUserType| v.into_static())
229 .map_err(|e| {
230 let inner_path = e.path().to_string();
231 let full_path = if inner_path.is_empty() {
232 base_path
233 } else {
234 format!("{}.{}", base_path, inner_path)
235 };
236 make_parse_error(file_path, &full_path, e.inner().to_string(), content)
237 }),
238 }
239}
240
241/// Parse a lexicon with rich error context using deep recursive parsing.
242///
243/// This parses the document structure recursively, tracking paths through:
244/// - defs → def_name → properties → prop_name → nested fields
245///
246/// This gives us detailed error paths like "defs.main.properties.count.default"
247fn parse_lexicon_with_context(
248 content: &str,
249 path: &Path,
250) -> std::result::Result<LexiconDoc<'static>, CodegenError> {
251 // Phase 1: Parse the top-level structure with defs as raw Values
252 let raw_doc: RawLexiconDoc =
253 serde_json::from_str(content).map_err(|e| CodegenError::ParseError {
254 path: path.to_path_buf(),
255 json_path: None,
256 message: e.to_string(),
257 src: Some(content.to_string()),
258 span: None,
259 })?;
260
261 // Phase 2: Parse each def with deep path tracking
262 let mut parsed_defs = BTreeMap::new();
263 for (def_name, def_value) in raw_doc.defs {
264 let parsed_def = parse_def_deep(&def_name, &def_value, path, content)?;
265 parsed_defs.insert(def_name, parsed_def);
266 }
267
268 // Reconstruct the full LexiconDoc
269 Ok(LexiconDoc {
270 lexicon: raw_doc.lexicon,
271 id: raw_doc.id.into_static(),
272 revision: raw_doc.revision,
273 description: raw_doc.description.map(|d| d.into_static()),
274 defs: parsed_defs,
275 })
276}
277
278/// Registry of all loaded lexicons for reference resolution
279#[derive(Debug, Clone)]
280pub struct LexiconCorpus {
281 /// Map from NSID to lexicon document
282 docs: BTreeMap<SmolStr, LexiconDoc<'static>>,
283 /// Map from NSID to original source text (for error reporting)
284 sources: BTreeMap<SmolStr, String>,
285}
286
287impl LexiconCorpus {
288 /// Create an empty corpus
289 pub fn new() -> Self {
290 Self {
291 docs: BTreeMap::new(),
292 sources: BTreeMap::new(),
293 }
294 }
295
296 /// Load all lexicons from a directory
297 pub fn load_from_dir(path: impl AsRef<Path>) -> Result<Self> {
298 let mut corpus = Self::new();
299
300 let schemas = crate::fs::find_schemas(path.as_ref())?;
301 for schema_path in schemas {
302 let content = fs::read_to_string(schema_path.as_ref())?;
303
304 // Check if this file is trying to be a lexicon
305 if !is_lexicon_content(&content) {
306 // Not a lexicon, skip silently
307 continue;
308 }
309
310 // This IS a lexicon - parse with good error reporting
311 let doc = parse_lexicon_with_context(&content, schema_path.as_ref())?;
312
313 let nsid = SmolStr::from(doc.id.to_string());
314 corpus.docs.insert(nsid.clone(), doc);
315 corpus.sources.insert(nsid, content);
316 }
317
318 Ok(corpus)
319 }
320
321 /// Get a lexicon document by NSID
322 pub fn get(&self, nsid: &str) -> Option<&LexiconDoc<'static>> {
323 self.docs.get(nsid)
324 }
325
326 /// Get the source text for a lexicon by NSID
327 pub fn get_source(&self, nsid: &str) -> Option<&str> {
328 self.sources.get(nsid).map(|s| s.as_str())
329 }
330
331 /// Resolve a reference, handling fragments
332 ///
333 /// Examples:
334 /// - `app.bsky.feed.post` → main def from that lexicon
335 /// - `app.bsky.feed.post#replyRef` → replyRef def from that lexicon
336 pub fn resolve_ref(
337 &self,
338 ref_str: &str,
339 ) -> Option<(&LexiconDoc<'static>, &LexUserType<'static>)> {
340 let ref_path = RefPath::parse(ref_str, None);
341 let doc = self.get(ref_path.nsid())?;
342 let def = doc.defs.get(ref_path.def())?;
343 Some((doc, def))
344 }
345
346 /// Check if a reference exists
347 pub fn ref_exists(&self, ref_str: &str) -> bool {
348 self.resolve_ref(ref_str).is_some()
349 }
350
351 /// Iterate over all documents
352 pub fn iter(&self) -> impl Iterator<Item = (&SmolStr, &LexiconDoc<'static>)> {
353 self.docs.iter()
354 }
355
356 /// Number of loaded lexicons
357 pub fn len(&self) -> usize {
358 self.docs.len()
359 }
360
361 /// Check if corpus is empty
362 pub fn is_empty(&self) -> bool {
363 self.docs.is_empty()
364 }
365}
366
367impl Default for LexiconCorpus {
368 fn default() -> Self {
369 Self::new()
370 }
371}
372
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexicon::LexUserType;

    /// Fixture directory used by the loading tests.
    const FIXTURE_DIR: &str = "tests/fixtures/test_lexicons";
    /// Lexicon directory located under `../jacquard-api`, used by the reference tests.
    const API_LEXICON_DIR: &str = "../jacquard-api/lexicons";

    /// Load the corpus used by the reference-resolution tests.
    fn load_api_corpus() -> LexiconCorpus {
        LexiconCorpus::load_from_dir(API_LEXICON_DIR).expect("failed to load lexicons")
    }

    #[test]
    fn test_empty_corpus() {
        let corpus = LexiconCorpus::new();
        assert_eq!(corpus.len(), 0);
        assert!(corpus.is_empty());
    }

    #[test]
    fn test_load_lexicons() {
        let corpus = LexiconCorpus::load_from_dir(FIXTURE_DIR).expect("failed to load lexicons");

        assert!(!corpus.is_empty());
        assert_eq!(corpus.len(), 17); // 10 original + 7 new edge case fixtures

        // A sample of the lexicons that must be present.
        assert!(corpus.get("app.bsky.feed.post").is_some());
        assert!(corpus.get("app.bsky.feed.getAuthorFeed").is_some());
        assert!(corpus.get("app.bsky.richtext.facet").is_some());
        assert!(corpus.get("app.bsky.embed.images").is_some());
        assert!(corpus.get("com.atproto.repo.strongRef").is_some());
        assert!(corpus.get("com.atproto.label.defs").is_some());
    }

    #[test]
    fn test_resolve_ref_without_fragment() {
        let corpus = load_api_corpus();

        // A bare NSID resolves to the main def.
        let resolved = corpus.resolve_ref("app.bsky.feed.post");
        let (doc, def) = resolved.expect("should resolve");
        assert_eq!(doc.id.as_ref(), "app.bsky.feed.post");
        assert!(matches!(def, LexUserType::Record(_)));
    }

    #[test]
    fn test_resolve_ref_with_fragment() {
        let corpus = load_api_corpus();

        // A fragment selects the named def inside the document.
        let resolved = corpus.resolve_ref("app.bsky.richtext.facet#mention");
        let (doc, def) = resolved.expect("should resolve");
        assert_eq!(doc.id.as_ref(), "app.bsky.richtext.facet");
        assert!(matches!(def, LexUserType::Object(_)));
    }

    #[test]
    fn test_ref_exists() {
        let corpus = load_api_corpus();

        // References that exist.
        assert!(corpus.ref_exists("app.bsky.feed.post"));
        assert!(corpus.ref_exists("app.bsky.feed.post#main"));
        assert!(corpus.ref_exists("app.bsky.richtext.facet#mention"));

        // References that do not exist.
        assert!(!corpus.ref_exists("com.example.fake"));
        assert!(!corpus.ref_exists("app.bsky.feed.post#nonexistent"));
    }

    #[test]
    fn test_non_lexicon_json_skipped_silently() {
        // The fixture directory contains not_a_lexicon.json, which must be skipped.
        let corpus = LexiconCorpus::load_from_dir(FIXTURE_DIR)
            .expect("should succeed even with non-lexicon JSON files");

        // The non-lexicon file must not show up in the corpus...
        assert!(corpus.get("some random config").is_none());

        // ...while valid lexicons still load.
        assert!(corpus.get("app.bsky.feed.post").is_some());
    }

    #[test]
    fn test_is_lexicon_content_detection() {
        // No "lexicon" key at all.
        let plain_json = r#"{"name": "test", "version": "1.0"}"#;
        assert!(!is_lexicon_content(plain_json));

        // Not JSON.
        assert!(!is_lexicon_content("not json at all"));

        // "lexicon" at the top level.
        let top_level = r#"{"lexicon": 1, "id": "test.foo"}"#;
        assert!(is_lexicon_content(top_level));

        // "lexicon" one level down.
        let wrapped = r#"{"wrapper": {"lexicon": 1, "id": "test.foo"}}"#;
        assert!(is_lexicon_content(wrapped));
    }

    #[test]
    fn test_broken_lexicon_returns_error_with_path() {
        // broken_lexicon.json has a "lexicon" key (so it is treated as a
        // lexicon) but an invalid structure, so loading must fail.
        let err = LexiconCorpus::load_from_dir("tests/fixtures/error_cases")
            .expect_err("should fail on broken lexicon");
        let err_str = err.to_string();

        // The full path to the broken property is reported.
        assert!(
            err_str.contains("defs.main.properties.count"),
            "error should contain path to the broken property, got: {}",
            err_str
        );

        // The underlying deserialization message is reported.
        assert!(
            err_str.contains("expected i64"),
            "error should describe the type mismatch, got: {}",
            err_str
        );

        // The offending file is named.
        assert!(
            err_str.contains("broken_lexicon.json"),
            "error should mention the file, got: {}",
            err_str
        );
    }
}