crates/jacquard-repo/tests/mst_diff_suite.rs at main · nonbinary.computer/jacquard

A better Rust ATProto crate
jacquard / crates / jacquard-repo / tests / mst_diff_suite.rs
at main 17 kB View raw
Orual version bump, changelog updates 8mo ago
  1//! MST diff test suite runner
  2//!
  3//! Runs the mst-test-suite exhaustive diff test cases to validate:
  4//! - record_ops (creates/updates/deletes with CIDs)
  5//! - created_nodes (new MST blocks)
  6//! - deleted_nodes (removed MST blocks)
  7//! - proof_nodes (blocks needed for inclusion/exclusion proofs)
  8//! - inductive_proof_nodes (blocks needed for inductive validation)
  9
 10use bytes::Bytes;
 11use cid::Cid as IpldCid;
 12use jacquard_repo::car::parse_car_bytes;
 13use jacquard_repo::mst::{Mst, MstDiff};
 14use jacquard_repo::storage::MemoryBlockStore;
 15use serde::{Deserialize, Serialize};
 16use std::collections::{BTreeMap, BTreeSet};
 17use std::path::{Path, PathBuf};
 18use std::sync::Arc;
 19
 20const TEST_SUITE_PATH: &str = "/home/orual/Git_Repos/mst-test-suite"; // suite root used by run_mst_diff_suite; fixtures are searched under its "tests" subdirectory and CAR paths are joined onto it
 21
 22/// One test case, as deserialized from an mst-test-suite JSON fixture
 23#[derive(Debug, Deserialize, Serialize)]
 24struct MstDiffTestCase {
 25    #[serde(rename = "$type")]
 26    test_type: String, // value of the fixture's "$type" key; not read anywhere else in this file
 27
 28    description: String, // copied into `TestResult::description` and printed for failing cases
 29
 30    inputs: TestInputs, // the two CAR files whose trees are diffed
 31
 32    results: ExpectedResults, // expected values the computed diff is compared against
 33}
 34
 35#[derive(Debug, Deserialize, Serialize)]
 36struct TestInputs { // locations of the two trees a test case compares
 37    /// Path to the CAR file for tree A, the side `diff` is called on (joined onto the test suite root)
 38    mst_a: String,
 39
 40    /// Path to the CAR file for tree B, the side passed to `diff` as the argument (joined onto the test suite root)
 41    mst_b: String,
 42}
 43
 44#[derive(Debug, Deserialize, Serialize)]
 45struct ExpectedResults { // expected outcome of diffing tree A against tree B; CIDs are kept as strings
 46    /// CIDs of newly created MST node blocks (compared as a set with the keys of the diff's `new_mst_blocks`)
 47    created_nodes: Vec<String>,
 48
 49    /// CIDs of deleted MST node blocks (compared as a set with the diff's `removed_mst_blocks`)
 50    deleted_nodes: Vec<String>,
 51
 52    /// Record operations (sorted by rpath); compared element-by-element with `diff_to_record_ops` output
 53    record_ops: Vec<RecordOp>,
 54
 55    /// CIDs of MST nodes required for inclusion/exclusion proofs (classified by the runner, never asserted)
 56    proof_nodes: Vec<String>,
 57
 58    /// CIDs of MST nodes required for inductive validation (classified by the runner, never asserted)
 59    inductive_proof_nodes: Vec<String>,
 60
 61    /// CIDs expected in firehose broadcast (mostly marked TODO in fixtures); held as untyped JSON and not read by this file
 62    #[serde(skip_serializing_if = "Option::is_none")]
 63    firehose_cids: Option<serde_json::Value>,
 64}
 65
 66#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)]
 67struct RecordOp { // derived ordering compares fields in declaration order, so `rpath` must stay first for `sort()` to order by path
 68    /// Record path (rpath)
 69    rpath: String,
 70
 71    /// Old CID as a string (`None`, i.e. JSON null, for creates)
 72    old_value: Option<String>,
 73
 74    /// New CID as a string (`None`, i.e. JSON null, for deletes)
 75    new_value: Option<String>,
 76}
 77
 78/// Load and parse a CAR file, returning blocks and root CID
 79async fn load_car(path: &Path) -> anyhow::Result<(IpldCid, BTreeMap<IpldCid, Bytes>)> {
 80    let bytes = tokio::fs::read(path).await?;
 81    let parsed = parse_car_bytes(&bytes).await?;
 82    Ok((parsed.root, parsed.blocks))
 83}
 84
 85/// Render a CID via its `to_string()` so it can be compared with the string CIDs stored in the fixtures
 86fn cid_to_string(cid: &IpldCid) -> String {
 87    cid.to_string() // NOTE(review): previously documented as "base32"; presumably that holds for CIDv1 only — confirm
 88}
 89
 90/// Find all .json test files in a directory recursively
 91fn find_test_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> {
 92    let mut test_files = Vec::new();
 93
 94    if dir.is_dir() {
 95        for entry in std::fs::read_dir(dir)? {
 96            let entry = entry?;
 97            let path = entry.path();
 98
 99            if path.is_dir() {
100                test_files.extend(find_test_files(&path)?);
101            } else if path.extension().and_then(|s| s.to_str()) == Some("json") {
102                test_files.push(path);
103            }
104        }
105    }
106
107    Ok(test_files)
108}
109
110/// Run a single test case
111async fn run_test_case(test_path: &Path, suite_root: &Path) -> anyhow::Result<TestResult> {
112    // Parse test case JSON
113    let test_json = tokio::fs::read_to_string(test_path).await?;
114    let test_case: MstDiffTestCase = serde_json::from_str(&test_json)?;
115
116    // Load CAR files
117    let car_a_path = suite_root.join(&test_case.inputs.mst_a);
118    let car_b_path = suite_root.join(&test_case.inputs.mst_b);
119
120    let (root_a, blocks_a) = load_car(&car_a_path).await?;
121    let (root_b, blocks_b) = load_car(&car_b_path).await?;
122
123    // Create storage with both sets of blocks
124    let mut all_blocks = blocks_a;
125    all_blocks.extend(blocks_b);
126    let storage = Arc::new(MemoryBlockStore::new_from_blocks(all_blocks));
127
128    // Load MST instances
129    let mst_a = Mst::load(storage.clone(), root_a, None);
130    let mst_b = Mst::load(storage.clone(), root_b, None);
131
132    // Compute diff
133    let diff = mst_a.diff(&mst_b).await?;
134
135    // Replicate create_commit's relevant_blocks logic (from repo.rs:276-290)
136    let mut relevant_blocks = BTreeMap::new();
137    // For each operation, collect blocks along the path in BOTH trees for inductive validation
138    for (key, _cid) in &diff.creates {
139        mst_b
140            .blocks_for_path(key.as_str(), &mut relevant_blocks)
141            .await?;
142        // Always include old tree paths for CREATE (needed for exclusion proof)
143        mst_a
144            .blocks_for_path(key.as_str(), &mut relevant_blocks)
145            .await?;
146    }
147
148    for (key, _new_cid, _old_cid) in &diff.updates {
149        mst_b
150            .blocks_for_path(key.as_str(), &mut relevant_blocks)
151            .await?;
152        // Include old tree paths for UPDATE (needed for inductive validation)
153        mst_a
154            .blocks_for_path(key.as_str(), &mut relevant_blocks)
155            .await?;
156    }
157
158    for (key, _old_cid) in &diff.deletes {
159        mst_b
160            .blocks_for_path(key.as_str(), &mut relevant_blocks)
161            .await?;
162        // Include old tree paths for DELETE (needed for inductive validation)
163        mst_a
164            .blocks_for_path(key.as_str(), &mut relevant_blocks)
165            .await?;
166    }
167
168    // Union of new_mst_blocks and relevant_blocks (for inductive proof)
169    // NOTE: relevant_blocks may contain blocks from both old and new trees,
170    // but we should exclude blocks that were deleted (in removed_mst_blocks)
171    let removed_set: std::collections::HashSet<_> =
172        diff.removed_mst_blocks.iter().copied().collect();
173    let filtered_relevant: BTreeMap<_, _> = relevant_blocks
174        .into_iter()
175        .filter(|(cid, _)| !removed_set.contains(cid))
176        .collect();
177
178    let mut all_proof_blocks = diff.new_mst_blocks.clone();
179    all_proof_blocks.extend(filtered_relevant);
180
181    // Validate results
182    let mut result = TestResult {
183        test_name: test_path.file_name().unwrap().to_string_lossy().to_string(),
184        description: test_case.description.clone(),
185        passed: true,
186        record_ops_match: false,
187        created_nodes_match: false,
188        deleted_nodes_match: false,
189        proof_nodes_info: None,
190        inductive_proof_nodes_info: None,
191        errors: Vec::new(),
192    };
193
194    // Validate record_ops
195    let actual_ops = diff_to_record_ops(&diff);
196    let expected_ops = test_case.results.record_ops;
197    result.record_ops_match = actual_ops == expected_ops;
198    if !result.record_ops_match {
199        result.errors.push(format!(
200            "Record ops mismatch: expected {} ops, got {}",
201            expected_ops.len(),
202            actual_ops.len()
203        ));
204        result.passed = false;
205    }
206
207    // Validate created_nodes
208    let actual_created: BTreeSet<String> = diff.new_mst_blocks.keys().map(cid_to_string).collect();
209    let expected_created: BTreeSet<String> =
210        test_case.results.created_nodes.iter().cloned().collect();
211    result.created_nodes_match = actual_created == expected_created;
212    if !result.created_nodes_match {
213        result.errors.push(format!(
214            "Created nodes mismatch: expected {}, got {}",
215            expected_created.len(),
216            actual_created.len()
217        ));
218        result.passed = false;
219    }
220
221    // Validate deleted_nodes
222    let actual_deleted: BTreeSet<String> =
223        diff.removed_mst_blocks.iter().map(cid_to_string).collect();
224    let expected_deleted: BTreeSet<String> =
225        test_case.results.deleted_nodes.iter().cloned().collect();
226    result.deleted_nodes_match = actual_deleted == expected_deleted;
227    if !result.deleted_nodes_match {
228        result.errors.push(format!(
229            "Deleted nodes mismatch: expected {}, got {}",
230            expected_deleted.len(),
231            actual_deleted.len()
232        ));
233        result.passed = false;
234    }
235
236    // Compare proof_nodes (should equal new_mst_blocks)
237    let expected_proof: BTreeSet<String> = test_case.results.proof_nodes.iter().cloned().collect();
238    let actual_proof: BTreeSet<String> = diff.new_mst_blocks.keys().map(cid_to_string).collect();
239    let proof_match_status = compute_match_status(&actual_proof, &expected_proof);
240
241    result.proof_nodes_info = Some(ProofNodesInfo {
242        expected: expected_proof.clone(),
243        actual: actual_proof.clone(),
244        match_status: proof_match_status,
245    });
246
247    // Compare inductive_proof_nodes (should equal all_proof_blocks)
248    let expected_inductive: BTreeSet<String> = test_case
249        .results
250        .inductive_proof_nodes
251        .iter()
252        .cloned()
253        .collect();
254    let actual_inductive: BTreeSet<String> = all_proof_blocks.keys().map(cid_to_string).collect();
255    let inductive_match_status = compute_match_status(&actual_inductive, &expected_inductive);
256
257    result.inductive_proof_nodes_info = Some(ProofNodesInfo {
258        expected: expected_inductive.clone(),
259        actual: actual_inductive.clone(),
260        match_status: inductive_match_status,
261    });
262
263    Ok(result)
264}
265
266/// Compute match status between actual and expected sets
267fn compute_match_status(actual: &BTreeSet<String>, expected: &BTreeSet<String>) -> MatchStatus {
268    if actual == expected {
269        MatchStatus::Exact
270    } else if actual.is_subset(expected) {
271        MatchStatus::Subset
272    } else if actual.is_superset(expected) {
273        MatchStatus::Superset
274    } else {
275        MatchStatus::Different
276    }
277}
278
279/// Convert MstDiff to sorted record operations
280fn diff_to_record_ops(diff: &MstDiff) -> Vec<RecordOp> {
281    let mut ops = Vec::new();
282
283    // Creates
284    for (key, cid) in &diff.creates {
285        ops.push(RecordOp {
286            rpath: key.to_string(),
287            old_value: None,
288            new_value: Some(cid_to_string(cid)),
289        });
290    }
291
292    // Updates
293    for (key, new_cid, old_cid) in &diff.updates {
294        ops.push(RecordOp {
295            rpath: key.to_string(),
296            old_value: Some(cid_to_string(old_cid)),
297            new_value: Some(cid_to_string(new_cid)),
298        });
299    }
300
301    // Deletes
302    for (key, old_cid) in &diff.deletes {
303        ops.push(RecordOp {
304            rpath: key.to_string(),
305            old_value: Some(cid_to_string(old_cid)),
306            new_value: None,
307        });
308    }
309
310    // Sort by rpath
311    ops.sort();
312    ops
313}
314
315/// Outcome of running one test case through `run_test_case`
316#[derive(Debug)]
317struct TestResult {
318    test_name: String, // file name of the fixture
319    description: String, // description text taken from the fixture
320    passed: bool, // false as soon as record ops, created nodes or deleted nodes mismatch; proof comparisons do not affect it
321    record_ops_match: bool, // computed record ops equal the fixture's list
322    created_nodes_match: bool, // computed created-node set equals the fixture's
323    deleted_nodes_match: bool, // computed deleted-node set equals the fixture's
324    proof_nodes_info: Option<ProofNodesInfo>, // proof-node comparison (informational)
325    inductive_proof_nodes_info: Option<ProofNodesInfo>, // inductive-proof-node comparison (informational)
326    errors: Vec<String>, // one message per failed hard check
327}
328
329#[derive(Debug)]
330struct ProofNodesInfo { // expected vs. computed CID sets for one proof-node comparison
331    expected: BTreeSet<String>, // CID strings listed in the fixture
332    actual: BTreeSet<String>, // CID strings computed from the diff
333    match_status: MatchStatus, // how `actual` relates to `expected`
334}
335
336#[derive(Debug)]
337enum MatchStatus { // relationship of an actual CID set to the expected one, as returned by compute_match_status
338    Exact,     // actual equals expected
339    Subset,    // actual is a strict subset of expected (missing blocks)
340    Superset,  // actual is a strict superset of expected (extra blocks)
341    Different, // neither subset nor superset
342}
343
344/// Counters accumulated across all test cases by `run_mst_diff_suite`
345#[derive(Debug, Default)]
346struct TestSummary {
347    total_tests: usize, // every fixture file attempted
348    passed_tests: usize, // cases whose `TestResult::passed` is true
349    failed_tests: usize, // cases with `passed == false`, plus cases that returned an error
350    record_ops_matches: usize, // cases whose record ops matched
351    created_nodes_matches: usize, // cases whose created-node set matched
352    deleted_nodes_matches: usize, // cases whose deleted-node set matched
353    proof_exact_matches: usize, // proof nodes: MatchStatus::Exact
354    proof_subset_matches: usize, // proof nodes: MatchStatus::Subset
355    proof_superset_matches: usize, // proof nodes: MatchStatus::Superset
356    inductive_exact_matches: usize, // inductive proof nodes: MatchStatus::Exact
357    inductive_subset_matches: usize, // inductive proof nodes: MatchStatus::Subset
358    inductive_superset_matches: usize, // inductive proof nodes: MatchStatus::Superset
359}
360
361#[tokio::test]
362#[ignore] // Local-only: requires mst-test-suite at /home/orual/Git_Repos/mst-test-suite
363async fn run_mst_diff_suite() {
364    let suite_root = Path::new(TEST_SUITE_PATH);
365    let tests_dir = suite_root.join("tests");
366
367    // Find all test files
368    let test_files = find_test_files(&tests_dir).expect("Failed to find test files");
369
370    println!("Found {} test files", test_files.len());
371
372    let mut summary = TestSummary::default();
373    let mut failed_tests = Vec::new();
374
375    for test_path in &test_files {
376        summary.total_tests += 1;
377
378        match run_test_case(test_path, suite_root).await {
379            Ok(result) => {
380                let passed = result.passed;
381                let record_ops_match = result.record_ops_match;
382                let created_nodes_match = result.created_nodes_match;
383                let deleted_nodes_match = result.deleted_nodes_match;
384
385                // Track proof node match status
386                if let Some(ref proof_info) = result.proof_nodes_info {
387                    match proof_info.match_status {
388                        MatchStatus::Exact => summary.proof_exact_matches += 1,
389                        MatchStatus::Subset => summary.proof_subset_matches += 1,
390                        MatchStatus::Superset => summary.proof_superset_matches += 1,
391                        _ => {}
392                    }
393                }
394
395                if let Some(ref inductive_info) = result.inductive_proof_nodes_info {
396                    match inductive_info.match_status {
397                        MatchStatus::Exact => summary.inductive_exact_matches += 1,
398                        MatchStatus::Subset => summary.inductive_subset_matches += 1,
399                        MatchStatus::Superset => summary.inductive_superset_matches += 1,
400                        _ => {}
401                    }
402                }
403
404                if passed {
405                    summary.passed_tests += 1;
406                } else {
407                    summary.failed_tests += 1;
408                    failed_tests.push(result);
409                }
410
411                if record_ops_match {
412                    summary.record_ops_matches += 1;
413                }
414                if created_nodes_match {
415                    summary.created_nodes_matches += 1;
416                }
417                if deleted_nodes_match {
418                    summary.deleted_nodes_matches += 1;
419                }
420            }
421            Err(e) => {
422                summary.failed_tests += 1;
423                eprintln!("Error running test {:?}: {}", test_path.file_name(), e);
424            }
425        }
426    }
427
428    // Print summary
429    println!("\n=== MST Diff Suite Summary ===");
430    println!("Total tests: {}", summary.total_tests);
431    println!("Passed: {}", summary.passed_tests);
432    println!("Failed: {}", summary.failed_tests);
433    println!();
434    println!(
435        "Record ops matches: {}/{}",
436        summary.record_ops_matches, summary.total_tests
437    );
438    println!(
439        "Created nodes matches: {}/{}",
440        summary.created_nodes_matches, summary.total_tests
441    );
442    println!(
443        "Deleted nodes matches: {}/{}",
444        summary.deleted_nodes_matches, summary.total_tests
445    );
446    println!();
447    println!("Proof nodes (forward diff):");
448    println!("  Exact: {}", summary.proof_exact_matches);
449    println!(
450        "  Subset (missing blocks): {}",
451        summary.proof_subset_matches
452    );
453    println!(
454        "  Superset (extra blocks): {}",
455        summary.proof_superset_matches
456    );
457    println!();
458    println!("Inductive proof nodes:");
459    println!("  Exact: {}", summary.inductive_exact_matches);
460    println!(
461        "  Subset (missing blocks): {}",
462        summary.inductive_subset_matches
463    );
464    println!(
465        "  Superset (extra blocks): {}",
466        summary.inductive_superset_matches
467    );
468
469    // Collect tests with missing inductive proof blocks
470    let mut missing_block_cases = Vec::new();
471    for test_path in &test_files {
472        match run_test_case(test_path, suite_root).await {
473            Ok(result) => {
474                if let Some(ref info) = result.inductive_proof_nodes_info {
475                    if matches!(info.match_status, MatchStatus::Subset) {
476                        let missing: Vec<_> =
477                            info.expected.difference(&info.actual).cloned().collect();
478                        missing_block_cases.push((result.test_name, missing));
479                    }
480                }
481            }
482            Err(_) => {}
483        }
484    }
485
486    if !missing_block_cases.is_empty() {
487        println!("\n=== CRITICAL: Tests Missing Inductive Proof Blocks ===");
488        println!("Total cases missing blocks: {}", missing_block_cases.len());
489        println!("\nFirst 10 cases:");
490        for (test_name, missing) in missing_block_cases.iter().take(10) {
491            println!("\n{}", test_name);
492            println!("  Missing {} blocks:", missing.len());
493            for cid in missing {
494                println!("    {}", cid);
495            }
496        }
497    }
498
499    // Print first few failures for debugging
500    if !failed_tests.is_empty() {
501        println!("\n=== First 5 Failures (detailed) ===");
502        for result in failed_tests.iter().take(5) {
503            println!("\nTest: {}", result.test_name);
504            println!("Description: {}", result.description);
505            for error in &result.errors {
506                println!("  - {}", error);
507            }
508        }
509
510        println!("\n=== Failure Summary ===");
511        println!("Total failures: {}", failed_tests.len());
512    }
513
514    // Assert all tests passed
515    assert_eq!(
516        summary.failed_tests, 0,
517        "{} tests failed (see output above)",
518        summary.failed_tests
519    );
520}
Configure Feed

Configure Feed