Rust implementation of the CVM algorithm for counting distinct elements in a stream
0

Configure Feed

Select the types of activity you want to include in your feed.

Merge pull request #15 from urschrei/shugel/push-sqypwymtoyyu

Fix treap insert to correctly track size on duplicates

+69 -27
+69 -27
src/treap.rs
···
 83  83       }
 84  84
 85  85       /// Insert a key with a random priority
 86     -     pub fn insert<R: Rng>(&mut self, key: T, rng: &mut R) {
     86 +     ///
     87 +     /// Returns `true` if the key was inserted, `false` if it already existed.
     88 +     pub fn insert<R: Rng>(&mut self, key: T, rng: &mut R) -> bool {
 87  89           let priority = rng.random();
 88     -         self.root = Self::insert_node(self.root.take(), key, priority);
 89     -         self.size += 1;
     90 +         let (new_root, inserted) = Self::insert_node(self.root.take(), key, priority);
     91 +         self.root = new_root;
     92 +         if inserted {
     93 +             self.size += 1;
     94 +         }
     95 +         inserted
 90  96       }
 91  97
 92  98       /// Check if the treap contains a key
···
122 128       }
123 129
124 130       // Helper function to insert a node
125     -     fn insert_node(node: Option<Box<Node<T>>>, key: T, priority: u32) -> Option<Box<Node<T>>> {
    131 +     // Returns (new_tree, was_inserted) tuple
    132 +     fn insert_node(
    133 +         node: Option<Box<Node<T>>>,
    134 +         key: T,
    135 +         priority: u32,
    136 +     ) -> (Option<Box<Node<T>>>, bool) {
126 137           match node {
127     -             None => Some(Box::new(Node::new(key, priority))),
128     -             Some(mut n) => {
129     -                 match key.cmp(&n.key) {
130     -                     Ordering::Less => {
131     -                         n.left = Self::insert_node(n.left, key, priority);
132     -                         // Maintain heap property
133     -                         if n.left.as_ref().unwrap().priority > n.priority {
134     -                             Self::rotate_right(n)
135     -                         } else {
136     -                             Some(n)
137     -                         }
138     -                     }
139     -                     Ordering::Greater => {
140     -                         n.right = Self::insert_node(n.right, key, priority);
141     -                         // Maintain heap property
142     -                         if n.right.as_ref().unwrap().priority > n.priority {
143     -                             Self::rotate_left(n)
144     -                         } else {
145     -                             Some(n)
146     -                         }
147     -                     }
148     -                     Ordering::Equal => Some(n), // Key already exists, do nothing
    138 +             None => (Some(Box::new(Node::new(key, priority))), true),
    139 +             Some(mut n) => match key.cmp(&n.key) {
    140 +                 Ordering::Less => {
    141 +                     let (new_left, inserted) = Self::insert_node(n.left, key, priority);
    142 +                     n.left = new_left;
    143 +                     // Maintain heap property (only rotate if we actually inserted)
    144 +                     let result = if inserted && n.left.as_ref().unwrap().priority > n.priority {
    145 +                         Self::rotate_right(n)
    146 +                     } else {
    147 +                         Some(n)
    148 +                     };
    149 +                     (result, inserted)
    150 +                 }
    151 +                 Ordering::Greater => {
    152 +                     let (new_right, inserted) = Self::insert_node(n.right, key, priority);
    153 +                     n.right = new_right;
    154 +                     // Maintain heap property (only rotate if we actually inserted)
    155 +                     let result = if inserted && n.right.as_ref().unwrap().priority > n.priority {
    156 +                         Self::rotate_left(n)
    157 +                     } else {
    158 +                         Some(n)
    159 +                     };
    160 +                     (result, inserted)
149 161                   }
150     -             }
    162 +                 Ordering::Equal => (Some(n), false), // Key already exists, do nothing
    163 +             },
151 164           }
152 165       }
153 166
···
309 322                   assert!(!treap.contains(&i));
310 323               }
311 324           }
    325 +     }
    326 +
    327 +     #[test]
    328 +     fn test_duplicate_insertion() {
    329 +         let mut treap = Treap::new();
    330 +         let mut rng = StdRng::seed_from_u64(42);
    331 +
    332 +         // Insert elements
    333 +         assert!(treap.insert(5, &mut rng)); // First insertion returns true
    334 +         assert!(treap.insert(3, &mut rng));
    335 +         assert!(treap.insert(7, &mut rng));
    336 +         assert_eq!(treap.len(), 3);
    337 +
    338 +         // Try to insert duplicates - should return false and not change size
    339 +         assert!(!treap.insert(5, &mut rng));
    340 +         assert!(!treap.insert(3, &mut rng));
    341 +         assert!(!treap.insert(7, &mut rng));
    342 +         assert_eq!(treap.len(), 3); // Size unchanged
    343 +
    344 +         // Verify elements still exist
    345 +         assert!(treap.contains(&5));
    346 +         assert!(treap.contains(&3));
    347 +         assert!(treap.contains(&7));
    348 +
    349 +         // Remove and re-insert should work
    350 +         assert!(treap.remove(&5));
    351 +         assert_eq!(treap.len(), 2);
    352 +         assert!(treap.insert(5, &mut rng)); // Re-insertion returns true
    353 +         assert_eq!(treap.len(), 3);
312 354       }
313 355   }