Rust implementation of the CVM algorithm for counting distinct elements in a stream
0

Configure Feed

Select the types of activity you want to include in your feed.

Placate clippy

+6 -6
+5 -5
benches/benchmarks.rs
···
14 14   // generate 1 million 7-digit random positive integers
15 15   fn generate_random_numbers() -> Vec<i32> {
16 16       let mut rng = thread_rng();
17    -     let numbers = (0..1_000_000)
   17 +
   18 +     (0..1_000_000)
18 19           .map(|_| rng.gen_range(1_000_000..10_000_000))
19    -         .collect();
20    -     numbers
   20 +         .collect()
21 21   }
22 22
23 23   fn open_file<P>(filename: P) -> BufReader<File>
···
39 39   #[allow(unused_must_use)]
40 40   fn bench_count_strings_integers(c: &mut Criterion) {
41 41       c.bench_function(
42    -         &format!("Count unique strings in The King in Yellow with regex regularization: e = 0.8, d = 0.1, s = 1000"),
   42 +         "Count unique strings in The King in Yellow with regex regularization: e = 0.8, d = 0.1, s = 1000",
43 43           |b| {
44 44               let input_file = "benches/kiy.txt";
45 45               let epsilon = 0.8;
···
56 56           },
57 57       );
58 58       c.bench_function(
59    -         &format!("Count uniques in ten million 7-digit random positive integers: e = 0.8, d = 0.1, s = 1000"),
   59 +         "Count uniques in ten million 7-digit random positive integers: e = 0.8, d = 0.1, s = 1000",
60 60           |b| {
61 61               let epsilon = 0.8;
62 62               let delta = 0.1;
+1 -1
src/lib.rs
···
48 48           // Round 0: if an element exists, remove it. Element is added back due to probability 1
49 49           // When buffer is full, remove half the elements
50 50           // Round 1: if an element exists, remove it. Element MAY be added back due to probability 0.5
51    -         if self.buf.get(&elem).is_some() {
   51 +         if self.buf.contains(&elem) {
52 52               self.buf.remove(&elem);
53 53           }
54 54           if self.rng.gen_bool(self.probability) {