Rust implementation of the CVM algorithm for counting distinct elements in a stream
0

Configure Feed

Select the types of activity you want to include in your feed.

1#[macro_use] 2extern crate criterion; 3use std::{ 4 fs::File, 5 io::{BufRead, BufReader}, 6 path::Path, 7}; 8 9use criterion::Criterion; 10use cvmcount::CVM; 11use rand::{thread_rng, Rng}; 12use regex::Regex; 13 14// generate 1 million 7-digit random positive integers 15fn generate_random_numbers() -> Vec<i32> { 16 let mut rng = thread_rng(); 17 18 (0..1_000_000) 19 .map(|_| rng.gen_range(1_000_000..10_000_000)) 20 .collect() 21} 22 23fn open_file<P>(filename: P) -> BufReader<File> 24where 25 P: AsRef<Path>, 26{ 27 let f = File::open(filename).expect("Couldn't read from file"); 28 BufReader::new(f) 29} 30 31fn line_to_word(re: &Regex, cvm: &mut CVM<String>, line: &str) { 32 let words = line.split(' '); 33 words.for_each(|word| { 34 let clean_word = re.replace_all(word, "").to_lowercase(); 35 cvm.process_element(clean_word) 36 }) 37} 38 39#[allow(unused_must_use)] 40fn bench_count_strings_integers(c: &mut Criterion) { 41 c.bench_function( 42 "Count unique strings in The King in Yellow with regex regularization: e = 0.8, d = 0.1, s = 1000", 43 |b| { 44 let input_file = "benches/kiy.txt"; 45 let epsilon = 0.8; 46 let delta = 0.1; 47 let stream_size = 1000; 48 let re = Regex::new(r"[^\w\s]").unwrap(); 49 b.iter(|| { 50 let mut string_counter: CVM<String> = CVM::new(epsilon, delta, stream_size); 51 let br = open_file(input_file); 52 br.lines() 53 .for_each(|line| line_to_word(&re, &mut string_counter, &line.unwrap())); 54 string_counter.calculate_final_result() 55 }) 56 }, 57 ); 58 c.bench_function( 59 "Count uniques in ten million 7-digit random positive integers: e = 0.8, d = 0.1, s = 1000", 60 |b| { 61 let epsilon = 0.8; 62 let delta = 0.1; 63 let stream_size = 1000; 64 let digits = generate_random_numbers(); 65 b.iter(|| { 66 let mut int_counter: CVM<i32> = CVM::new(epsilon, delta, stream_size); 67 digits.iter().for_each(|integer| int_counter.process_element(*integer)); 68 int_counter.calculate_final_result() 69 }) 70 } 71 ); 72} 73 74criterion_group!(benches, bench_count_strings_integers,); 75criterion_main!(benches);