···2121 .version(crate_version!())
2222 .author("Stephan Hügel <urschrei@gmail.com>")
2323 .about("Use the CVM algorithm to estimate the number of unique tokens in a stream")
2424- .arg(arg!(-t --tokens <FILE> "A text file containing words").required(true).value_parser(value_parser!(PathBuf)))
2525- .arg(arg!(-e --epsilon <EPSILON> "How close you want your estimate to be to the true number of distinct tokens. A smaller ε means you require a more precise estimate. For example, ε = 0.05 means you want your estimate to be within 5 % of the actual value. An epsilon of 0.8 is a good starting point for most applications").required(true).value_parser(value_parser!(f64)))
2626- .arg(arg!(-d --delta <DELTA> "The level of certainty that the algorithm's estimate will fall within the desired accuracy range. A higher confidence (e.g., 99.9 %) means you're very sure the estimate will be accurate, while a lower confidence (e.g., 90 %) means there's a higher chance the estimate might be outside the desired range. A delta of 0.1 is a good starting point for most applications").required(true).value_parser(value_parser!(f64)))
2727- .arg(arg!(-s --streamsize <STREAM_SIZE> "This is used to determine buffer size and can be a loose approximation. The closer it is to the stream size, the more accurate the results").required(true).value_parser(value_parser!(usize)))
2424+ .arg(arg!(-t --tokens <FILE> "A text file containing words")
2525+ .required(true)
2626+ .value_parser(value_parser!(PathBuf)))
2727+ .arg(arg!(-e --epsilon <EPSILON> "How close you want your estimate to be to the true number of distinct tokens. A smaller ε means you require a more precise estimate. For example, ε = 0.05 means you want your estimate to be within 5 % of the actual value. An epsilon of 0.8 is a good starting point for most applications")
2828+ .required(true)
2929+ .value_parser(clap::value_parser!(f64))
3030+ )
3131+ .arg(arg!(-d --delta <DELTA> "The level of certainty that the algorithm's estimate will fall within the desired accuracy range. A higher confidence (e.g., 99.9 %) means you're very sure the estimate will be accurate, while a lower confidence (e.g., 90 %) means there's a higher chance the estimate might be outside the desired range. A delta of 0.1 is a good starting point for most applications")
3232+ .required(true)
3333+ .value_parser(value_parser!(f64)))
3434+ .arg(arg!(-s --streamsize <STREAM_SIZE> "This is used to determine buffer size and can be a loose approximation. The closer it is to the stream size, the more accurate the results")
3535+ .required(true)
3636+ .value_parser(value_parser!(usize)))
2837 .get_matches();
2938 let input_file = params.get_one::<PathBuf>("tokens").unwrap();
3039 let epsilon = params.get_one::<f64>("epsilon").unwrap();