Use HashMap to detect duplicates

wip/vec_cmp
Benoît 2018-12-10 22:41:53 +01:00
parent e2da3e896a
commit 05136e295e
1 changed file with 32 additions and 5 deletions

View File

@ -19,6 +19,8 @@ use std::fmt;
use std::thread;
use std::sync::{Arc, Mutex};
use std::collections::HashMap;
const BUFFER_SIZE: usize = 1024;
/* Note for myself : CLAP = _C_ommand _L_ine _A_rgument _P_arser */
@ -245,12 +247,14 @@ fn main() {
let chunk_size = (files_candidate_len / num_cpus) + 1;
let modulus = files_candidate_len % num_cpus;
println!("Calculate {} file(s)", files_candidate_len);
println!("Compute Hash of {} file(s)", files_candidate_len);
println!("Use {} chunk(s) of size {}", modulus, chunk_size);
println!("Use {} chunk(s) of size {}", num_cpus - modulus, chunk_size - 1);
let mut work = files_candidate.lock().unwrap();
println!("\n=Starting Hash compute in Thread");
// Example from :
// https://stackoverflow.com/questions/33818141/how-do-i-pass-disjoint-slices-from-a-vector-to-different-threads
// Scoped threads allow the compiler to prove that no threads will outlive
@ -281,16 +285,39 @@ fn main() {
}
});
println!("=End");
// Sort Unique element in hashmap
let uniques: HashMap<_, _> = work.iter()
.map(|c| (&c.hash, c.name.as_str()))
.collect();
let num_dups = work.len() - uniques.len();
println!("Duplicate : {}", num_dups);
// for (bytes, name) in uniques.iter() {
// println!("Name : {:#?}", name);
// print!("[");
// for byte in *bytes {
// print!("{:02x}", byte);
// }
// println!("]");
// }
println!("\n=Result");
for _i in work.iter() {
}
println!("=End");
for i in work.iter() {
println!("{}", i);
}
// TODO with work !
// check for each hash duplication
// if so --> log to file and remove from list (store to done vector)
t.fg(term::color::CYAN).unwrap();
let end = PreciseTime::now();
println!("");
println!("=End");
println!("{} seconds.", start.to(end));
t.reset().unwrap();