// visualhaar/src/visualhaar.rs
// (576 lines, 24 KiB, Rust)

extern crate xml;
use image;
use log::{info, trace, warn};
use std::{convert::TryInto, error::Error};
use stopwatch::{Stopwatch};
use ndarray as nd;
use super::heatmap as heatmap;
/// A haar classifier based on OpenCV cascade XML files.
/// Structure info from https://answers.opencv.org/question/8418/explanation-of-cascadexml-in-a-haar-classifier/
/// and the contribution of StevenPuttemans to OpenCV 3 Blueprints
/// Code examples of the book are https://github.com/OpenCVBlueprints/OpenCVBlueprints/tree/master/chapter_5/source_code
/// A little at SO https://stackoverflow.com/a/34897460
/// Nice and detailed: https://stackoverflow.com/a/41893728
/// Also great: https://medium.com/datadriveninvestor/understanding-and-implementing-the-viola-jones-image-classification-algorithm-85621f7fe20b
#[derive(Debug)]
pub struct HaarClassifier {
    /// height of the object model window, from the cascade's <height> element
    /// (NOTE: the original comments for height/width were swapped; fixed here)
    pub height: u8,
    /// width of the object model window, from the cascade's <width> element
    pub width: u8,
    /// maximum number of weak classifiers per stage (<stageParams>/<maxWeakCount>)
    pub stage_max_weak_count: u8,
    /// maximum category count (<featureParams>/<maxCatCount>)
    pub feature_max_cat_count: u8,
    /// number of child nodes counted under <stages> (see note in `from_xml`)
    pub stage_num: usize,
    /// the cascade stages, evaluated in order by `scan_window`
    pub stages: Vec<HaarClassifierStage>,
    /// all parsed features; stages hold their own clones of these
    pub features: Vec<HaarClassifierFeature>
}
/// One stage of the cascade: a set of weak classifiers ("stumps") whose
/// leaf values are summed and compared against the stage threshold.
#[derive(Debug)]
pub struct HaarClassifierStage {
    /// number of weak classifiers in this stage, parsed from <maxWeakCount>
    max_weak_count: usize,
    /// stage acceptance threshold from <stageThreshold>
    /// (field name keeps the original spelling "treshold")
    treshold: f64,
    /// the weak classifiers ("stumps") that vote in this stage
    weak_classifiers: Vec<HaarClassifierStageClassifier>
}
/// A single weak classifier (decision stump) inside a stage, parsed from one
/// <_> element under <weakClassifiers> (its <internalNodes> and <leafValues>).
#[derive(Debug)]
pub struct HaarClassifierStageClassifier {
    /// first value of <internalNodes> (left child index in OpenCV's tree encoding)
    left: f32,
    /// second value of <internalNodes> (right child index)
    right: f32,
    /// the Haar feature this stump evaluates (cloned from the feature list)
    feature: HaarClassifierFeature,
    threshold: f64, // stageTreshold - THRESHOLD_EPS = 1e-5f: https://github.com/opencv/opencv/blob/0cf479dd5ce8602040716811240bb7595de4ba9e/modules/objdetect/src/cascadedetect.cpp#L1503
    /// the <leafValues>: [0] is added to the stage sum when the feature is below
    /// threshold, [1] otherwise (see `scan_window`)
    leaf_values: Vec<f64>
}
/// A Haar-like feature: a set of weighted rectangles whose (integral-image)
/// sums are combined into a single feature value.
#[derive(Clone,Debug)]
pub struct HaarClassifierFeature {
    /// the weighted rectangles making up this feature
    rects: Vec<HaarClassifierFeatureRect>,
    /// tilted (45°) features — always parsed as `false`; not implemented
    tilted: bool, // Dont' implement for now
}
impl HaarClassifierFeature{
    /// Compute the feature value at window position (x, y): the sum of the
    /// weighted rectangle sums over the integral image.
    ///
    /// `_scale` and `_scan_window_size` are accepted to keep the existing call
    /// signature but are not used: `compute_rect` works in unscaled model
    /// coordinates (the image itself is resized per scale in `scan_image`).
    fn compute_feature(&self, image_window: &nd::ArrayView2<u32>, _scale: &f64, x: usize, y: usize, _scan_window_size: usize) -> f64 {
        // iterator sum replaces the manual accumulator loop; same result
        self.rects
            .iter()
            .map(|rect| rect.compute_rect(image_window, x, y))
            .sum()
    }

    /// Draw every rectangle of this feature (scaled to image space) into the
    /// output visualisation frame by adding each rectangle's weight.
    fn draw(&self, draw_window: &mut nd::ArrayViewMut2<i16>, scale: &f64, x: usize, y: usize, scan_window_size: usize) {
        for rect in &self.rects {
            rect.draw(draw_window, scale, x, y, scan_window_size);
        }
    }
}
/// One weighted rectangle of a Haar feature, in model-window coordinates
/// (i.e. relative to the classifier's width × height window, not the image).
#[derive(Clone,Debug)]
pub struct HaarClassifierFeatureRect{
    /// top left coordinate, x
    tl_x: u8,
    /// top left coordinate, y
    tl_y: u8,
    /// rectangle width (model coordinates)
    width: u8,
    /// rectangle height (model coordinates)
    height: u8,
    /// weight factor (parsed from a float like "-1." and truncated to integer)
    weight: i16,
}
impl HaarClassifierFeatureRect{
    /// Scale this model-space rectangle to image space (used for drawing).
    /// Returns (x1, y1, x2, y2): top-left and bottom-right corners.
    fn get_coordinates_for_scale(&self, scale: &f64) -> (usize, usize, usize, usize) {
        let x1 = (self.tl_x as f64 * scale).floor() as usize;
        let y1 = (self.tl_y as f64 * scale).floor() as usize;
        let x2 = x1 + (self.width as f64 * scale).floor() as usize;
        let y2 = y1 + (self.height as f64 * scale).floor() as usize;
        (x1, y1, x2, y2)
    }

    /// Unscaled model-space corners of this rectangle.
    /// Returns (x1, y1, x2, y2): top-left and bottom-right corners.
    fn get_coordinates(&self) -> (usize, usize, usize, usize) {
        let x1 = self.tl_x as usize;
        let y1 = self.tl_y as usize;
        let x2 = x1 + self.width as usize;
        let y2 = y1 + self.height as usize;
        (x1, y1, x2, y2)
    }

    /// The feature sum is finally calculated by first summing all values of the pixels inside the rectangle and then multiplying it with the weight factor. Finally, those weighted sums are combined together to yield as a final feature value. Keep in mind that all the coordinates retrieved for a single feature are in relation to the window/model size and not the complete image which is processed.
    fn compute_rect(&self, image_window: &nd::ArrayView2<u32>, x: usize, y: usize) -> f64 {
        let (x1, y1, x2, y2) = self.get_coordinates();
        // Standard integral-image rectangle sum: I(br) + I(tl) - I(tr) - I(bl).
        // Widen to i64 BEFORE the arithmetic: the original computed this in u32,
        // where `a + b` can overflow for large/bright images and the intermediate
        // `a + b - c` can underflow — both panic in debug builds. The final value
        // is identical whenever the original did not overflow.
        let sum = image_window[[y + y2, x + x2]] as i64
            + image_window[[y + y1, x + x1]] as i64
            - image_window[[y + y1, x + x2]] as i64
            - image_window[[y + y2, x + x1]] as i64;
        // apply the rectangle's weight factor
        (sum as f64) * self.weight as f64
    }

    /// Add this rectangle's weight to the visualisation frame over the scaled
    /// rectangle area at window position (x, y).
    /// `_scan_window_size` is unused; kept only for the existing call signature.
    fn draw(&self, draw_window: &mut nd::ArrayViewMut2<i16>, scale: &f64, x: usize, y: usize, _scan_window_size: usize) {
        let (x1, y1, x2, y2) = self.get_coordinates_for_scale(scale);
        // TODO how to speed this up?
        let mut rect = draw_window.slice_mut(s![y+y1..y+y2, x+x1..x+x2]); // semi slow (initially 500ms)
        rect += self.weight; // super slow (initially 10.000 ms)
    }
}
/// Result of `HaarClassifier::scan_image`: the rendered visualisation frame.
pub struct Outcome {
    // frame: image::ImageBuffer<image::Luma<u8>, Vec<u8>>,
    /// visualisation image: grayscale heat of accepted weak classifiers, or
    /// RGB when a heatmap palette was supplied to `scan_image`
    pub dynamic_img: image::DynamicImage,
}
impl HaarClassifier {
    /// Parse a classifier from an OpenCV cascade XML file, using roxmltree.
    ///
    /// Expects the layout `<opencv_storage><cascade>…</cascade></opencv_storage>`
    /// with `height`, `width`, `stageParams`, `featureParams`, `features` and
    /// `stages` children. Features are parsed first so each stage's weak
    /// classifiers can resolve their feature index immediately.
    ///
    /// # Errors
    /// Returns an error when the XML fails to parse or a numeric field fails to parse.
    ///
    /// # Panics
    /// Panics (`unwrap`) when the file cannot be read or an expected element/text
    /// node is missing — i.e. on any structurally malformed cascade file.
    pub fn from_xml(filename: &str) -> Result<Self, Box<dyn Error>> {
        let text = std::fs::read_to_string(filename).unwrap();
        let doc = match roxmltree::Document::parse(&text) {
            Ok(doc) => doc,
            Err(e) => {
                println!("Error: {}.", e);
                return Err(From::from(e));
            },
        };
        // root: <opencv_storage>
        let root_el = doc.root().first_element_child().unwrap();
        // first child of the root: <cascade>
        let cascade = root_el.first_element_child().unwrap();
        let features_el = cascade.children().find(|n| n.is_element() && n.has_tag_name("features")).unwrap();
        let stages_el = cascade.children().find(|n| n.is_element() && n.has_tag_name("stages")).unwrap();
        // model window dimensions
        let height: u8 = cascade.children().find(|n| n.is_element() && n.has_tag_name("height")).unwrap().text().unwrap().parse()?;
        let width: u8 = cascade.children().find(|n| n.is_element() && n.has_tag_name("width")).unwrap().text().unwrap().parse()?;
        let stage_max_weak_count: u8 = cascade.children()
            .find(|n| n.is_element() && n.has_tag_name("stageParams")).unwrap()
            .children().find(|n| n.is_element() && n.has_tag_name("maxWeakCount")).unwrap()
            .text().unwrap()
            .parse()?;
        let feature_max_cat_count: u8 = cascade.children()
            .find(|n| n.is_element() && n.has_tag_name("featureParams")).unwrap()
            .children().find(|n| n.is_element() && n.has_tag_name("maxCatCount")).unwrap()
            .text().unwrap()
            .parse()?;
        // NOTE(review): children() also yields non-element (text/whitespace) nodes,
        // so these counts over-estimate the number of stages/features; they are
        // only used as Vec capacities and for the stored `stage_num` field.
        let stage_num: usize = stages_el.children().count();
        let feature_num: usize = features_el.children().count();
        let mut stages = Vec::<HaarClassifierStage>::with_capacity(stage_num);
        let mut features = Vec::<HaarClassifierFeature>::with_capacity(feature_num);
        // each feature is an <_> element under <features>
        for node in features_el.children().filter(|n| n.is_element() && n.has_tag_name("_")) {
            let rect_els = node.children().find(|n| n.has_tag_name("rects")).unwrap().children().filter(|n| n.is_element());
            let mut rects = Vec::<HaarClassifierFeatureRect>::new();
            for rect in rect_els {
                // each <_> under <rects> holds "x y width height weight"
                let v: Vec<&str> = rect.text().unwrap().split_whitespace().collect();
                assert_eq!(v.len(), 5, "Expected values for features: x, y, width, height, weight");
                // weight is written as a float (e.g. "-1.") but stored truncated to i16
                let w: f64 = v[4].parse()?;
                rects.push(HaarClassifierFeatureRect{
                    tl_x: v[0].parse()?,
                    tl_y: v[1].parse()?,
                    width: v[2].parse()?,
                    height: v[3].parse()?,
                    weight: w as i16,
                });
            }
            features.push(
                HaarClassifierFeature{
                    rects: rects,
                    tilted: false, // tilted features are not implemented; hard-coded false
                }
            );
        }
        // loop the stages after the features, so we can immediately map features to the internalNodes
        for node in stages_el.children().filter(|n| n.is_element() && n.has_tag_name("_")) {
            let max_weak_count: usize = node.children().find(|n| n.has_tag_name("maxWeakCount")).unwrap().text().unwrap().parse()?;
            let stage_treshold: f64 = node.children().find(|n| n.has_tag_name("stageThreshold")).unwrap().text().unwrap().parse()?;
            let weak_classifier_els = node.children().find(|n| n.has_tag_name("weakClassifiers")).unwrap().children().filter(|n| n.is_element() && n.has_tag_name("_"));
            let mut weak_classifiers = Vec::<HaarClassifierStageClassifier>::new();
            for classifier_el in weak_classifier_els {
                // <internalNodes>: left right featureIndex threshold ... rest is ignored
                let mut internal_nodes = classifier_el.children().find(|n| n.has_tag_name("internalNodes")).unwrap().text().unwrap().trim().split_whitespace();
                let left: f32 = internal_nodes.next().unwrap().parse().unwrap();
                let right: f32 = internal_nodes.next().unwrap().parse().unwrap();
                let feature_idx: usize = internal_nodes.next().unwrap().parse().unwrap();
                let feature = features[feature_idx].clone(); // we just assume it exists. We copy instead of borrow to avoid lifetime parameters... I really need to figure these out.. ouch
                let threshold: f64 = internal_nodes.next().unwrap().parse().unwrap();
                // <leafValues>: the values for left and right ... the rest is ignored
                let leaf_values_split = classifier_el.children().find(|n| n.has_tag_name("leafValues")).unwrap().text().unwrap().trim().split_whitespace();
                let mut leaf_values: Vec<f64> = Vec::new();
                for leaf_value in leaf_values_split{
                    leaf_values.push(leaf_value.parse().unwrap());
                }
                weak_classifiers.push(
                    HaarClassifierStageClassifier{
                        left: left,
                        right: right,
                        feature: feature,
                        threshold: threshold,
                        leaf_values: leaf_values
                    }
                );
            }
            stages.push(
                HaarClassifierStage{
                    max_weak_count: max_weak_count,
                    treshold: stage_treshold,
                    weak_classifiers: weak_classifiers,
                }
            );
        }
        let haar = HaarClassifier{
            height: height,
            width: width,
            stage_max_weak_count: stage_max_weak_count,
            feature_max_cat_count: feature_max_cat_count,
            stage_num: stage_num,
            stages: stages,
            features: features,
        };
        return Ok(haar);
    }

    /// take an ImageBuffer and scan it for faces.
    /// min_face_factor parameter gives starting size of scan window (frame height / factor). So higher number scans for smaller faces.
    ///
    /// Strategy: instead of scaling the scan window, the grayscale frame is
    /// resized per scale step so the model window (self.width × self.height)
    /// can slide over it at unit scale. Accepted weak classifiers draw their
    /// feature rectangles into a per-scale frame, which is then accumulated
    /// (nearest-neighbour upsampled) into `output_frame`. The accumulated frame
    /// is normalised to 0..255 and returned as a grayscale (or heatmap-colored)
    /// image inside `Outcome`.
    pub fn scan_image(&self, frame: image::ImageBuffer<image::Rgb<u8>, Vec<u8>>, heatmap: &Option<heatmap::Heatmap>, min_face_factor: u32) -> Result<Outcome, String> {
        let sw = Stopwatch::start_new();
        let img_bw = image::imageops::grayscale(&frame);
        // accumulator for the drawn feature weights, in (row, col) = (y, x) order
        let mut output_frame: nd::Array2<i16> = nd::Array::zeros((
            img_bw.height() as usize,
            img_bw.width() as usize,
        ));
        // smallest face size to scan for, derived from the frame height
        let min_size = frame.height() / min_face_factor; // TODO: Make min face size (or factor) variable
        // largest window: the smaller image dimension
        let max_window_size = std::cmp::min(img_bw.dimensions().0, img_bw.dimensions().1) as usize;
        let mut window_size: usize = min_size.clone() as usize;
        let mut count_faces = 0;
        let mut count_not_faces = 0;
        info!("preprocessing: {:?}ms", sw.elapsed_ms());
        let mut loop_time: i64 = 0;
        // scale pyramid: grow the (virtual) window by 20% each iteration
        while window_size < max_window_size {
            let sw = Stopwatch::start_new();
            // scale factor between model window and current scan window
            let scale = (window_size-1) as f64 / self.width as f64;
            // shrink the image instead of growing the window; +1 for the integral-image edge
            let img_bw_scaled = image::imageops::resize(
                &img_bw,
                (img_bw.width() as f64 / scale + 1.) as u32,
                (img_bw.height() as f64 / scale + 1.) as u32,
                image::imageops::FilterType::CatmullRom
            );
            let integral = Self::integral_image(&img_bw_scaled);
            // per-scale drawing frame, accumulated into output_frame below
            let mut scaled_output_frame: nd::Array2<i16> = nd::Array::zeros((
                img_bw_scaled.dimensions().1 as usize,
                img_bw_scaled.dimensions().0 as usize,
            ));
            let integral_view = integral.view();
            let mut scaled_output_draw_frame = scaled_output_frame.view_mut();
            // to calculate a rect, we would need a -1 row, if we ignore that precision and add one at the end: (eg required when an item has width 20 (== feature width))
            let scan_window_size = window_size + 1;
            info!("Window size: {:?} {:?} {:?}", window_size, scale, scaled_output_draw_frame.dim());
            // step by scale.ceil() as this is 1px in the model's size. (small is probably unnecesarily fine-grained)
            // slide the model-sized window over every position of the scaled image
            for x in (0..(img_bw_scaled.width() as usize - self.width as usize)) {
                for y in (0..(img_bw_scaled.height() as usize - self.height as usize)) {
                    // scale is 1. here because the image itself was resized
                    if self.scan_window(integral_view, 1., &mut scaled_output_draw_frame, x, y, window_size) {
                        count_faces += 1;
                    } else {
                        count_not_faces += 1;
                    }
                }
            }
            // nearest-neighbour upsample of the per-scale drawing into the full-size accumulator
            for x in 0..img_bw.width() {
                for y in 0..img_bw.height() {
                    let src_x = (x as f64 / scale) as usize;
                    let src_y = (y as f64 / scale) as usize;
                    let weight = scaled_output_draw_frame[[src_y, src_x]];
                    output_frame[[y as usize, x as usize]] += weight;
                }
            }
            let elapsed = sw.elapsed_ms();
            info!("\ttook: {:?}ms", elapsed);
            loop_time += elapsed;
            window_size = (window_size as f32 * 1.2) as usize; // TODO make grow-factor variable (now 1.2)
        }
        info!("Looping took: {:?}", loop_time);
        let sw = Stopwatch::start_new();
        // i16 values, so plain min/max work (no NaN handling needed as with f64)
        let max_output_pixel = output_frame.iter().max().unwrap().clone();//when f64: output_frame.iter().cloned().fold(0./0., f64::max);
        let min_output_pixel = output_frame.iter().min().unwrap().clone();//when f64: output_frame.iter().cloned().fold(f64::NAN, f64::min);
        info!("Maximum pixel value in drawing: {:?} / min: {:?}", max_output_pixel, min_output_pixel);
        info!("Count accepted/rejected windows: {:?}/{:?}", count_faces, count_not_faces);
        // normalise: shift to zero, then divide so the range fits into 0..255
        output_frame -= min_output_pixel;
        let pix_diff = (max_output_pixel-min_output_pixel) as f64 / 256.;
        if pix_diff.abs() > 1. {
            let frac: i16 = if pix_diff.is_sign_positive(){
                pix_diff.ceil() as i16
            } else {
                pix_diff.floor() as i16
            };
            output_frame /= frac;
        }
        // convert to image, thanks to https://stackoverflow.com/a/56762490
        let raw = output_frame.as_standard_layout().to_owned().into_raw_vec();
        let raw: Vec<u8> = raw.into_iter().map(|x| x as u8).collect();
        let final_img = image::GrayImage::from_raw(frame.width(), frame.height(), raw).unwrap();
        let dynamic = image::DynamicImage::ImageLuma8(final_img);
        info!("postprocessing: {:?}ms", sw.elapsed_ms());
        // optionally colorize with the supplied heatmap palette
        let dynamic = match heatmap {
            Some(hm) => {
                // TODO remove intermediate DynamicImage conversin
                let sw = Stopwatch::start_new();
                let i = image::DynamicImage::ImageRgb8(hm.convert_image(dynamic));
                info!("heatmap: {:?}ms", sw.elapsed_ms());
                i
            }
            None => {
                // no changes needed
                dynamic
            }
        };
        Ok(Outcome{
            dynamic_img: dynamic,
        })
    }

    /// Run the full cascade at one window position (x, y) of the integral image.
    /// Returns `true` when every stage accepts the window ("face"), `false` as
    /// soon as a stage's summed leaf values fall below its threshold.
    /// Side effect: every weak classifier whose feature value reaches its
    /// threshold draws its feature into `output_window` (the visualisation).
    ///
    /// NOTE(review): OpenCV normalises the feature value by the window's pixel
    /// standard deviation; here `stddev` is fixed to 1 (see TODO), so detection
    /// quality will differ from OpenCV's — confirm before relying on results.
    fn scan_window(&self, integral_window: nd::ArrayView2<u32>, scale: f64, output_window: &mut nd::ArrayViewMut2<i16>, x: usize, y: usize, scan_window_size: usize) -> bool{
        let mut failed = false; // let's assume the cascade will work
        for stage in &self.stages{
            let mut stage_sum = 0.;
            // counts accepted stumps in this stage; currently only kept for the
            // (commented-out) early-break experiment below
            let mut i = 0;
            for classifier in &stage.weak_classifiers{
                // or 'stumps'
                let feature = classifier.feature.compute_feature(&integral_window, &scale, x, y, scan_window_size);
                let stddev = 1.; // TODO what should we use here?
                let threshold = classifier.threshold * stddev;
                // `idx` (left/right node index) is computed but unused: with
                // stump-only cascades the leaf value added to stage_sum is all
                // that matters here
                let idx = if feature < threshold {
                    stage_sum += classifier.leaf_values[0];
                    classifier.left
                } else {
                    stage_sum += classifier.leaf_values[1];
                    // weak classifier bigger then threshold... draw it!
                    classifier.feature.draw(output_window, &scale, x, y, scan_window_size);
                    i+=1;
                    classifier.right
                };
                // TODO remove to use all stages (we need to speed up somewhere else)
                // if i > 2{
                //     break;
                // }
            }
            // stage rejection: below-threshold stage sum fails the whole window
            if stage_sum < stage.treshold{
                failed = true;
                break;
            }
        }
        if failed {
            return false;
        } else {
            return true;
        }
    }

    /// turn the ImageBuffer into an integral image vector for faster calculations of areas
    /// the Array2 has y,x (analogous to numpy arrays when create from images)
    ///
    /// Each cell holds the inclusive sum of all pixels at or above-left of it:
    /// integral[y][x] = sum of i[0..=y][0..=x]. Built via a per-column running
    /// sum (`cumsum`) plus the integral of the column to the left.
    pub fn integral_image(i: &image::ImageBuffer<image::Luma<u8>, Vec<u8>>) -> nd::Array2<u32>{
        // dimensions().0 is width, .1 is height; arrays are (rows, cols) = (y, x)
        let mut integral: nd::Array2<u32> = nd::Array::zeros((
            i.dimensions().1 as usize,
            i.dimensions().0 as usize,
        ));
        // running vertical (per-column) sums
        let mut cumsum: nd::Array2<u32> = nd::Array::zeros((
            i.dimensions().1 as usize,
            i.dimensions().0 as usize,
        ));
        for y in 0..i.dimensions().1{
            for x in 0..i.dimensions().0{
                // grayscale pixel: single channel, take the first sample
                cumsum[[y as usize, x as usize]] = i.get_pixel(x, y).0.first().unwrap().clone() as u32;
                if y > 0 {
                    // add the column sum from the row above
                    cumsum[[y as usize, x as usize]] += cumsum[[(y - 1) as usize, x as usize]];
                }
                integral[[y as usize, x as usize]] = cumsum[[y as usize, x as usize]];
                if x > 0 {
                    // add the integral of everything left of this column
                    integral[[y as usize, x as usize]] += integral[[y as usize, (x - 1) as usize]];
                }
            }
        }
        integral
    }
}
impl Outcome {
    // no methods yet; placeholder for future accessors on the scan result
}