av-metrics-0.9.1/Cargo.toml0000644000000024310000000000100111130ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "av-metrics" version = "0.9.1" authors = ["Josh Holmer "] include = ["src/**/*"] description = "A collection of algorithms for measuring audio/video metrics" license = "MIT" repository = "https://github.com/rust-av/av-metrics" resolver = "1" [lib] bench = false [[bench]] name = "bench" path = "benches/bench.rs" harness = false [dependencies.crossbeam] version = "0.8" [dependencies.itertools] version = "0.10.0" [dependencies.lab] version = "0.11.0" [dependencies.num-traits] version = "0.2" [dependencies.rayon] version = "1.5" [dependencies.serde] version = "1" features = ["derive"] optional = true [dependencies.thiserror] version = "1" [dependencies.v_frame] version = "0.3.1" [dev-dependencies.criterion] version = "0.4" [dev-dependencies.y4m] version = "0.8.0" av-metrics-0.9.1/Cargo.toml.orig000064400000000000000000000011631046102023000145750ustar 00000000000000[package] name = "av-metrics" version = "0.9.1" authors = ["Josh Holmer "] edition = "2021" description = "A collection of algorithms for measuring audio/video metrics" license = "MIT" repository = "https://github.com/rust-av/av-metrics" include = ["src/**/*"] [dependencies] crossbeam = "0.8" itertools = "0.10.0" lab = "0.11.0" num-traits = "0.2" rayon = "1.5" serde = { version = "1", features = ["derive"], optional = true } thiserror = "1" v_frame = "0.3.1" [dev-dependencies] criterion = "0.4" y4m = "0.8.0" [lib] bench = false [[bench]] name = "bench" harness = false path = "benches/bench.rs" av-metrics-0.9.1/src/lib.rs000064400000000000000000000036761046102023000136240ustar 00000000000000//! `av_metrics` is a collection of quality metrics for audio and video files. //! Currently only includes video metrics. Audio metrics will likely be added //! in the future. #![allow(clippy::cast_lossless)] #![allow(clippy::needless_range_loop)] #![allow(clippy::unreadable_literal)] #![allow(clippy::wrong_self_convention)] #![deny(missing_docs)] #[macro_use] extern crate itertools; #[macro_use] extern crate thiserror; pub mod video; /// Possible errors that may occur during processing of a metric. /// /// This enum may be added to in the future and should not be assumed to be exhaustive. #[derive(Debug, Error)] pub enum MetricsError { /// Indicates an input file could not be read for some reason. #[error("Could not read input file: {reason}")] MalformedInput { #[doc(hidden)] reason: &'static str, }, /// Indicates an input file could be read, but is not supported by the current metric. #[error("Input type not supported: {reason}")] UnsupportedInput { #[doc(hidden)] reason: &'static str, }, /// Indicates two inputs did not have matching formats or resolutions. #[error("Input videos must have matching formats: {reason}")] InputMismatch { #[doc(hidden)] reason: &'static str, }, /// Indicates the impossibility to process the two videos. 
#[error("Could not process the two videos: {reason}")] VideoError { #[doc(hidden)] reason: String, }, /// Indicates the impossibility to send two frames in order to be processed. #[error("Could not send two frames to be processed: {reason}")] SendError { #[doc(hidden)] reason: String, }, /// Indicates the impossibility to process two frames. #[error("Could not process two frames: {reason}")] ProcessError { #[doc(hidden)] reason: String, }, /// Placeholder #[doc(hidden)] #[error("Unreachable")] NonExhaustive, } av-metrics-0.9.1/src/video/ciede/delta_e/LICENSE000064400000000000000000000020711046102023000172550ustar 00000000000000The MIT License (MIT) Copyright (c) 2017 Elliot Jackson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. av-metrics-0.9.1/src/video/ciede/delta_e/de2000.rs000064400000000000000000000105251046102023000175130ustar 00000000000000// Modified version of https://github.com/elliotekj/DeltaE use lab::Lab; use std::f32; use std::f32::consts::PI; pub struct DE2000; pub struct KSubArgs { pub l: f32, pub c: f32, pub h: f32, } #[allow(clippy::needless_doctest_main)] impl DE2000 { // Returns the difference between two `Lab` colors. 
// // ### Example // // ```ignore // extern crate delta_e; // extern crate lab; // // use delta_e::{DE2000, KSubArgs}; // use lab::Lab; // // fn main() { // let color_1 = Lab { // l: 38.972, // a: 58.991, // b: 37.138, // }; // // let color_2 = Lab { // l: 54.528, // a: 42.416, // b: 54.497, // }; // // let ksub = KSubArgs { l: 1.0, c: 1.0, h: 1.0 }; // let delta_e = DE2000::new(color_1, color_2, ksub); // println!("The color difference is: {}", delta_e); // } // ``` #[allow(clippy::new_ret_no_self)] pub fn new(color_1: Lab, color_2: Lab, ksub: KSubArgs) -> f32 { let delta_l_prime = color_2.l - color_1.l; let l_bar = (color_1.l + color_2.l) / 2.0; let c1 = (color_1.a.powi(2) + color_1.b.powi(2)).sqrt(); let c2 = (color_2.a.powi(2) + color_2.b.powi(2)).sqrt(); let (a_prime_1, a_prime_2) = { let c_bar = (c1 + c2) / 2.0; let tmp = 1.0 - (c_bar.powi(7) / (c_bar.powi(7) + 25f32.powi(7))).sqrt(); ( color_1.a + (color_1.a / 2.0) * tmp, color_2.a + (color_2.a / 2.0) * tmp, ) }; let c_prime_1 = (a_prime_1.powi(2) + color_1.b.powi(2)).sqrt(); let c_prime_2 = (a_prime_2.powi(2) + color_2.b.powi(2)).sqrt(); let c_bar_prime = (c_prime_1 + c_prime_2) / 2.0; let delta_c_prime = c_prime_2 - c_prime_1; let s_sub_l = 1.0 + ((0.015 * (l_bar - 50.0).powi(2)) / (20.0 + (l_bar - 50.0).powi(2)).sqrt()); let s_sub_c = 1.0 + 0.045 * c_bar_prime; let h_prime_1 = get_h_prime_fn(color_1.b, a_prime_1); let h_prime_2 = get_h_prime_fn(color_2.b, a_prime_2); let delta_h_prime = get_delta_h_prime(c1, c2, h_prime_1, h_prime_2); let delta_upcase_h_prime = 2.0 * (c_prime_1 * c_prime_2).sqrt() * ((delta_h_prime) / 2.0).sin(); let upcase_h_bar_prime = get_upcase_h_bar_prime(h_prime_1, h_prime_2); let upcase_t = get_upcase_t(upcase_h_bar_prime); let s_sub_upcase_h = 1.0 + 0.015 * c_bar_prime * upcase_t; let r_sub_t = get_r_sub_t(c_bar_prime, upcase_h_bar_prime); let lightness: f32 = delta_l_prime / (ksub.l * s_sub_l); let chroma: f32 = delta_c_prime / (ksub.c * s_sub_c); let hue: f32 = delta_upcase_h_prime / (ksub.h * s_sub_upcase_h); (lightness.powi(2) + chroma.powi(2) + hue.powi(2) + r_sub_t * chroma * hue).sqrt() } } fn get_h_prime_fn(x: f32, y: f32) -> f32 { if x == 0.0 && y == 0.0 { return 0.0; } let mut hue_angle = x.atan2(y); if hue_angle < 0.0 { hue_angle += 2. * PI; } hue_angle } fn get_delta_h_prime(c1: f32, c2: f32, h_prime_1: f32, h_prime_2: f32) -> f32 { if 0.0 == c1 || 0.0 == c2 { return 0.0; } if (h_prime_1 - h_prime_2).abs() <= PI { return h_prime_2 - h_prime_1; } if h_prime_2 <= h_prime_1 { h_prime_2 - h_prime_1 + 2. * PI } else { h_prime_2 - h_prime_1 - 2.
* PI } } fn get_upcase_h_bar_prime(h_prime_1: f32, h_prime_2: f32) -> f32 { if (h_prime_1 - h_prime_2).abs() > PI { return (h_prime_1 + h_prime_2 + 2.0 * PI) / 2.0; } (h_prime_1 + h_prime_2) / 2.0 } fn get_upcase_t(upcase_h_bar_prime: f32) -> f32 { 1.0 - 0.17 * (upcase_h_bar_prime - PI / 6.0).cos() + 0.24 * (2.0 * upcase_h_bar_prime).cos() + 0.32 * (3.0 * upcase_h_bar_prime + PI / 30.0).cos() - 0.20 * (4.0 * upcase_h_bar_prime - 7.0 * PI / 20.0).cos() } fn get_r_sub_t(c_bar_prime: f32, upcase_h_bar_prime: f32) -> f32 { let degrees = (radians_to_degrees(upcase_h_bar_prime) - 275.0) * (1.0 / 25.0); -2.0 * (c_bar_prime.powi(7) / (c_bar_prime.powi(7) + 25f32.powi(7))).sqrt() * (degrees_to_radians(60.0 * (-(degrees.powi(2))).exp())).sin() } fn radians_to_degrees(radians: f32) -> f32 { radians * (180.0 / f32::consts::PI) } fn degrees_to_radians(degrees: f32) -> f32 { degrees * (f32::consts::PI / 180.0) } av-metrics-0.9.1/src/video/ciede/delta_e/mod.rs000064400000000000000000000000401046102023000173670ustar 00000000000000mod de2000; pub use de2000::*; av-metrics-0.9.1/src/video/ciede/mod.rs000064400000000000000000000466261046102023000160160ustar 00000000000000#![allow(clippy::cast_ptr_alignment)] //! The CIEDE2000 color difference formula. //! //! CIEDE2000 implementation adapted from //! [Kyle Siefring's](https://github.com/KyleSiefring/dump_ciede2000). use crate::video::decode::Decoder; use crate::video::pixel::{CastFromPrimitive, Pixel}; use crate::video::VideoMetric; use crate::MetricsError; use std::f64; use std::mem::size_of; mod rgbtolab; use rgbtolab::*; mod delta_e; use delta_e::*; /// Calculate the CIEDE2000 metric between two video clips. Higher is better. /// /// This will return at the end of the shorter of the two clips, /// comparing any frames up to that point. /// /// Optionally, `frame_limit` can be set to only compare the first /// `frame_limit` frames in each video. #[inline] pub fn calculate_video_ciede( decoder1: &mut D, decoder2: &mut D, frame_limit: Option, progress_callback: F, ) -> Result> { Ciede2000::default().process_video(decoder1, decoder2, frame_limit, progress_callback) } /// Calculate the CIEDE2000 metric between two video clips. Higher is better. /// /// This version disables SIMD. It is intended to only be used /// by tests and benchmarks. #[inline] #[doc(hidden)] pub fn calculate_video_ciede_nosimd( decoder1: &mut D, decoder2: &mut D, frame_limit: Option, progress_callback: F, ) -> Result> { (Ciede2000 { use_simd: false }).process_video( decoder1, decoder2, frame_limit, progress_callback, ) } /// Calculate the CIEDE2000 metric between two video frames. Higher is better. #[inline] pub fn calculate_frame_ciede( frame1: &Frame, frame2: &Frame, bit_depth: usize, chroma_sampling: ChromaSampling, ) -> Result> { Ciede2000::default().process_frame(frame1, frame2, bit_depth, chroma_sampling) } /// Calculate the CIEDE2000 metric between two video frames. Higher is better. /// /// This version disables SIMD. It is intended to only be used /// by tests and benchmarks. 
#[inline] #[doc(hidden)] pub fn calculate_frame_ciede_nosimd( frame1: &Frame, frame2: &Frame, bit_depth: usize, chroma_sampling: ChromaSampling, ) -> Result> { (Ciede2000 { use_simd: false }).process_frame(frame1, frame2, bit_depth, chroma_sampling) } struct Ciede2000 { use_simd: bool, } impl Default for Ciede2000 { fn default() -> Self { Ciede2000 { use_simd: true } } } use rayon::prelude::*; use v_frame::frame::Frame; use v_frame::prelude::ChromaSampling; impl VideoMetric for Ciede2000 { type FrameResult = f64; type VideoResult = f64; fn process_frame( &self, frame1: &Frame, frame2: &Frame, bit_depth: usize, chroma_sampling: ChromaSampling, ) -> Result> { if (size_of::() == 1 && bit_depth > 8) || (size_of::() == 2 && bit_depth <= 8) { return Err(Box::new(MetricsError::InputMismatch { reason: "Bit depths does not match pixel width", })); } frame1.can_compare(frame2)?; let dec = chroma_sampling.get_decimation().unwrap_or((1, 1)); let y_width = frame1.planes[0].cfg.width; let y_height = frame1.planes[0].cfg.height; let c_width = frame1.planes[1].cfg.width; let delta_e_row_fn = get_delta_e_row_fn(bit_depth, dec.0, self.use_simd); // let mut delta_e_vec: Vec = vec![0.0; y_width * y_height]; let delta_e_per_line = (0..y_height).into_par_iter().map(|i| { let y_start = i * y_width; let y_end = y_start + y_width; let c_start = (i >> dec.1) * c_width; let c_end = c_start + c_width; let y_range = y_start..y_end; let c_range = c_start..c_end; let mut delta_e_vec = vec![0.0; y_end - y_start]; unsafe { delta_e_row_fn( FrameRow { y: &frame1.planes[0].data[y_range.clone()], u: &frame1.planes[1].data[c_range.clone()], v: &frame1.planes[2].data[c_range.clone()], }, FrameRow { y: &frame2.planes[0].data[y_range], u: &frame2.planes[1].data[c_range.clone()], v: &frame2.planes[2].data[c_range], }, &mut delta_e_vec[..], ); } delta_e_vec.iter().map(|x| *x as f64).sum::() }); let score = 45. - 20. 
* (delta_e_per_line.sum::() / ((y_width * y_height) as f64)).log10(); Ok(score.min(100.)) } fn aggregate_frame_results( &self, metrics: &[Self::FrameResult], ) -> Result> { Ok(metrics.iter().copied().sum::() / metrics.len() as f64) } } // Arguments for delta e // "Color Image Quality Assessment Based on CIEDE2000" // Yang Yang, Jun Ming and Nenghai Yu, 2012 // http://dx.doi.org/10.1155/2012/273723 const K_SUB: KSubArgs = KSubArgs { l: 0.65, c: 1.0, h: 4.0, }; pub(crate) struct FrameRow<'a, T: Pixel> { y: &'a [T], u: &'a [T], v: &'a [T], } type DeltaERowFn = unsafe fn(FrameRow, FrameRow, &mut [f32]); fn get_delta_e_row_fn(bit_depth: usize, xdec: usize, simd: bool) -> DeltaERowFn { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if is_x86_feature_detected!("avx2") && xdec == 1 && simd { return match bit_depth { 8 => BD8::delta_e_row_avx2, 10 => BD10::delta_e_row_avx2, 12 => BD12::delta_e_row_avx2, _ => unreachable!(), }; } } match (bit_depth, xdec) { (8, 1) => BD8::delta_e_row_scalar, (10, 1) => BD10::delta_e_row_scalar, (12, 1) => BD12::delta_e_row_scalar, (8, 0) => BD8_444::delta_e_row_scalar, (10, 0) => BD10_444::delta_e_row_scalar, (12, 0) => BD12_444::delta_e_row_scalar, _ => unreachable!(), } } pub(crate) trait Colorspace { const BIT_DEPTH: u32; const X_DECIMATION: u32; } struct BD8; struct BD10; struct BD12; struct BD8_444; struct BD10_444; struct BD12_444; impl Colorspace for BD8 { const BIT_DEPTH: u32 = 8; const X_DECIMATION: u32 = 1; } impl Colorspace for BD10 { const BIT_DEPTH: u32 = 10; const X_DECIMATION: u32 = 1; } impl Colorspace for BD12 { const BIT_DEPTH: u32 = 12; const X_DECIMATION: u32 = 1; } impl Colorspace for BD8_444 { const BIT_DEPTH: u32 = 8; const X_DECIMATION: u32 = 0; } impl Colorspace for BD10_444 { const BIT_DEPTH: u32 = 10; const X_DECIMATION: u32 = 0; } impl Colorspace for BD12_444 { const BIT_DEPTH: u32 = 12; const X_DECIMATION: u32 = 0; } fn twice( i: T, ) -> itertools::Interleave<::IntoIter, ::IntoIter> where T: IntoIterator + Clone, { itertools::interleave(i.clone(), i) } pub(crate) trait DeltaEScalar: Colorspace { fn delta_e_scalar(yuv1: (u16, u16, u16), yuv2: (u16, u16, u16)) -> f32 { let scale = (1 << (Self::BIT_DEPTH - 8)) as f32; let yuv_to_rgb = |yuv: (u16, u16, u16)| { // Assumes BT.709 let y = (yuv.0 as f32 - 16. * scale) * (1. / (219. * scale)); let u = (yuv.1 as f32 - 128. * scale) * (1. / (224. * scale)); let v = (yuv.2 as f32 - 128. * scale) * (1. / (224. 
* scale)); // [-0.804677, 1.81723] let r = y + 1.28033 * v; // [−0.316650, 1.09589] let g = y - 0.21482 * u - 0.38059 * v; // [-1.28905, 2.29781] let b = y + 2.12798 * u; (r, g, b) }; let (r1, g1, b1) = yuv_to_rgb(yuv1); let (r2, g2, b2) = yuv_to_rgb(yuv2); DE2000::new(rgb_to_lab(&[r1, g1, b1]), rgb_to_lab(&[r2, g2, b2]), K_SUB) } unsafe fn delta_e_row_scalar( row1: FrameRow, row2: FrameRow, res_row: &mut [f32], ) { if Self::X_DECIMATION == 1 { for (y1, u1, v1, y2, u2, v2, res) in izip!( row1.y, twice(row1.u), twice(row1.v), row2.y, twice(row2.u), twice(row2.v), res_row ) { *res = Self::delta_e_scalar( ( u16::cast_from(*y1), u16::cast_from(*u1), u16::cast_from(*v1), ), ( u16::cast_from(*y2), u16::cast_from(*u2), u16::cast_from(*v2), ), ); } } else { for (y1, u1, v1, y2, u2, v2, res) in izip!(row1.y, row1.u, row1.v, row2.y, row2.u, row2.v, res_row) { *res = Self::delta_e_scalar( ( u16::cast_from(*y1), u16::cast_from(*u1), u16::cast_from(*v1), ), ( u16::cast_from(*y2), u16::cast_from(*u2), u16::cast_from(*v2), ), ); } } } } impl DeltaEScalar for BD8 {} impl DeltaEScalar for BD10 {} impl DeltaEScalar for BD12 {} impl DeltaEScalar for BD8_444 {} impl DeltaEScalar for BD10_444 {} impl DeltaEScalar for BD12_444 {} #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use self::avx2::*; use std::error::Error; use super::FrameCompare; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod avx2 { use super::*; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; pub(crate) trait DeltaEAVX2: Colorspace + DeltaEScalar { #[target_feature(enable = "avx2")] unsafe fn yuv_to_rgb(yuv: (__m256, __m256, __m256)) -> (__m256, __m256, __m256) { let scale: f32 = (1 << (Self::BIT_DEPTH - 8)) as f32; #[target_feature(enable = "avx2")] unsafe fn set1(val: f32) -> __m256 { _mm256_set1_ps(val) } let y = _mm256_mul_ps( _mm256_sub_ps(yuv.0, set1(16. * scale)), set1(1. / (219. * scale)), ); let u = _mm256_mul_ps( _mm256_sub_ps(yuv.1, set1(128. * scale)), set1(1. / (224. * scale)), ); let v = _mm256_mul_ps( _mm256_sub_ps(yuv.2, set1(128. * scale)), set1(1. / (224. 
* scale)), ); let r = _mm256_add_ps(y, _mm256_mul_ps(v, set1(1.28033))); let g = _mm256_add_ps( _mm256_add_ps(y, _mm256_mul_ps(u, set1(-0.21482))), _mm256_mul_ps(v, set1(-0.38059)), ); let b = _mm256_add_ps(y, _mm256_mul_ps(u, set1(2.12798))); (r, g, b) } #[target_feature(enable = "avx2")] unsafe fn delta_e_avx2( yuv1: (__m256, __m256, __m256), yuv2: (__m256, __m256, __m256), res_chunk: &mut [f32], ) { let (r1, g1, b1) = Self::yuv_to_rgb(yuv1); let (r2, g2, b2) = Self::yuv_to_rgb(yuv2); let lab1 = rgb_to_lab_avx2(&[r1, g1, b1]); let lab2 = rgb_to_lab_avx2(&[r2, g2, b2]); for i in 0..8 { res_chunk[i] = DE2000::new(lab1[i], lab2[i], K_SUB); } } #[target_feature(enable = "avx2")] unsafe fn delta_e_row_avx2( row1: FrameRow, row2: FrameRow, res_row: &mut [f32], ) { // Only one version should be compiled for each trait if Self::BIT_DEPTH == 8 { for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!( row1.y.chunks(8), row1.u.chunks(4), row1.v.chunks(4), row2.y.chunks(8), row2.u.chunks(4), row2.v.chunks(4), res_row.chunks_mut(8) ) { if chunk1_y.len() == 8 { #[inline(always)] unsafe fn load_luma(chunk: &[u8]) -> __m256 { let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _); _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(tmp)) } #[inline(always)] unsafe fn load_chroma(chunk: &[u8]) -> __m256 { let tmp = _mm_cvtsi32_si128(*(chunk.as_ptr() as *const i32)); _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_unpacklo_epi8(tmp, tmp))) } Self::delta_e_avx2( ( load_luma( &chunk1_y .iter() .map(|p| u8::cast_from(*p)) .collect::>(), ), load_chroma( &chunk1_u .iter() .map(|p| u8::cast_from(*p)) .collect::>(), ), load_chroma( &chunk1_v .iter() .map(|p| u8::cast_from(*p)) .collect::>(), ), ), ( load_luma( &chunk2_y .iter() .map(|p| u8::cast_from(*p)) .collect::>(), ), load_chroma( &chunk2_u .iter() .map(|p| u8::cast_from(*p)) .collect::>(), ), load_chroma( &chunk2_v .iter() .map(|p| u8::cast_from(*p)) .collect::>(), ), ), res_chunk, ); } else { Self::delta_e_row_scalar( FrameRow { y: chunk1_y, u: chunk1_u, v: chunk1_v, }, FrameRow { y: chunk2_y, u: chunk2_u, v: chunk2_v, }, res_chunk, ); } } } else { for (chunk1_y, chunk1_u, chunk1_v, chunk2_y, chunk2_u, chunk2_v, res_chunk) in izip!( row1.y.chunks(8), row1.u.chunks(4), row1.v.chunks(4), row2.y.chunks(8), row2.u.chunks(4), row2.v.chunks(4), res_row.chunks_mut(8) ) { if chunk1_y.len() == 8 { #[inline(always)] unsafe fn load_luma(chunk: &[u16]) -> __m256 { let tmp = _mm_loadu_si128(chunk.as_ptr() as *const _); _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(tmp)) } #[inline(always)] unsafe fn load_chroma(chunk: &[u16]) -> __m256 { let tmp = _mm_loadl_epi64(chunk.as_ptr() as *const _); _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(_mm_unpacklo_epi16(tmp, tmp))) } Self::delta_e_avx2( ( load_luma( &chunk1_y .iter() .map(|p| u16::cast_from(*p)) .collect::>(), ), load_chroma( &chunk1_u .iter() .map(|p| u16::cast_from(*p)) .collect::>(), ), load_chroma( &chunk1_v .iter() .map(|p| u16::cast_from(*p)) .collect::>(), ), ), ( load_luma( &chunk2_y .iter() .map(|p| u16::cast_from(*p)) .collect::>(), ), load_chroma( &chunk2_u .iter() .map(|p| u16::cast_from(*p)) .collect::>(), ), load_chroma( &chunk2_v .iter() .map(|p| u16::cast_from(*p)) .collect::>(), ), ), res_chunk, ); } else { Self::delta_e_row_scalar( FrameRow { y: chunk1_y, u: chunk1_u, v: chunk1_v, }, FrameRow { y: chunk2_y, u: chunk2_u, v: chunk2_v, }, res_chunk, ); } } } } } impl DeltaEAVX2 for BD8 {} impl DeltaEAVX2 for BD10 {} impl DeltaEAVX2 for BD12 {} } 
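// Note on the score scale used in `Ciede2000::process_frame` above: the mean per-pixel delta E is mapped onto a PSNR-like decibel scale, score = 45 - 20 * log10(mean delta E), capped at 100 so that identical frames do not report infinity. A minimal sketch of that mapping (the free function below is illustrative, not part of the crate's API): // ```ignore // fn ciede2000_score(delta_e_sum: f64, n_pixels: usize) -> f64 { //     let mean_delta_e = delta_e_sum / n_pixels as f64; //     (45.0 - 20.0 * mean_delta_e.log10()).min(100.0) // } // ```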
av-metrics-0.9.1/src/video/ciede/rgbtolab/LICENSE000064400000000000000000000020711046102023000174540ustar 00000000000000The MIT License (MIT) Copyright (c) 2017 Elliot Jackson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. av-metrics-0.9.1/src/video/ciede/rgbtolab/mod.rs000064400000000000000000000402311046102023000175740ustar 00000000000000// Modified version of https://github.com/TooManyBees/lab use lab::Lab; // κ and ε parameters used in conversion between XYZ and La*b*. See // http://www.brucelindbloom.com/LContinuity.html for explanation as to why // those are different values than those provided by CIE standard. const KAPPA: f32 = 24389.0 / 27.0; const EPSILON: f32 = 216.0 / 24389.0; pub fn rgb_to_lab(rgb: &[f32; 3]) -> Lab { xyz_to_lab(rgb_to_xyz(rgb)) } #[allow(clippy::excessive_precision)] fn rgb_to_xyz(rgb: &[f32; 3]) -> [f32; 3] { let r = rgb_to_xyz_map(rgb[0]); let g = rgb_to_xyz_map(rgb[1]); let b = rgb_to_xyz_map(rgb[2]); [ r * 0.4124564390896921 + g * 0.357576077643909 + b * 0.18043748326639894, r * 0.21267285140562248 + g * 0.715152155287818 + b * 0.07217499330655958, r * 0.019333895582329317 + g * 0.119192025881303 + b * 0.9503040785363677, ] } #[inline] fn rgb_to_xyz_map(c: f32) -> f32 { if c > 10. / 255. { const A: f32 = 0.055; const D: f32 = 1.0 / 1.055; pow_2_4((c + A) * D) } else { const D: f32 = 1.0 / 12.92; c * D } } fn xyz_to_lab(xyz: [f32; 3]) -> Lab { let x = xyz_to_lab_map(xyz[0] * (1.0 / 0.95047)); let y = xyz_to_lab_map(xyz[1]); let z = xyz_to_lab_map(xyz[2] * (1.0 / 1.08883)); Lab { l: (116.0 * y) - 16.0, a: 500.0 * (x - y), b: 200.0 * (y - z), } } #[inline] fn xyz_to_lab_map(c: f32) -> f32 { if c > EPSILON { cbrt_approx(c) } else { (KAPPA * c + 16.0) * (1.0 / 116.0) } } macro_rules! lookup_table_8 { (start: $start:expr, closure: $closure:expr) => { [ $closure($start + 0), $closure($start + 1), $closure($start + 2), $closure($start + 3), $closure($start + 4), $closure($start + 5), $closure($start + 6), $closure($start + 7), ] }; } macro_rules! lookup_table_16 { (start: $start:expr, closure: $closure:expr) => { [ $closure($start + 0), $closure($start + 1), $closure($start + 2), $closure($start + 3), $closure($start + 4), $closure($start + 5), $closure($start + 6), $closure($start + 7), $closure($start + 8), $closure($start + 9), $closure($start + 10), $closure($start + 11), $closure($start + 12), $closure($start + 13), $closure($start + 14), $closure($start + 15), ] }; } fn pow_2_4(x: f32) -> f32 { // Closely approximate x^2.4. 
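// Factorization used below: writing x = 2^e * m with m in [1, 2) and a = m truncated to FRAC_BITS fractional bits, x^2.4 = (2^e)^2.4 * a^2.4 * (m/a)^2.4; the first two factors come from small lookup tables, and m/a is close enough to 1 for a short binomial series.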
// Divide x by its exponent and a truncated version of itself to get it as close to 1 as // possible. Calculate the power of 2.4 using the binomial method. Multiply what was divided to // the power of 2.4. // Lookup tables still have to be hardcoded. const FRAC_BITS: u32 = 3; // Cast x into an integer to manipulate its exponent and fractional parts into indexes for // lookup tables. let bits = x.to_bits(); // Get the integer log2 from the exponent part of bits let log2 = (bits >> 23) as i32 - 0x7f; // x is always >= (10/255 + A)*D so we only have to deal with a limited range in the exponent. // log2 range is [-4, 3] // Use a lookup table to offset for dividing by 2^log of x. // x^2.4 = (2^log2)^2.4 * (x/(2^log2))^2.4 let lookup_entry_exp_pow_2_4 = |log2: i32| (f32::from_bits(((log2 + 0x7f) << 23) as u32) as f64).powf(2.4) as f32; let lookup_table_exp_pow_2_4 = lookup_table_8!(start: -4, closure: lookup_entry_exp_pow_2_4); let exp_pow_2_4 = lookup_table_exp_pow_2_4[(log2 + 4) as usize]; // Zero the exponent of x or divide by 2^log. let x = f32::from_bits((bits & 0x807fffff) | 0x3f800000); // Use lookup tables to divide by a truncated version of x and get an offset for that division. // x^2.4 = a^2.4 * (x/a)^2.4 let lookup_entry_inv_truncated = |fraction: i32| { let truncated = 1.0 + (fraction as f64 + 0.5) / ((1 << FRAC_BITS) as f64); (1.0 / truncated) as f32 }; let lookup_table_inv_truncated = lookup_table_8!(start: 0, closure: lookup_entry_inv_truncated); let lookup_entry_truncated_pow_2_4 = |fraction: i32| (lookup_entry_inv_truncated(fraction) as f64).powf(-2.4) as f32; let lookup_table_truncated_pow_2_4 = lookup_table_8!(start: 0, closure: lookup_entry_truncated_pow_2_4); // Expose only FRAC_BITS of the fraction. let fraction = (bits >> (23 - FRAC_BITS) & ((1 << FRAC_BITS) - 1)) as usize; let truncated_pow_2_4 = lookup_table_truncated_pow_2_4[fraction]; let x = x * lookup_table_inv_truncated[fraction]; // Binomial series // Greater than 12 bits of precision. //let est = 7. / 25. - 24. / 25. * x + 42. / 25. * x.powi(2); // Plenty of precision. let est = 7. / 125. - 36. / 125. * x + 126. / 125. * x.powi(2) + 28. / 125. * x.powi(3); est * (truncated_pow_2_4 * exp_pow_2_4) } fn cbrt_approx(x: f32) -> f32 { // Closely approximate x^(1/3). // Divide x by its exponent and a truncated version of itself to get it as close to 1 as // possible. Calculate the power of 1/3 using the binomial method. Multiply what was divided to // the power of 1/3. // Lookup tables still have to be hardcoded. const FRAC_BITS: u32 = 3; // Cast x into an integer to manipulate its exponent and fractional parts into indexes for // lookup tables. let bits = x.to_bits(); // Get the integer log2 from the exponent part of bits let log2 = (bits >> 23) as i32 - 0x7f; // x is always > EPSILON so we only have to deal with a limited range in the exponent. // log2 range is [-7, 8] // Use a lookup table to offset for dividing by 2^log of x. // x^(1/3) = (2^log2)^(1/3) * (x/(2^log2))^(1/3) let lookup_entry_exp_cbrt = |log2: i32| (f32::from_bits(((log2 + 0x7f) << 23) as u32) as f64).powf(1. / 3.) as f32; let lookup_table_exp_cbrt = lookup_table_16!(start: -7, closure: lookup_entry_exp_cbrt); let exp_pow_cbrt = lookup_table_exp_cbrt[(log2 + 7) as usize]; // Zero the exponent of x or divide by 2^log. let x = f32::from_bits((bits & 0x807fffff) | 0x3f800000); // Use lookup tables to divide by a truncated version of x and get an offset for that division. 
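// For example, with FRAC_BITS = 3 and x = 0.4 = 2^-2 * 1.6: the exponent table supplies (2^-2)^(1/3), the truncation tables use a = 1 + 4.5/8 = 1.5625, and the binomial series is evaluated at 1.6/1.5625 = 1.024.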
// x^(1/3) = a^(1/3) * (x/a)^(1/3) let lookup_entry_inv_truncated = |fraction: i32| { let truncated = 1.0 + (fraction as f64 + 0.5) / ((1 << FRAC_BITS) as f64); (1.0 / truncated) as f32 }; let lookup_table_inv_truncated = lookup_table_8!(start: 0, closure: lookup_entry_inv_truncated); let lookup_entry_truncated_cbrt = |fraction: i32| (lookup_entry_inv_truncated(fraction) as f64).powf(-1. / 3.) as f32; let lookup_table_truncated_cbrt = lookup_table_8!(start: 0, closure: lookup_entry_truncated_cbrt); // Expose only FRAC_BITS of the fraction. let fraction = (bits >> (23 - FRAC_BITS) & ((1 << FRAC_BITS) - 1)) as usize; let truncated_pow_cbrt = lookup_table_truncated_cbrt[fraction]; let x = x * lookup_table_inv_truncated[fraction]; // Binomial series let est = 40. / 81. + 60. / 81. * x - 24. / 81. * x.powi(2) + 5. / 81. * x.powi(3); est * (truncated_pow_cbrt * exp_pow_cbrt) } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub use self::avx2::*; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod avx2 { use super::*; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; macro_rules! lookup_table_8_avx2 { (start: $start:expr, closure: $closure:expr) => { _mm256_setr_ps( $closure($start + 0), $closure($start + 1), $closure($start + 2), $closure($start + 3), $closure($start + 4), $closure($start + 5), $closure($start + 6), $closure($start + 7), ) }; } macro_rules! lookup_table_16_avx2 { (start: $start:expr, closure: $closure:expr) => { ( lookup_table_8_avx2!(start: $start, closure: $closure), lookup_table_8_avx2!(start: $start + 8, closure: $closure), ) }; } macro_rules! sum_mult_avx { (($init:expr), $(($vec:expr, $mul:expr)),* ) => { { let mut sum = _mm256_set1_ps($init); $( sum = _mm256_add_ps(sum, _mm256_mul_ps($vec, _mm256_set1_ps($mul))); )* sum } }; ( $(($vec:expr, $mul:expr)),* ) => { sum_mult_avx!((0.0), $(($vec, $mul)),*) }; } #[target_feature(enable = "avx2")] pub unsafe fn rgb_to_lab_avx2(rgb: &[__m256; 3]) -> [Lab; 8] { xyz_to_lab_avx2(rgb_to_xyz_avx2(rgb)) } #[target_feature(enable = "avx2")] #[allow(clippy::excessive_precision)] #[allow(clippy::many_single_char_names)] unsafe fn rgb_to_xyz_avx2(rgb: &[__m256; 3]) -> [__m256; 3] { let r = rgb_to_xyz_map_avx2(rgb[0]); let g = rgb_to_xyz_map_avx2(rgb[1]); let b = rgb_to_xyz_map_avx2(rgb[2]); let x = sum_mult_avx!( (r, 0.4124564390896921), (g, 0.357576077643909), (b, 0.18043748326639894) ); let y = sum_mult_avx!( (r, 0.21267285140562248), (g, 0.715152155287818), (b, 0.07217499330655958) ); let z = sum_mult_avx!( (r, 0.019333895582329317), (g, 0.119192025881303), (b, 0.9503040785363677) ); [x, y, z] } #[inline] #[target_feature(enable = "avx2")] unsafe fn rgb_to_xyz_map_avx2(c: __m256) -> __m256 { let low = _mm256_mul_ps(c, _mm256_set1_ps(1.0 / 12.92)); let hi = pow_2_4_avx2(_mm256_mul_ps( _mm256_add_ps(c, _mm256_set1_ps(0.055)), _mm256_set1_ps(1.0 / 1.055), )); let select = _mm256_cmp_ps(c, _mm256_set1_ps(10. 
/ 255.), _CMP_GT_OS); _mm256_blendv_ps(low, hi, select) } #[inline] #[target_feature(enable = "avx2")] #[allow(clippy::many_single_char_names)] unsafe fn xyz_to_lab_avx2(xyz: [__m256; 3]) -> [Lab; 8] { let x = xyz_to_lab_map_avx2(_mm256_mul_ps(xyz[0], _mm256_set1_ps(1.0 / 0.95047))); let y = xyz_to_lab_map_avx2(xyz[1]); let z = xyz_to_lab_map_avx2(_mm256_mul_ps(xyz[2], _mm256_set1_ps(1.0 / 1.08883))); let l = _mm256_sub_ps( _mm256_mul_ps(_mm256_set1_ps(116.0), y), _mm256_set1_ps(16.0), ); let a = _mm256_mul_ps(_mm256_sub_ps(x, y), _mm256_set1_ps(500.0)); let b = _mm256_mul_ps(_mm256_sub_ps(y, z), _mm256_set1_ps(200.0)); #[target_feature(enable = "avx2")] unsafe fn to_array(reg: __m256) -> [f32; 8] { std::mem::transmute(reg) } let l = to_array(l); let a = to_array(a); let b = to_array(b); let mut output = [Lab { l: 0., a: 0., b: 0., }; 8]; for i in 0..8 { output[i] = Lab { l: l[i], a: a[i], b: b[i], }; } output } #[inline] #[target_feature(enable = "avx2")] unsafe fn xyz_to_lab_map_avx2(c: __m256) -> __m256 { let low = _mm256_mul_ps( _mm256_add_ps( _mm256_mul_ps(c, _mm256_set1_ps(KAPPA)), _mm256_set1_ps(16.0), ), _mm256_set1_ps(1.0 / 116.0), ); let hi = cbrt_approx_avx2(c); let select = _mm256_cmp_ps(c, _mm256_set1_ps(EPSILON), _CMP_GT_OS); _mm256_blendv_ps(low, hi, select) } #[target_feature(enable = "avx2")] unsafe fn pow_2_4_avx2(x: __m256) -> __m256 { // See non-avx2 version const FRAC_BITS: u32 = 3; let bits = _mm256_castps_si256(x); let log2_index = _mm256_add_epi32(_mm256_srli_epi32(bits, 23), _mm256_set1_epi32(-0x7f + 4)); let lookup_entry_exp_pow_2_4 = |log2: i32| (f32::from_bits(((log2 + 0x7f) << 23) as u32) as f64).powf(2.4) as f32; let lookup_table_exp_pow_2_4 = lookup_table_8_avx2!(start: -4, closure: lookup_entry_exp_pow_2_4); let exp_pow_2_4 = _mm256_permutevar8x32_ps(lookup_table_exp_pow_2_4, log2_index); let x = _mm256_or_ps( _mm256_and_ps( x, _mm256_castsi256_ps(_mm256_set1_epi32(0x807fffffu32 as i32)), ), _mm256_castsi256_ps(_mm256_set1_epi32(0x3f800000)), ); let lookup_entry_inv_truncated = |fraction: i32| { let truncated = 1.0 + (fraction as f64 + 0.5) / ((1 << FRAC_BITS) as f64); (1.0 / truncated) as f32 }; let lookup_table_inv_truncated = lookup_table_8_avx2!(start: 0, closure: lookup_entry_inv_truncated); let lookup_entry_truncated_pow_2_4 = |fraction: i32| (lookup_entry_inv_truncated(fraction) as f64).powf(-2.4) as f32; let lookup_table_truncated_pow_2_4 = lookup_table_8_avx2!(start: 0, closure: lookup_entry_truncated_pow_2_4); // No reason to mask the higher bits let fraction = _mm256_srli_epi32(bits, 23 - FRAC_BITS as i32); let truncated_pow_2_4 = _mm256_permutevar8x32_ps(lookup_table_truncated_pow_2_4, fraction); let x = _mm256_mul_ps( x, _mm256_permutevar8x32_ps(lookup_table_inv_truncated, fraction), ); let x2 = _mm256_mul_ps(x, x); let x3 = _mm256_mul_ps(x2, x); let est = sum_mult_avx!( (7.0 / 125.0), (x, -36. / 125.), (x2, 126. / 125.), (x3, 28. / 125.) ); _mm256_mul_ps(est, _mm256_mul_ps(truncated_pow_2_4, exp_pow_2_4)) } #[target_feature(enable = "avx2")] unsafe fn cbrt_approx_avx2(x: __m256) -> __m256 { // See non-avx2 version const FRAC_BITS: u32 = 3; let bits = _mm256_castps_si256(x); let log2_index = _mm256_add_epi32(_mm256_srli_epi32(bits, 23), _mm256_set1_epi32(-0x7f + 7)); let lookup_entry_exp_cbrt = |log2: i32| (f32::from_bits(((log2 + 0x7f) << 23) as u32) as f64).powf(1. / 3.) 
as f32; let lookup_table_exp_cbrt = lookup_table_16_avx2!(start: -7, closure: lookup_entry_exp_cbrt); let exp_cbrt = _mm256_blendv_ps( _mm256_permutevar8x32_ps(lookup_table_exp_cbrt.0, log2_index), _mm256_permutevar8x32_ps(lookup_table_exp_cbrt.1, log2_index), // Check if log is greater than 7 _mm256_castsi256_ps(_mm256_slli_epi32(log2_index, 28)), ); let x = _mm256_or_ps( _mm256_and_ps( x, _mm256_castsi256_ps(_mm256_set1_epi32(0x807fffffu32 as i32)), ), _mm256_castsi256_ps(_mm256_set1_epi32(0x3f800000)), ); let lookup_entry_inv_truncated = |fraction: i32| { let truncated = 1.0 + (fraction as f64 + 0.5) / ((1 << FRAC_BITS) as f64); (1.0 / truncated) as f32 }; let lookup_table_inv_truncated = lookup_table_8_avx2!(start: 0, closure: lookup_entry_inv_truncated); let lookup_entry_truncated_cbrt = |fraction: i32| (lookup_entry_inv_truncated(fraction) as f64).powf(-1. / 3.) as f32; let lookup_table_truncated_cbrt = lookup_table_8_avx2!(start: 0, closure: lookup_entry_truncated_cbrt); // No reason to mask the higher bits let fraction = _mm256_srli_epi32(bits, 23 - FRAC_BITS as i32); let truncated_cbrt = _mm256_permutevar8x32_ps(lookup_table_truncated_cbrt, fraction); let x = _mm256_mul_ps( x, _mm256_permutevar8x32_ps(lookup_table_inv_truncated, fraction), ); let x2 = _mm256_mul_ps(x, x); let x3 = _mm256_mul_ps(x2, x); let est = sum_mult_avx!( (40. / 81.0), (x, 60. / 81.), (x2, -24. / 81.), (x3, 5. / 81.) ); _mm256_mul_ps(est, _mm256_mul_ps(truncated_cbrt, exp_cbrt)) } } av-metrics-0.9.1/src/video/decode.rs000064400000000000000000000143451046102023000154020ustar 00000000000000//! Contains a trait and utilities for implementing decoders. //! Prebuilt decoders are included in the `av-metrics-decoders` crate. use crate::video::pixel::Pixel; use crate::video::{ChromaSamplePosition, ChromaSampling}; use std::cmp; use v_frame::frame::Frame; use v_frame::pixel::CastFromPrimitive; use v_frame::plane::Plane; /// A trait for allowing metrics to decode generic video formats. /// /// Currently, y4m decoding support using the `y4m` crate is built-in /// to this crate. This trait is extensible so users may implement /// their own decoders. pub trait Decoder: Send { /// Read the next frame from the input video. /// /// Expected to return `Err` if the end of the video is reached. fn read_video_frame(&mut self) -> Option>; /// Read a specific frame from the input video /// /// Expected to return `Err` if the frame is not found. fn read_specific_frame(&mut self, frame_number: usize) -> Option> { let mut frame_no = 0; while frame_no <= frame_number { let frame = self.read_video_frame(); if frame_no == frame_number && frame.is_some() { return frame; } frame_no += 1; } None } /// Get the bit depth of the video. fn get_bit_depth(&self) -> usize; /// Get the Video Details fn get_video_details(&self) -> VideoDetails; } /// A Structure containing Video Details as per Plane's Config #[derive(Debug, Clone, Copy)] pub struct VideoDetails { /// Width in pixels. pub width: usize, /// Height in pixels. pub height: usize, /// Bit-depth of the Video pub bit_depth: usize, /// ChromaSampling of the Video. pub chroma_sampling: ChromaSampling, /// Chroma Sampling Position of the Video. pub chroma_sample_position: ChromaSamplePosition, /// Add Time base of the Video. 
pub time_base: Rational, /// Padding Constant pub luma_padding: usize, } impl Default for VideoDetails { fn default() -> Self { VideoDetails { width: 640, height: 480, bit_depth: 8, chroma_sampling: ChromaSampling::Cs420, chroma_sample_position: ChromaSamplePosition::Unknown, time_base: Rational { num: 30, den: 1 }, luma_padding: 0, } } } /// A rational number. #[derive(Clone, Copy, Debug)] #[repr(C)] pub struct Rational { /// Numerator. pub num: u64, /// Denominator. pub den: u64, } impl Rational { /// Creates a rational number from the given numerator and denominator. pub const fn new(num: u64, den: u64) -> Self { Rational { num, den } } /// Returns a rational number that is the reciprocal of the given one. pub const fn from_reciprocal(reciprocal: Self) -> Self { Rational { num: reciprocal.den, den: reciprocal.num, } } /// Returns the rational number as a floating-point number. pub fn as_f64(self) -> f64 { self.num as f64 / self.den as f64 } } /// The algorithms (as ported from daala-tools) expect a colocated or bilaterally located chroma /// sample position. This means that a vertical chroma sample position must be realigned /// in order to produce a correct result. pub fn convert_chroma_data( plane_data: &mut Plane, chroma_pos: ChromaSamplePosition, bit_depth: usize, source: &[u8], source_stride: usize, source_bytewidth: usize, ) { if chroma_pos != ChromaSamplePosition::Vertical { // TODO: Also convert Interpolated chromas plane_data.copy_from_raw_u8(source, source_stride, source_bytewidth); return; } let get_pixel = if source_bytewidth == 1 { fn convert_u8(line: &[u8], index: usize) -> i32 { i32::cast_from(line[index]) } convert_u8 } else { fn convert_u16(line: &[u8], index: usize) -> i32 { let index = index * 2; i32::cast_from(u16::cast_from(line[index + 1]) << 8 | u16::cast_from(line[index])) } convert_u16 }; let output_data = &mut plane_data.data; let width = plane_data.cfg.width; let height = plane_data.cfg.height; for y in 0..height { // Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos window. let in_row = &source[(y * source_stride)..]; let out_row = &mut output_data[(y * width)..]; let breakpoint = cmp::min(width, 2); for x in 0..breakpoint { out_row[x] = T::cast_from(clamp( (4 * get_pixel(in_row, 0) - 17 * get_pixel(in_row, x.saturating_sub(1)) + 114 * get_pixel(in_row, x) + 35 * get_pixel(in_row, cmp::min(x + 1, width - 1)) - 9 * get_pixel(in_row, cmp::min(x + 2, width - 1)) + get_pixel(in_row, cmp::min(x + 3, width - 1)) + 64) >> 7, 0, (1 << bit_depth) - 1, )); } let breakpoint2 = width - 3; for x in breakpoint..breakpoint2 { out_row[x] = T::cast_from(clamp( (4 * get_pixel(in_row, x - 2) - 17 * get_pixel(in_row, x - 1) + 114 * get_pixel(in_row, x) + 35 * get_pixel(in_row, x + 1) - 9 * get_pixel(in_row, x + 2) + get_pixel(in_row, x + 3) + 64) >> 7, 0, (1 << bit_depth) - 1, )); } for x in breakpoint2..width { out_row[x] = T::cast_from(clamp( (4 * get_pixel(in_row, x - 2) - 17 * get_pixel(in_row, x - 1) + 114 * get_pixel(in_row, x) + 35 * get_pixel(in_row, cmp::min(x + 1, width - 1)) - 9 * get_pixel(in_row, cmp::min(x + 2, width - 1)) + get_pixel(in_row, width - 1) + 64) >> 7, 0, (1 << bit_depth) - 1, )); } } } #[inline] fn clamp(input: T, min: T, max: T) -> T { if input < min { min } else if input > max { max } else { input } } av-metrics-0.9.1/src/video/mod.rs000064400000000000000000000206741046102023000147400ustar 00000000000000//! Contains metrics related to video/image quality. 
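//! The `decode` module defines the `Decoder` trait used to feed frames into these metrics; the `ciede`, `psnr`, `psnr_hvs`, and `ssim` modules implement the individual metrics.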
pub mod ciede; pub mod decode; mod pixel; pub mod psnr; pub mod psnr_hvs; pub mod ssim; use crate::MetricsError; use decode::*; use std::error::Error; pub use pixel::*; pub use v_frame::frame::Frame; pub use v_frame::plane::Plane; trait FrameCompare { fn can_compare(&self, other: &Self) -> Result<(), MetricsError>; } impl FrameCompare for Frame { fn can_compare(&self, other: &Self) -> Result<(), MetricsError> { self.planes[0].can_compare(&other.planes[0])?; self.planes[1].can_compare(&other.planes[1])?; self.planes[2].can_compare(&other.planes[2])?; Ok(()) } } pub(crate) trait PlaneCompare { fn can_compare(&self, other: &Self) -> Result<(), MetricsError>; } impl PlaneCompare for Plane { fn can_compare(&self, other: &Self) -> Result<(), MetricsError> { if self.cfg != other.cfg { return Err(MetricsError::InputMismatch { reason: "Video resolution does not match", }); } Ok(()) } } pub use v_frame::pixel::ChromaSampling; pub(crate) trait ChromaWeight { fn get_chroma_weight(self) -> f64; } impl ChromaWeight for ChromaSampling { /// The relative impact of chroma planes compared to luma fn get_chroma_weight(self) -> f64 { match self { ChromaSampling::Cs420 => 0.25, ChromaSampling::Cs422 => 0.5, ChromaSampling::Cs444 => 1.0, ChromaSampling::Cs400 => 0.0, } } } /// Sample position for subsampled chroma #[derive(Copy, Clone, Debug, PartialEq, Eq, Default)] pub enum ChromaSamplePosition { /// The source video transfer function is not signaled. This crate will assume /// no transformation needs to be done on this data, but there is a risk of metric /// calculations being inaccurate. #[default] Unknown, /// Horizontally co-located with (0, 0) luma sample, vertically positioned /// in the middle between two luma samples. Vertical, /// Co-located with (0, 0) luma sample. Colocated, /// Bilaterally located chroma plane in the diagonal space between luma samples. Bilateral, /// Interlaced content with interpolated chroma samples. Interpolated, } /// Certain metrics return a value per plane. This struct contains the output /// for those metrics per plane, as well as a weighted average of the planes. #[derive(Debug, Default, Clone, Copy, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Serialize))] pub struct PlanarMetrics { /// Metric value for the Y plane. pub y: f64, /// Metric value for the U/Cb plane. pub u: f64, /// Metric value for the V/Cb plane. pub v: f64, /// Weighted average of the three planes. pub avg: f64, } trait VideoMetric: Send + Sync { type FrameResult: Send + Sync; type VideoResult: Send + Sync; /// Generic method for internal use that processes multiple frames from a video /// into an aggregate metric. /// /// `frame_fn` is the function to calculate metrics on one frame of the video. /// `acc_fn` is the accumulator function to calculate the aggregate metric. 
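/// Dispatches to `process_video_mt` with `u8` or `u16` pixel storage based on the first decoder's reported bit depth.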
fn process_video<D: Decoder, F: Fn(usize) + Send>( &mut self, decoder1: &mut D, decoder2: &mut D, frame_limit: Option<usize>, progress_callback: F, ) -> Result<Self::VideoResult, Box<dyn Error>> { if decoder1.get_bit_depth() != decoder2.get_bit_depth() { return Err(Box::new(MetricsError::InputMismatch { reason: "Bit depths do not match", })); } if decoder1.get_video_details().chroma_sampling != decoder2.get_video_details().chroma_sampling { return Err(Box::new(MetricsError::InputMismatch { reason: "Chroma samplings do not match", })); } if decoder1.get_bit_depth() > 8 { self.process_video_mt::<u16, D, F>(decoder1, decoder2, frame_limit, progress_callback) } else { self.process_video_mt::<u8, D, F>(decoder1, decoder2, frame_limit, progress_callback) } } fn process_frame<T: Pixel>( &self, frame1: &Frame<T>, frame2: &Frame<T>, bit_depth: usize, chroma_sampling: ChromaSampling, ) -> Result<Self::FrameResult, Box<dyn Error>>; fn aggregate_frame_results( &self, metrics: &[Self::FrameResult], ) -> Result<Self::VideoResult, Box<dyn Error>>; fn process_video_mt<P: Pixel, D: Decoder, F: Fn(usize) + Send>( &mut self, decoder1: &mut D, decoder2: &mut D, frame_limit: Option<usize>, progress_callback: F, ) -> Result<Self::VideoResult, Box<dyn Error>> { let num_threads = (rayon::current_num_threads() - 1).max(1); let mut out = Vec::new(); let (send, recv) = crossbeam::channel::bounded(num_threads); let vid_info = decoder1.get_video_details(); match crossbeam::scope(|s| { let send_result = s.spawn(move |_| { let mut decoded = 0; while frame_limit.map(|limit| limit > decoded).unwrap_or(true) { decoded += 1; let frame1 = decoder1.read_video_frame::<P>

(); let frame2 = decoder2.read_video_frame::<P>
(); if let (Some(frame1), Some(frame2)) = (frame1, frame2) { progress_callback(decoded); if let Err(e) = send.send((frame1, frame2)) { let (frame1, frame2) = e.into_inner(); return Err(format!( "Error sending\n\nframe1: {frame1:?}\n\nframe2: {frame2:?}" )); } } else { break; } } // Mark the end of the decoding process progress_callback(usize::MAX); Ok(()) }); use rayon::prelude::*; let mut metrics = Vec::with_capacity(frame_limit.unwrap_or(0)); let mut process_error = Ok(()); loop { let working_set: Vec<_> = (0..num_threads) .into_par_iter() .filter_map(|_w| { recv.recv() .map(|(f1, f2)| { self.process_frame( &f1, &f2, vid_info.bit_depth, vid_info.chroma_sampling, ) .map_err(|e| { format!("\n\n{e} on\n\nframe1: {f1:?}\n\nand\n\nframe2: {f2:?}") }) }) .ok() }) .collect(); let work_set: Vec<_> = working_set .into_iter() .filter_map(|v| v.map_err(|e| process_error = Err(e)).ok()) .collect(); if work_set.is_empty() || process_error.is_err() { break; } else { metrics.extend(work_set); } } out = metrics; ( send_result .join() .unwrap_or_else(|_| Err("Failed joining the sender thread".to_owned())), process_error, ) }) { Ok((send_error, process_error)) => { if let Err(error) = send_error { return Err(MetricsError::SendError { reason: error }.into()); } if let Err(error) = process_error { return Err(MetricsError::ProcessError { reason: error }.into()); } if out.is_empty() { return Err(MetricsError::UnsupportedInput { reason: "No readable frames found in one or more input files", } .into()); } self.aggregate_frame_results(&out) } Err(e) => Err(MetricsError::VideoError { reason: format!("\n\nError {e:?} processing the two videos"), } .into()), } } } av-metrics-0.9.1/src/video/pixel.rs000064400000000000000000000002211046102023000152640ustar 00000000000000//! Traits for generic code over low and high bit depth video. //! //! Borrowed from rav1e. pub use v_frame::pixel::{CastFromPrimitive, Pixel}; av-metrics-0.9.1/src/video/psnr.rs000064400000000000000000000145101046102023000151330ustar 00000000000000//! Peak Signal-to-Noise Ratio metric. //! //! PSNR is most easily defined via the mean squared error between two images. //! //! See https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio for more details. use crate::video::decode::Decoder; use crate::video::pixel::CastFromPrimitive; use crate::video::pixel::Pixel; use crate::video::{PlanarMetrics, VideoMetric}; use crate::MetricsError; use std::error::Error; use std::mem::size_of; use v_frame::frame::Frame; use v_frame::plane::Plane; use v_frame::prelude::ChromaSampling; use super::FrameCompare; /// Calculates the PSNR for two videos. Higher is better. /// /// PSNR is capped at 100 in order to avoid skewed statistics /// from e.g. all black frames, which would /// otherwise show a PSNR of infinity. #[inline] pub fn calculate_video_psnr( decoder1: &mut D, decoder2: &mut D, frame_limit: Option, progress_callback: F, ) -> Result> { let metrics = Psnr.process_video(decoder1, decoder2, frame_limit, progress_callback)?; Ok(metrics.psnr) } /// Calculates the APSNR for two videos. Higher is better. /// /// APSNR is capped at 100 in order to avoid skewed statistics /// from e.g. all black frames, which would /// otherwise show a APSNR of infinity. #[inline] pub fn calculate_video_apsnr( decoder1: &mut D, decoder2: &mut D, frame_limit: Option, progress_callback: F, ) -> Result> { let metrics = Psnr.process_video(decoder1, decoder2, frame_limit, progress_callback)?; Ok(metrics.apsnr) } /// Calculates the PSNR for two video frames. Higher is better. 
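/// Computed per plane as PSNR = 10 * log10(sample_max^2 * n_pixels / total_squared_error), with sample_max = 2^bit_depth - 1.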
/// /// PSNR is capped at 100 in order to avoid skewed statistics /// from e.g. all black frames, which would /// otherwise show a PSNR of infinity. #[inline] pub fn calculate_frame_psnr( frame1: &Frame, frame2: &Frame, bit_depth: usize, chroma_sampling: ChromaSampling, ) -> Result> { let metrics = Psnr.process_frame(frame1, frame2, bit_depth, chroma_sampling)?; Ok(PlanarMetrics { y: calculate_psnr(metrics[0]), u: calculate_psnr(metrics[1]), v: calculate_psnr(metrics[2]), avg: calculate_summed_psnr(&metrics), }) } #[derive(Debug, Clone, Copy)] struct PsnrResults { psnr: PlanarMetrics, apsnr: PlanarMetrics, } struct Psnr; impl VideoMetric for Psnr { type FrameResult = [PsnrMetrics; 3]; type VideoResult = PsnrResults; fn process_frame( &self, frame1: &Frame, frame2: &Frame, bit_depth: usize, _chroma_sampling: ChromaSampling, ) -> Result> { if (size_of::() == 1 && bit_depth > 8) || (size_of::() == 2 && bit_depth <= 8) { return Err(Box::new(MetricsError::InputMismatch { reason: "Bit depths does not match pixel width", })); } frame1.can_compare(frame2)?; let bit_depth = bit_depth; let mut y = Default::default(); let mut u = Default::default(); let mut v = Default::default(); rayon::scope(|s| { s.spawn(|_| { y = calculate_plane_psnr_metrics(&frame1.planes[0], &frame2.planes[0], bit_depth) }); s.spawn(|_| { u = calculate_plane_psnr_metrics(&frame1.planes[1], &frame2.planes[1], bit_depth) }); s.spawn(|_| { v = calculate_plane_psnr_metrics(&frame1.planes[2], &frame2.planes[2], bit_depth) }); }); Ok([y, u, v]) } fn aggregate_frame_results( &self, metrics: &[Self::FrameResult], ) -> Result> { let psnr = PlanarMetrics { y: calculate_summed_psnr(&metrics.iter().map(|m| m[0]).collect::>()), u: calculate_summed_psnr(&metrics.iter().map(|m| m[1]).collect::>()), v: calculate_summed_psnr(&metrics.iter().map(|m| m[2]).collect::>()), avg: calculate_summed_psnr(&metrics.iter().flatten().copied().collect::>()), }; let apsnr = PlanarMetrics { y: metrics.iter().map(|m| calculate_psnr(m[0])).sum::() / metrics.len() as f64, u: metrics.iter().map(|m| calculate_psnr(m[1])).sum::() / metrics.len() as f64, v: metrics.iter().map(|m| calculate_psnr(m[2])).sum::() / metrics.len() as f64, avg: metrics .iter() .map(|m| calculate_summed_psnr(m)) .sum::() / metrics.len() as f64, }; Ok(PsnrResults { psnr, apsnr }) } } #[derive(Debug, Clone, Copy, Default)] struct PsnrMetrics { sq_err: f64, n_pixels: usize, sample_max: usize, } fn calculate_summed_psnr(metrics: &[PsnrMetrics]) -> f64 { calculate_psnr( metrics .iter() .fold(PsnrMetrics::default(), |acc, plane| PsnrMetrics { sq_err: acc.sq_err + plane.sq_err, sample_max: plane.sample_max, n_pixels: acc.n_pixels + plane.n_pixels, }), ) } /// Calculate the PSNR metrics for a `Plane` by comparing the original (uncompressed) to /// the compressed version. fn calculate_plane_psnr_metrics( plane1: &Plane, plane2: &Plane, bit_depth: usize, ) -> PsnrMetrics { let sq_err = calculate_plane_total_squared_error(plane1, plane2); let max = (1 << bit_depth) - 1; PsnrMetrics { sq_err, n_pixels: plane1.cfg.width * plane1.cfg.height, sample_max: max, } } fn calculate_psnr(metrics: PsnrMetrics) -> f64 { if metrics.sq_err <= std::f64::EPSILON { return 100.0; } 10.0 * ((metrics.sample_max.pow(2) as f64).log10() + (metrics.n_pixels as f64).log10() - metrics.sq_err.log10()) } /// Calculate the squared error for a `Plane` by comparing the original (uncompressed) /// to the compressed version. 
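/// Returns the sum over all samples of the squared difference, accumulated as `u64` and returned as `f64`.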
fn calculate_plane_total_squared_error(plane1: &Plane, plane2: &Plane) -> f64 { plane1 .data .iter() .zip(plane2.data.iter()) .map(|(a, b)| (i32::cast_from(*a) - i32::cast_from(*b)).unsigned_abs() as u64) .map(|err| err * err) .sum::() as f64 } av-metrics-0.9.1/src/video/psnr_hvs.rs000064400000000000000000000376131046102023000160240ustar 00000000000000//! Peak Signal-to-Noise Ratio metric accounting for the Human Visual System. //! //! Humans perceive larger differences from certain factors of an image compared //! to other factors. This metric attempts to take the human perception factor //! into account. //! //! See https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio for more details. use crate::video::decode::Decoder; use crate::video::pixel::CastFromPrimitive; use crate::video::pixel::Pixel; use crate::video::ChromaWeight; use crate::video::{PlanarMetrics, VideoMetric}; use crate::MetricsError; use std::error::Error; use std::mem::size_of; use v_frame::frame::Frame; use v_frame::plane::Plane; use v_frame::prelude::ChromaSampling; use super::FrameCompare; /// Calculates the PSNR-HVS score between two videos. Higher is better. #[inline] pub fn calculate_video_psnr_hvs( decoder1: &mut D, decoder2: &mut D, frame_limit: Option, progress_callback: F, ) -> Result> { let cweight = Some( decoder1 .get_video_details() .chroma_sampling .get_chroma_weight(), ); PsnrHvs { cweight }.process_video(decoder1, decoder2, frame_limit, progress_callback) } /// Calculates the PSNR-HVS score between two video frames. Higher is better. #[inline] pub fn calculate_frame_psnr_hvs( frame1: &Frame, frame2: &Frame, bit_depth: usize, chroma_sampling: ChromaSampling, ) -> Result> { let processor = PsnrHvs::default(); let result = processor.process_frame(frame1, frame2, bit_depth, chroma_sampling)?; let cweight = chroma_sampling.get_chroma_weight(); Ok(PlanarMetrics { y: log10_convert(result.y, 1.0), u: log10_convert(result.u, 1.0), v: log10_convert(result.v, 1.0), avg: log10_convert( result.y + cweight * (result.u + result.v), 1.0 + 2.0 * cweight, ), }) } #[derive(Default)] struct PsnrHvs { pub cweight: Option, } impl VideoMetric for PsnrHvs { type FrameResult = PlanarMetrics; type VideoResult = PlanarMetrics; /// Returns the *unweighted* scores. Depending on whether we output per-frame /// or per-video, these will be weighted at different points. fn process_frame( &self, frame1: &Frame, frame2: &Frame, bit_depth: usize, _chroma_sampling: ChromaSampling, ) -> Result> { if (size_of::() == 1 && bit_depth > 8) || (size_of::() == 2 && bit_depth <= 8) { return Err(Box::new(MetricsError::InputMismatch { reason: "Bit depths does not match pixel width", })); } frame1.can_compare(frame2)?; let bit_depth = bit_depth; let mut y = 0.0; let mut u = 0.0; let mut v = 0.0; rayon::scope(|s| { s.spawn(|_| { y = calculate_plane_psnr_hvs(&frame1.planes[0], &frame2.planes[0], 0, bit_depth) }); s.spawn(|_| { u = calculate_plane_psnr_hvs(&frame1.planes[1], &frame2.planes[1], 1, bit_depth) }); s.spawn(|_| { v = calculate_plane_psnr_hvs(&frame1.planes[2], &frame2.planes[2], 2, bit_depth) }); }); Ok(PlanarMetrics { y, u, v, // field not used here avg: 0., }) } fn aggregate_frame_results( &self, metrics: &[Self::FrameResult], ) -> Result> { let cweight = self.cweight.unwrap_or(1.0); let sum_y = metrics.iter().map(|m| m.y).sum::(); let sum_u = metrics.iter().map(|m| m.u).sum::(); let sum_v = metrics.iter().map(|m| m.v).sum::(); Ok(PlanarMetrics { y: log10_convert(sum_y, 1. / metrics.len() as f64), u: log10_convert(sum_u, 1. 
/ metrics.len() as f64), v: log10_convert(sum_v, 1. / metrics.len() as f64), avg: log10_convert( sum_y + cweight * (sum_u + sum_v), (1. + 2. * cweight) * 1. / metrics.len() as f64, ), }) } } // Normalized inverse quantization matrix for 8x8 DCT at the point of transparency. // This is not the JPEG based matrix from the paper, // this one gives a slightly higher MOS agreement. #[rustfmt::skip] const CSF_Y: [[f64; 8]; 8] = [ [1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334, 0.678296995242, 0.466224900598, 0.3265091542], [2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963, 0.868920337363, 0.61280991668, 0.436405793551], [2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257, 0.670882927016, 0.501731932449, 0.372504254596], [1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575, 0.605636379554, 0.48309405692, 0.380429446972, 0.295774038565], [1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554, 0.448996256676, 0.352889268808, 0.283006984131, 0.226951348204], [0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692, 0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321], [0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972, 0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001], [0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565, 0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276] ]; #[rustfmt::skip] const CSF_CB420: [[f64; 8]; 8] = [ [1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788, 0.898018824055, 0.74725392039, 0.615105596242], [2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972, 1.17428548929, 0.996404342439, 0.830890433625], [1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362, 0.960060382087, 0.849823426169, 0.731221236837], [1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099, 0.751437590932, 0.685398513368, 0.608694761374], [1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187, 0.605503172737, 0.55002013668, 0.495804539034], [0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932, 0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965], [0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368, 0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733], [0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374, 0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237] ]; #[rustfmt::skip] const CSF_CR420: [[f64; 8]; 8] = [ [2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469, 0.867069376285, 0.721500455585, 0.593906509971], [2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198, 1.13381474809, 0.962064122248, 0.802254508198], [1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848, 0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706], [1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195, 0.725539939514, 0.661776842059, 0.587716619023], [1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286, 0.584635025748, 0.531064164893, 0.478717061273], [0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514, 0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543], [0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059, 0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063], [0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023, 0.478717061273, 0.393021669543, 0.330555063063, 
fn calculate_plane_psnr_hvs<T: Pixel>(
    plane1: &Plane<T>,
    plane2: &Plane<T>,
    plane_idx: usize,
    bit_depth: usize,
) -> f64 {
    const STEP: usize = 7;
    let mut result = 0.0;
    let mut pixels = 0usize;
    let csf = match plane_idx {
        0 => &CSF_Y,
        1 => &CSF_CB420,
        2 => &CSF_CR420,
        _ => unreachable!(),
    };

    // In the PSNR-HVS-M paper[1] the authors describe the construction of
    // their masking table as "we have used the quantization table for the
    // color component Y of JPEG [6] that has been also obtained on the
    // basis of CSF. Note that the values in quantization table JPEG have
    // been normalized and then squared." Their CSF matrix (from PSNR-HVS)
    // was also constructed from the JPEG matrices. I cannot find any obvious
    // scheme of normalizing to produce their table, but if I multiply their
    // CSF by 0.38857 and square the result I get their masking table.
    // I have no idea where this constant comes from, but deviating from it
    // too greatly hurts MOS agreement.
    //
    // [1] Nikolay Ponomarenko, Flavia Silvestri, Karen Egiazarian, Marco Carli,
    // Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking
    // of DCT basis functions", CD-ROM Proceedings of the Third
    // International Workshop on Video Processing and Quality Metrics for Consumer
    // Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p.
    const CSF_MULTIPLIER: f64 = 0.3885746225901003;
    let mut mask = [[0.0; 8]; 8];
    for x in 0..8 {
        for y in 0..8 {
            mask[x][y] = (csf[x][y] * CSF_MULTIPLIER).powi(2);
        }
    }

    let height = plane1.cfg.height;
    let width = plane1.cfg.width;
    let stride = plane1.cfg.stride;
    let mut p1 = [0i16; 8 * 8];
    let mut p2 = [0i16; 8 * 8];
    let mut dct_p1 = [0i32; 8 * 8];
    let mut dct_p2 = [0i32; 8 * 8];
    assert!(plane1.data.len() >= stride * height);
    assert!(plane2.data.len() >= stride * height);
    for y in (0..(height - STEP)).step_by(STEP) {
        for x in (0..(width - STEP)).step_by(STEP) {
            let mut p1_means = [0.0; 4];
            let mut p2_means = [0.0; 4];
            let mut p1_vars = [0.0; 4];
            let mut p2_vars = [0.0; 4];
            let mut p1_gmean = 0.0;
            let mut p2_gmean = 0.0;
            let mut p1_gvar = 0.0;
            let mut p2_gvar = 0.0;
            let mut p1_mask = 0.0;
            let mut p2_mask = 0.0;
            for i in 0..8 {
                for j in 0..8 {
                    p1[i * 8 + j] = i16::cast_from(plane1.data[(y + i) * stride + x + j]);
                    p2[i * 8 + j] = i16::cast_from(plane2.data[(y + i) * stride + x + j]);
                    let sub = ((i & 12) >> 2) + ((j & 12) >> 1);
                    p1_gmean += p1[i * 8 + j] as f64;
                    p2_gmean += p2[i * 8 + j] as f64;
                    p1_means[sub] += p1[i * 8 + j] as f64;
                    p2_means[sub] += p2[i * 8 + j] as f64;
                }
            }
            p1_gmean /= 64.0;
            p2_gmean /= 64.0;
            for i in 0..4 {
                p1_means[i] /= 16.0;
                p2_means[i] /= 16.0;
            }
            for i in 0..8 {
                for j in 0..8 {
                    let sub = ((i & 12) >> 2) + ((j & 12) >> 1);
                    p1_gvar +=
                        (p1[i * 8 + j] as f64 - p1_gmean) * (p1[i * 8 + j] as f64 - p1_gmean);
                    p2_gvar +=
                        (p2[i * 8 + j] as f64 - p2_gmean) * (p2[i * 8 + j] as f64 - p2_gmean);
                    p1_vars[sub] += (p1[i * 8 + j] as f64 - p1_means[sub])
                        * (p1[i * 8 + j] as f64 - p1_means[sub]);
                    p2_vars[sub] += (p2[i * 8 + j] as f64 - p2_means[sub])
                        * (p2[i * 8 + j] as f64 - p2_means[sub]);
                }
            }
            p1_gvar *= 64.0 / 63.0;
            p2_gvar *= 64.0 / 63.0;
            for i in 0..4 {
                p1_vars[i] *= 16.0 / 15.0;
                p2_vars[i] *= 16.0 / 15.0;
            }
            if p1_gvar > 0.0 {
                p1_gvar = p1_vars.iter().sum::<f64>() / p1_gvar;
            }
            if p2_gvar > 0.0 {
                p2_gvar = p2_vars.iter().sum::<f64>() / p2_gvar;
            }
            p1.iter().copied().enumerate().for_each(|(i, v)| {
                dct_p1[i] = v as i32;
            });
            p2.iter().copied().enumerate().for_each(|(i, v)| {
                dct_p2[i] = v as i32;
            });
            od_bin_fdct8x8(&mut dct_p1);
            od_bin_fdct8x8(&mut dct_p2);
            for i in 0..8 {
                for j in (i == 0) as usize..8 {
                    p1_mask += dct_p1[i * 8 + j].pow(2) as f64 * mask[i][j];
                    p2_mask += dct_p2[i * 8 + j].pow(2) as f64 * mask[i][j];
                }
            }
            p1_mask = (p1_mask * p1_gvar).sqrt() / 32.0;
            p2_mask = (p2_mask * p2_gvar).sqrt() / 32.0;
            if p2_mask > p1_mask {
                p1_mask = p2_mask;
            }
            for i in 0..8 {
                for j in 0..8 {
                    let mut err = (dct_p1[i * 8 + j] - dct_p2[i * 8 + j]).abs() as f64;
                    if i != 0 || j != 0 {
                        let err_mask = p1_mask / mask[i][j];
                        err = if err < err_mask { 0.0 } else { err - err_mask };
                    }
                    result += (err * csf[i][j]).powi(2);
                    pixels += 1;
                }
            }
        }
    }
    result /= pixels as f64;
    let sample_max: usize = (1 << bit_depth) - 1;
    result /= sample_max.pow(2) as f64;
    result
}
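// Minimal sanity sketch for the function above: with two identical planes,
// every DCT coefficient difference is zero, so the accumulated error must be
// exactly zero before masking even comes into play. Assumes v_frame's
// `Plane::new(cols, rows, xdec, ydec, xpad, ypad)` constructor.
#[test]
fn identical_planes_yield_zero_distortion() {
    let plane: Plane<u8> = Plane::new(64, 64, 0, 0, 0, 0);
    assert_eq!(calculate_plane_psnr_hvs(&plane, &plane, 0, 8), 0.0);
}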
fn log10_convert(score: f64, weight: f64) -> f64 {
    10.0 * (-1.0 * (weight * score).log10())
}

const DCT_STRIDE: usize = 8;

// Based on daala's version. It is different from the 8x8 DCT we use during encoding.
fn od_bin_fdct8x8(data: &mut [i32]) {
    assert!(data.len() >= 64);
    let mut z = [0; 64];
    for i in 0..8 {
        od_bin_fdct8(&mut z[(DCT_STRIDE * i)..], &data[i..]);
    }
    for i in 0..8 {
        od_bin_fdct8(&mut data[(DCT_STRIDE * i)..], &z[i..]);
    }
}

#[allow(clippy::identity_op)]
fn od_bin_fdct8(y: &mut [i32], x: &[i32]) {
    assert!(y.len() >= 8);
    assert!(x.len() > 7 * DCT_STRIDE);
    let mut t = [0; 8];
    let mut th = [0; 8];
    // Initial permutation
    t[0] = x[0];
    t[4] = x[1 * DCT_STRIDE];
    t[2] = x[2 * DCT_STRIDE];
    t[6] = x[3 * DCT_STRIDE];
    t[7] = x[4 * DCT_STRIDE];
    t[3] = x[5 * DCT_STRIDE];
    t[5] = x[6 * DCT_STRIDE];
    t[1] = x[7 * DCT_STRIDE];
    // +1/-1 butterflies
    t[1] = t[0] - t[1];
    th[1] = od_dct_rshift(t[1], 1);
    t[0] -= th[1];
    t[4] += t[5];
    th[4] = od_dct_rshift(t[4], 1);
    t[5] -= th[4];
    t[3] = t[2] - t[3];
    t[2] -= od_dct_rshift(t[3], 1);
    t[6] += t[7];
    th[6] = od_dct_rshift(t[6], 1);
    t[7] = th[6] - t[7];
    // + Embedded 4-point type-II DCT
    t[0] += th[6];
    t[6] = t[0] - t[6];
    t[2] = th[4] - t[2];
    t[4] = t[2] - t[4];
    // |-+ Embedded 2-point type-II DCT
    t[0] -= (t[4] * 13573 + 16384) >> 15;
    t[4] += (t[0] * 11585 + 8192) >> 14;
    t[0] -= (t[4] * 13573 + 16384) >> 15;
    // |-+ Embedded 2-point type-IV DST
    t[6] -= (t[2] * 21895 + 16384) >> 15;
    t[2] += (t[6] * 15137 + 8192) >> 14;
    t[6] -= (t[2] * 21895 + 16384) >> 15;
    // + Embedded 4-point type-IV DST
    t[3] += (t[5] * 19195 + 16384) >> 15;
    t[5] += (t[3] * 11585 + 8192) >> 14;
    t[3] -= (t[5] * 7489 + 4096) >> 13;
    t[7] = od_dct_rshift(t[5], 1) - t[7];
    t[5] -= t[7];
    t[3] = th[1] - t[3];
    t[1] -= t[3];
    t[7] += (t[1] * 3227 + 16384) >> 15;
    t[1] -= (t[7] * 6393 + 16384) >> 15;
    t[7] += (t[1] * 3227 + 16384) >> 15;
    t[5] += (t[3] * 2485 + 4096) >> 13;
    t[3] -= (t[5] * 18205 + 16384) >> 15;
    t[5] += (t[3] * 2485 + 4096) >> 13;
    y[0] = t[0];
    y[1] = t[1];
    y[2] = t[2];
    y[3] = t[3];
    y[4] = t[4];
    y[5] = t[5];
    y[6] = t[6];
    y[7] = t[7];
}

/// This is the strength-reduced version of `a / (1 << b)`.
/// This will not work for `b == 0`, however currently this is only used for
/// `b == 1` anyway.
#[inline(always)]
fn od_dct_rshift(a: i32, b: u32) -> i32 {
    debug_assert!(b > 0);
    debug_assert!(b <= 32);
    ((a as u32 >> (32 - b)) as i32 + a) >> b
}
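// Sanity sketch for `od_dct_rshift`: for `b > 0` it should agree with
// `a / (1 << b)`, which truncates toward zero for `i32` in Rust, over the
// coefficient magnitudes the DCT above can produce.
#[test]
fn od_dct_rshift_matches_truncating_division() {
    for b in 1..=8 {
        for a in -1000..=1000 {
            assert_eq!(od_dct_rshift(a, b), a / (1 << b));
        }
    }
}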
av-metrics-0.9.1/src/video/ssim.rs000064400000000000000000000415351046102023000151310ustar 00000000000000//! Structural Similarity index.
//!
//! The SSIM index is a full reference metric; in other words, the measurement
//! or prediction of image quality is based on an initial uncompressed or
//! distortion-free image as reference. SSIM is designed to improve on
//! traditional methods such as peak signal-to-noise ratio (PSNR) and mean
//! squared error (MSE).
//!
//! See <https://en.wikipedia.org/wiki/Structural_similarity> for more details.

use crate::video::decode::Decoder;
use crate::video::pixel::CastFromPrimitive;
use crate::video::pixel::Pixel;
use crate::video::ChromaWeight;
use crate::video::{PlanarMetrics, VideoMetric};
use crate::MetricsError;
use std::cmp;
use std::error::Error;
use std::f64::consts::{E, PI};
use std::mem::size_of;
use v_frame::frame::Frame;
use v_frame::plane::Plane;
use v_frame::prelude::ChromaSampling;

use super::FrameCompare;

/// Calculates the SSIM score between two videos. Higher is better.
#[inline]
pub fn calculate_video_ssim<D: Decoder, F: Fn(usize) + Send>(
    decoder1: &mut D,
    decoder2: &mut D,
    frame_limit: Option<usize>,
    progress_callback: F,
) -> Result<PlanarMetrics, Box<dyn Error>> {
    let cweight = Some(
        decoder1
            .get_video_details()
            .chroma_sampling
            .get_chroma_weight(),
    );
    Ssim { cweight }.process_video(decoder1, decoder2, frame_limit, progress_callback)
}

/// Calculates the SSIM score between two video frames. Higher is better.
#[inline]
pub fn calculate_frame_ssim<T: Pixel>(
    frame1: &Frame<T>,
    frame2: &Frame<T>,
    bit_depth: usize,
    chroma_sampling: ChromaSampling,
) -> Result<PlanarMetrics, Box<dyn Error>> {
    let processor = Ssim::default();
    let result = processor.process_frame(frame1, frame2, bit_depth, chroma_sampling)?;
    let cweight = chroma_sampling.get_chroma_weight();
    Ok(PlanarMetrics {
        y: log10_convert(result.y, 1.0),
        u: log10_convert(result.u, 1.0),
        v: log10_convert(result.v, 1.0),
        avg: log10_convert(
            result.y + cweight * (result.u + result.v),
            1.0 + 2.0 * cweight,
        ),
    })
}

#[derive(Default)]
struct Ssim {
    pub cweight: Option<f64>,
}

impl VideoMetric for Ssim {
    type FrameResult = PlanarMetrics;
    type VideoResult = PlanarMetrics;

    /// Returns the *unweighted* scores. Depending on whether we output per-frame
    /// or per-video, these will be weighted at different points.
    fn process_frame<T: Pixel>(
        &self,
        frame1: &Frame<T>,
        frame2: &Frame<T>,
        bit_depth: usize,
        _chroma_sampling: ChromaSampling,
    ) -> Result<Self::FrameResult, Box<dyn Error>> {
        if (size_of::<T>() == 1 && bit_depth > 8) || (size_of::<T>() == 2 && bit_depth <= 8) {
            return Err(Box::new(MetricsError::InputMismatch {
                reason: "Bit depth does not match pixel width",
            }));
        }

        frame1.can_compare(frame2)?;

        const KERNEL_SHIFT: usize = 8;
        const KERNEL_WEIGHT: usize = 1 << KERNEL_SHIFT;
        let sample_max = (1 << bit_depth) - 1;

        let mut y = 0.0;
        let mut u = 0.0;
        let mut v = 0.0;

        rayon::scope(|s| {
            s.spawn(|_| {
                let y_kernel = build_gaussian_kernel(
                    frame1.planes[0].cfg.height as f64 * 1.5 / 256.0,
                    cmp::min(frame1.planes[0].cfg.width, frame1.planes[0].cfg.height),
                    KERNEL_WEIGHT,
                );
                y = calculate_plane_ssim(
                    &frame1.planes[0],
                    &frame2.planes[0],
                    sample_max,
                    &y_kernel,
                    &y_kernel,
                )
            });
            s.spawn(|_| {
                let u_kernel = build_gaussian_kernel(
                    frame1.planes[1].cfg.height as f64 * 1.5 / 256.0,
                    cmp::min(frame1.planes[1].cfg.width, frame1.planes[1].cfg.height),
                    KERNEL_WEIGHT,
                );
                u = calculate_plane_ssim(
                    &frame1.planes[1],
                    &frame2.planes[1],
                    sample_max,
                    &u_kernel,
                    &u_kernel,
                )
            });
            s.spawn(|_| {
                let v_kernel = build_gaussian_kernel(
                    frame1.planes[2].cfg.height as f64 * 1.5 / 256.0,
                    cmp::min(frame1.planes[2].cfg.width, frame1.planes[2].cfg.height),
                    KERNEL_WEIGHT,
                );
                v = calculate_plane_ssim(
                    &frame1.planes[2],
                    &frame2.planes[2],
                    sample_max,
                    &v_kernel,
                    &v_kernel,
                )
            });
        });

        Ok(PlanarMetrics {
            y,
            u,
            v,
            // Not used here
            avg: 0.,
        })
    }

    fn aggregate_frame_results(
        &self,
        metrics: &[Self::FrameResult],
    ) -> Result<Self::VideoResult, Box<dyn Error>> {
        let cweight = self.cweight.unwrap_or(1.0);
        let y_sum = metrics.iter().map(|m| m.y).sum::<f64>();
        let u_sum = metrics.iter().map(|m| m.u).sum::<f64>();
        let v_sum = metrics.iter().map(|m| m.v).sum::<f64>();
        Ok(PlanarMetrics {
            y: log10_convert(y_sum, metrics.len() as f64),
            u: log10_convert(u_sum, metrics.len() as f64),
            v: log10_convert(v_sum, metrics.len() as f64),
            avg: log10_convert(
                y_sum + cweight * (u_sum + v_sum),
                (1. + 2. * cweight) * metrics.len() as f64,
            ),
        })
    }
}
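// Usage sketch for `calculate_frame_ssim`, assuming v_frame's
// `Frame::new_with_padding(width, height, chroma_sampling, luma_padding)`
// constructor. Comparing a frame against itself drives the per-plane SSIM to
// exactly 1.0, which `log10_convert` maps to infinity, much like the PSNR of
// two identical images.
#[test]
fn identical_frames_saturate_ssim() {
    let frame: Frame<u8> = Frame::new_with_padding(64, 64, ChromaSampling::Cs420, 0);
    let score = calculate_frame_ssim(&frame, &frame, 8, ChromaSampling::Cs420).unwrap();
    assert!(score.y > 60.0);
}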
/// Calculates the MSSSIM score between two videos. Higher is better.
///
/// MSSSIM is a variant of SSIM computed over subsampled versions
/// of an image. It is designed to be a more accurate metric
/// than SSIM.
#[inline]
pub fn calculate_video_msssim<D: Decoder, F: Fn(usize) + Send>(
    decoder1: &mut D,
    decoder2: &mut D,
    frame_limit: Option<usize>,
    progress_callback: F,
) -> Result<PlanarMetrics, Box<dyn Error>> {
    let cweight = Some(
        decoder1
            .get_video_details()
            .chroma_sampling
            .get_chroma_weight(),
    );
    MsSsim { cweight }.process_video(decoder1, decoder2, frame_limit, progress_callback)
}

/// Calculates the MSSSIM score between two video frames. Higher is better.
///
/// MSSSIM is a variant of SSIM computed over subsampled versions
/// of an image. It is designed to be a more accurate metric
/// than SSIM.
#[inline]
pub fn calculate_frame_msssim<T: Pixel>(
    frame1: &Frame<T>,
    frame2: &Frame<T>,
    bit_depth: usize,
    chroma_sampling: ChromaSampling,
) -> Result<PlanarMetrics, Box<dyn Error>> {
    let processor = MsSsim::default();
    let result = processor.process_frame(frame1, frame2, bit_depth, chroma_sampling)?;
    let cweight = chroma_sampling.get_chroma_weight();
    Ok(PlanarMetrics {
        y: log10_convert(result.y, 1.0),
        u: log10_convert(result.u, 1.0),
        v: log10_convert(result.v, 1.0),
        avg: log10_convert(
            result.y + cweight * (result.u + result.v),
            1.0 + 2.0 * cweight,
        ),
    })
}

#[derive(Default)]
struct MsSsim {
    pub cweight: Option<f64>,
}

impl VideoMetric for MsSsim {
    type FrameResult = PlanarMetrics;
    type VideoResult = PlanarMetrics;

    /// Returns the *unweighted* scores. Depending on whether we output per-frame
    /// or per-video, these will be weighted at different points.
    fn process_frame<T: Pixel>(
        &self,
        frame1: &Frame<T>,
        frame2: &Frame<T>,
        bit_depth: usize,
        _chroma_sampling: ChromaSampling,
    ) -> Result<Self::FrameResult, Box<dyn Error>> {
        if (size_of::<T>() == 1 && bit_depth > 8) || (size_of::<T>() == 2 && bit_depth <= 8) {
            return Err(Box::new(MetricsError::InputMismatch {
                reason: "Bit depth does not match pixel width",
            }));
        }

        frame1.can_compare(frame2)?;

        let mut y = 0.0;
        let mut u = 0.0;
        let mut v = 0.0;

        rayon::scope(|s| {
            s.spawn(|_| {
                y = calculate_plane_msssim(&frame1.planes[0], &frame2.planes[0], bit_depth)
            });
            s.spawn(|_| {
                u = calculate_plane_msssim(&frame1.planes[1], &frame2.planes[1], bit_depth)
            });
            s.spawn(|_| {
                v = calculate_plane_msssim(&frame1.planes[2], &frame2.planes[2], bit_depth)
            });
        });

        Ok(PlanarMetrics {
            y,
            u,
            v,
            // Not used here
            avg: 0.,
        })
    }

    fn aggregate_frame_results(
        &self,
        metrics: &[Self::FrameResult],
    ) -> Result<Self::VideoResult, Box<dyn Error>> {
        let cweight = self.cweight.unwrap_or(1.0);
        let y_sum = metrics.iter().map(|m| m.y).sum::<f64>();
        let u_sum = metrics.iter().map(|m| m.u).sum::<f64>();
        let v_sum = metrics.iter().map(|m| m.v).sum::<f64>();
        Ok(PlanarMetrics {
            y: log10_convert(y_sum, metrics.len() as f64),
            u: log10_convert(u_sum, metrics.len() as f64),
            v: log10_convert(v_sum, metrics.len() as f64),
            avg: log10_convert(
                y_sum + cweight * (u_sum + v_sum),
                (1. + 2. * cweight) * metrics.len() as f64,
            ),
        })
    }
}
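// The same sketch for the multi-scale variant. The frame must be large enough
// for the chroma planes to survive four 2x downscales (64x64 4:2:0 leaves a
// 2x2 chroma plane at the coarsest scale); identical inputs again saturate
// the log-scaled score.
#[test]
fn identical_frames_saturate_msssim() {
    let frame: Frame<u8> = Frame::new_with_padding(64, 64, ChromaSampling::Cs420, 0);
    let score = calculate_frame_msssim(&frame, &frame, 8, ChromaSampling::Cs420).unwrap();
    assert!(score.y > 60.0);
}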
#[derive(Debug, Clone, Copy, Default)]
struct SsimMoments {
    mux: i64,
    muy: i64,
    x2: i64,
    xy: i64,
    y2: i64,
    w: i64,
}

const SSIM_K1: f64 = 0.01 * 0.01;
const SSIM_K2: f64 = 0.03 * 0.03;

fn calculate_plane_ssim<T: Pixel>(
    plane1: &Plane<T>,
    plane2: &Plane<T>,
    sample_max: u64,
    vert_kernel: &[i64],
    horiz_kernel: &[i64],
) -> f64 {
    let vec1 = plane_to_vec(plane1);
    let vec2 = plane_to_vec(plane2);
    calculate_plane_ssim_internal(
        &vec1,
        &vec2,
        plane1.cfg.width,
        plane1.cfg.height,
        sample_max,
        vert_kernel,
        horiz_kernel,
    )
    .0
}

fn calculate_plane_ssim_internal(
    plane1: &[u32],
    plane2: &[u32],
    width: usize,
    height: usize,
    sample_max: u64,
    vert_kernel: &[i64],
    horiz_kernel: &[i64],
) -> (f64, f64) {
    let vert_offset = vert_kernel.len() >> 1;
    let line_size = vert_kernel.len().next_power_of_two();
    let line_mask = line_size - 1;
    let mut lines = vec![vec![SsimMoments::default(); width]; line_size];
    let horiz_offset = horiz_kernel.len() >> 1;
    let mut ssim = 0.0;
    let mut ssimw = 0.0;
    let mut cs = 0.0;

    for y in 0..(height + vert_offset) {
        if y < height {
            let buf = &mut lines[y & line_mask];
            let line1 = &plane1[(y * width)..];
            let line2 = &plane2[(y * width)..];
            for x in 0..width {
                let mut moments = SsimMoments::default();
                let k_min = horiz_offset.saturating_sub(x);
                let tmp_offset = (x + horiz_offset + 1).saturating_sub(width);
                let k_max = horiz_kernel.len() - tmp_offset;
                for k in k_min..k_max {
                    let window = horiz_kernel[k];
                    let target_x = (x + k).saturating_sub(horiz_offset);
                    let pix1 = line1[target_x] as i64;
                    let pix2 = line2[target_x] as i64;
                    moments.mux += window * pix1;
                    moments.muy += window * pix2;
                    moments.x2 += window * pix1 * pix1;
                    moments.xy += window * pix1 * pix2;
                    moments.y2 += window * pix2 * pix2;
                    moments.w += window;
                }
                buf[x] = moments;
            }
        }
        if y >= vert_offset {
            let k_min = vert_kernel.len().saturating_sub(y + 1);
            let tmp_offset = (y + 1).saturating_sub(height);
            let k_max = vert_kernel.len() - tmp_offset;
            for x in 0..width {
                let mut moments = SsimMoments::default();
                for k in k_min..k_max {
                    let buf = lines[(y + 1 + k - vert_kernel.len()) & line_mask][x];
                    let window = vert_kernel[k];
                    moments.mux += window * buf.mux;
                    moments.muy += window * buf.muy;
                    moments.x2 += window * buf.x2;
                    moments.xy += window * buf.xy;
                    moments.y2 += window * buf.y2;
                    moments.w += window * buf.w;
                }
                let w = moments.w as f64;
                let c1 = sample_max.pow(2) as f64 * SSIM_K1 * w.powi(2);
                let c2 = sample_max.pow(2) as f64 * SSIM_K2 * w.powi(2);
                let mx2 = (moments.mux as f64).powi(2);
                let mxy = moments.mux as f64 * moments.muy as f64;
                let my2 = (moments.muy as f64).powi(2);
                let cs_tmp = w * (c2 + 2.0 * (moments.xy as f64 * w - mxy))
                    / (moments.x2 as f64 * w - mx2 + moments.y2 as f64 * w - my2 + c2);
                cs += cs_tmp;
                ssim += cs_tmp * (2.0 * mxy + c1) / (mx2 + my2 + c1);
                ssimw += w;
            }
        }
    }

    (ssim / ssimw, cs / ssimw)
}
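// Sanity sketch for the windowed computation above: with two identical flat
// buffers every window has matching moments, so both the SSIM term and the
// contrast-structure term reduce to 1.0 (up to floating-point noise).
#[test]
fn identical_flat_planes_give_unit_ssim() {
    let buf = vec![128u32; 16 * 16];
    let kernel = build_gaussian_kernel(1.5, 5, 1 << 10);
    let (ssim, cs) = calculate_plane_ssim_internal(&buf, &buf, 16, 16, 255, &kernel, &kernel);
    assert!((ssim - 1.0).abs() < 1e-12);
    assert!((cs - 1.0).abs() < 1e-12);
}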
fn calculate_plane_msssim<T: Pixel>(
    plane1: &Plane<T>,
    plane2: &Plane<T>,
    bit_depth: usize,
) -> f64 {
    const KERNEL_SHIFT: usize = 10;
    const KERNEL_WEIGHT: usize = 1 << KERNEL_SHIFT;
    // These come from the original MS-SSIM implementation paper:
    // https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf
    // They don't add up to 1 due to rounding done in the paper.
    const MS_WEIGHT: [f64; 5] = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333];

    let mut sample_max = (1 << bit_depth) - 1;
    let mut ssim = [0.0; 5];
    let mut cs = [0.0; 5];
    let mut width = plane1.cfg.width;
    let mut height = plane1.cfg.height;
    let mut plane1 = plane_to_vec(plane1);
    let mut plane2 = plane_to_vec(plane2);
    let kernel = build_gaussian_kernel(1.5, 5, KERNEL_WEIGHT);

    let res = calculate_plane_ssim_internal(
        &plane1, &plane2, width, height, sample_max, &kernel, &kernel,
    );
    ssim[0] = res.0;
    cs[0] = res.1;
    for i in 1..5 {
        plane1 = msssim_downscale(&plane1, width, height);
        plane2 = msssim_downscale(&plane2, width, height);
        width /= 2;
        height /= 2;
        sample_max *= 4;
        let res = calculate_plane_ssim_internal(
            &plane1, &plane2, width, height, sample_max, &kernel, &kernel,
        );
        ssim[i] = res.0;
        cs[i] = res.1;
    }

    cs.iter()
        .zip(MS_WEIGHT.iter())
        .take(4)
        .map(|(cs, weight)| cs.powf(*weight))
        .fold(1.0, |acc, val| acc * val)
        * ssim[4].powf(MS_WEIGHT[4])
}

fn build_gaussian_kernel(sigma: f64, max_len: usize, kernel_weight: usize) -> Vec<i64> {
    let scale = 1.0 / ((2.0 * PI).sqrt() * sigma);
    let nhisigma2 = -0.5 / sigma.powi(2);
    // Compute the kernel size so that the error in the first truncated
    // coefficient is no larger than 0.5*KERNEL_WEIGHT.
    // There is no point in going beyond this given our working precision.
    let s = (0.5 * PI).sqrt() * sigma * (1.0 / kernel_weight as f64);
    let len = if s >= 1.0 {
        0
    } else {
        (sigma * (-2.0 * s.log(E)).sqrt()).floor() as usize
    };
    let kernel_len = if len >= max_len { max_len - 1 } else { len };
    let kernel_size = (kernel_len << 1) | 1;
    let mut kernel = vec![0; kernel_size];
    let mut sum = 0;
    for ci in 1..=kernel_len {
        let val = kernel_weight as f64 * scale * E.powf(nhisigma2 * ci.pow(2) as f64) + 0.5;
        let val = val as i64;
        kernel[kernel_len - ci] = val;
        kernel[kernel_len + ci] = val;
        sum += val;
    }
    kernel[kernel_len] = kernel_weight as i64 - (sum << 1);
    kernel
}

fn plane_to_vec<T: Pixel>(input: &Plane<T>) -> Vec<u32> {
    input.data.iter().map(|pix| u32::cast_from(*pix)).collect()
}

// This acts differently from downscaling a plane, and is what
// requires us to pass around slices of bytes, instead of `Plane`s.
// Instead of averaging the four pixels, it sums them.
// In effect, this gives us much more precision when we downscale.
fn msssim_downscale(input: &[u32], input_width: usize, input_height: usize) -> Vec<u32> {
    let output_width = input_width / 2;
    let output_height = input_height / 2;
    let mut output = vec![0; output_width * output_height];
    for j in 0..output_height {
        let j0 = 2 * j;
        let j1 = cmp::min(j0 + 1, input_height - 1);
        for i in 0..output_width {
            let i0 = 2 * i;
            let i1 = cmp::min(i0 + 1, input_width - 1);
            output[j * output_width + i] = input[j0 * input_width + i0]
                + input[j0 * input_width + i1]
                + input[j1 * input_width + i0]
                + input[j1 * input_width + i1];
        }
    }
    output
}

fn log10_convert(score: f64, weight: f64) -> f64 {
    10.0 * (weight.log10() - (weight - score).log10())
}
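// Two sanity sketches for the helpers above: the kernel taps must sum to
// exactly `kernel_weight` (the center tap is written to absorb the rounding
// error), and the downscaler must *sum* each 2x2 quad rather than average it.
#[test]
fn gaussian_kernel_taps_sum_to_weight() {
    let kernel = build_gaussian_kernel(1.5, 5, 1 << 10);
    assert_eq!(kernel.iter().sum::<i64>(), 1 << 10);
}

#[test]
fn msssim_downscale_sums_quads() {
    // 4x4 input holding 0..=15; each 2x2 quad collapses to its sum.
    let input: Vec<u32> = (0..16).collect();
    assert_eq!(msssim_downscale(&input, 4, 4), vec![10, 18, 42, 50]);
}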